summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSiddharth Ravikumar <sravik@bgsu.edu>2016-02-21 22:18:43 -0500
committerSiddharth Ravikumar <sravik@bgsu.edu>2016-02-21 22:18:43 -0500
commit5e7148867037140404bb1742bba9ab99d5e4d1b7 (patch)
treef705f4c9e26b77c186b3e7cdbf17493ad5ee2800
parent7a8f06bc1d51c60dfb15fbc1afff6d158537d15e (diff)
rough draft of chapter 3 ready.
-rw-r--r--report/bib/combox.bib133
-rw-r--r--report/chapters/3-lit-r.tex234
-rw-r--r--report/combox-report.pdfbin301376 -> 355216 bytes
-rw-r--r--report/combox.tex2
4 files changed, 368 insertions, 1 deletions
diff --git a/report/bib/combox.bib b/report/bib/combox.bib
index 57cbdb3..e3744e6 100644
--- a/report/bib/combox.bib
+++ b/report/bib/combox.bib
@@ -1,3 +1,136 @@
+%3
+@article{yeo,
+title = "Leveraging client-side storage techniques for enhanced use of multiple consumer cloud storage services on resource-constrained mobile devices ",
+journal = "Journal of Network and Computer Applications ",
+volume = "43",
+number = "",
+pages = "142--156",
+year = "2014",
+issn = "1084-8045",
+doi = "10.1016/j.jnca.2014.04.006",
+url = "http://www.sciencedirect.com/science/article/pii/S1084804514000897",
+author = "Hui-Shyong Yeo and Xiao-Shen Phang and Hoon-Jae Lee and Hyotaek Lim",
+keywords = "Multiple cloud storage",
+keywords = "Mobile devices",
+keywords = "Erasure coding",
+keywords = "Fault-tolerance",
+keywords = "Storage techniques "
+}
+
+@article{yang,
+title = "User continuance intention to use cloud storage service ",
+journal = "Computers in Human Behavior ",
+volume = "52",
+pages = "219--232",
+year = "2015",
+issn = "0747-5632",
+doi = "10.1016/j.chb.2015.05.057",
+url = "http://www.sciencedirect.com/science/article/pii/S074756321500446X",
+author = "Heng-Li Yang and Shiang-Lin Lin",
+keywords = "Cloud storage service",
+keywords = "Task-technology Fit",
+keywords = "Technology acceptance model",
+keywords = "Privacy risk "
+}
+
+@article{skycds,
+title = "SkyCDS: A resilient content delivery service based on diversified cloud storage ",
+journal = "Simulation Modelling Practice and Theory ",
+volume = "54",
+number = "",
+pages = "64--85",
+year = "2015",
+issn = "1569-190X",
+doi = "10.1016/j.simpat.2015.03.006",
+url = "http://www.sciencedirect.com/science/article/pii/S1569190X15000477",
+author = "J.L. Gonzalez and Jesus Carretero Perez and Victor J. Sosa-Sosa and Luis M. Sanchez and Borja Bergua",
+keywords = "Content delivery",
+keywords = "Multi-cloud storage",
+keywords = "Pub/Sub overlay",
+keywords = "Virtualization",
+keywords = "Diversification",
+keywords = "Risk management "
+}
+
+@misc{person:joeyh,
+title = "Joey Hess",
+url = "https://joeyh.name"
+}
+
+@misc{program:git-annex,
+title = "git-annex",
+url = "https://git-annex.branchable.com",
+}
+
+@misc{documentation:git-annex-hworks,
+title = "git-annex - how it works",
+url = "https://git-annex.branchable.com/how_it_works/"
+}
+
+@misc{documentation:git-annex-sremotes,
+title = "git-annex - special remotes",
+url = "https://git-annex.branchable.com/special_remotes/"
+}
+
+@misc{docs:git-annex-as3,
+title = "git-annex - special remote - Amazon S3",
+url = "https://git-annex.branchable.com/tips/using_Amazon_S3/"
+}
+
+@misc{program:git,
+title = "git - the stupid content tracker",
+url = "https://git-scm.com/"
+}
+
+@inbook{weatherspoon,
+author="Weatherspoon, Hakim and Kubiatowicz, John D.",
+editor="Druschel, Peter and Kaashoek, Frans and Rowstron, Antony",
+chapter="Erasure Coding Vs. Replication: A Quantitative Comparison",
+title="Peer-to-Peer Systems: First International Workshop, IPTPS 2002 Cambridge, MA, USA, March 7--8, 2002 Revised Papers",
+year="2002",
+publisher="Springer Berlin Heidelberg",
+address="Berlin, Heidelberg",
+pages="328--337",
+isbn="978-3-540-45748-0",
+doi="10.1007/3-540-45748-8_31",
+url="http://dx.doi.org/10.1007/3-540-45748-8_31"
+}
+
+@misc{protocol:oauth2,
+author="D. Hardt",
+title="{The OAuth 2.0 Authorization Framework}",
+series="Request for Comments",
+number="6749",
+howpublished="RFC 6749 (Proposed Standard)",
+publisher="IETF",
+organization="Internet Engineering Task Force",
+year=2012,
+month=oct,
+url="http://www.ietf.org/rfc/rfc6749.txt",
+}
+
+@misc{kaliski,
+ author="B. Kaliski",
+ title="{PKCS \#5: Password-Based Cryptography Specification Version 2.0}",
+ series="Request for Comments",
+ number="2898",
+ howpublished="RFC 2898 (Informational)",
+ publisher="IETF",
+ organization="Internet Engineering Task Force",
+ year=2000,
+ month=sep,
+ url="http://www.ietf.org/rfc/rfc2898.txt",
+}
+
+@inproceedings{jigdfs,
+title="Jigdfs: A secure distributed file system",
+author="Bian, Jiang and Seker, Remzi",
+booktitle="Computational Intelligence in Cyber Security, 2009. CICS'09. IEEE Symposium on",
+pages="76--82",
+year=2009,
+organization="IEEE"
+}
+
%4
@book{sicp,
author = "Harold Abelson and Gerald Jay Sussman and Julie Sussman",
diff --git a/report/chapters/3-lit-r.tex b/report/chapters/3-lit-r.tex
new file mode 100644
index 0000000..b7435fa
--- /dev/null
+++ b/report/chapters/3-lit-r.tex
@@ -0,0 +1,234 @@
+\chapter{Literature Review}
+
+\epigraph{Books serve to show a man that those original thoughts of
+ his aren't very new after all}{\textit{Abraham Lincoln}}
+
+The idea of unifying the storage provided by multiple Internet file
+storage providers and storing all the content in an encrypted form is
+not new; computer researchers/scientists and programmers have devised
+different methods to use multiple file storage providers' storage
+space. This chapter gives an overview of the work done by Yeo et
+al. in unifying the storage provided by Dropbox, Box, Google Drive and
+SkyDrive on Android devices\cite{yeo} (Section \ref{3-yeo-sec});
+SkyCDS, a content delivery service, by Gonzalez et al., which uses
+the publish/subscribe overlay paradigm and stores the content across
+multiple ``cloud'' storage providers such that only part of the
+content (in encrypted form) is stored on each ``cloud'' storage
+provider\cite{skycds} (Section \ref{3-skycds-sec}); lastly,
+\verb+git-annex+, by Joey Hess\cite{person:joeyh}, that allows one to
+version control and keep track of large files with a possibility of
+encrypting files that are stored in ``special remotes'' -- storage
+provided by Internet file storage providers (Section
+\ref{3-gitannex-sec}).
+
+\section{Multi Cloud Storage Prototype}\label{3-yeo-sec}
+
+In their paper ``Leveraging client-side storage techniques for
+enhanced use of multiple consumer cloud storage services on
+resource-constrained mobile devices'', Yeo et al. show their Android
+mobile application, a prototype, which unifies storage provided by
+Dropbox, Box, Google Drive and SkyDrive. The application allows the
+user to store all their information in a single location on their
+phone and the application uses erasure coding\cite{weatherspoon} to
+split each file into \verb`n + k` fragments and spreads the encrypted
+fragments across storage provided by the file storage providers. All
+basic file operations -- Create, Rename, Update, Delete (CRUD) -- are
+possible. Information about the file stored in a unified location is
+stored in a SQLite database. Unlike combox, which depends on the file
+storage provider's client to sync file fragments/shards to the file
+storage provider's server, the Android application developed by Yeo et
+al. takes the responsibility to sync file fragments/shards to each
+file storage provider and uses the OAuth 2.0\cite{protocol:oauth2}
+protocol for authorization.
+
+For encrypting file fragments, they use AES-256; the key for
+encrypting is derived from the user's password by using Password-Based
+Key Derivation Function (PBKDF2)\cite{kaliski}. For erasure coding
+they use the JigDFS library\cite{jigdfs}. The Android application is
+able to do ``progressive streaming'' of media files; this means that
+large media files can be streamed in real-time from the file
+storage providers' servers; this is an attractive feature in a
+``resource constrained'' device where storage is expensive.
+
+Yeo et al. propose methods for achieving data de-duplication, file
+fragment/shard compression based on the type of the file, intelligent
+pre-fetching and caching for file fragments and ``automatic
+restoration in exploiting file-versioning''; these features were not
+implemented in the prototype Android application and there is a
+possibility of Yeo et al. implementing these features in the future.
+
+It becomes clear that Yeo et al.'s work is of immense importance when we
+take into consideration the research done by Yang et al., which found
+that 59\% of the users who use ``cloud storage service'' access the
+service through a smart phone and 42.2\% of users access
+audio/video\cite{yang}. The research by Yang et al. definitely
+suggests a trend of users' preference for small hand-held computers
+over laptops and desktops.
+
+\section{SkyCDS}\label{3-skycds-sec}
+
+SkyCDS, by Gonzalez et al., is a content delivery system that splits
+and spreads the content across multiple ``cloud'' storage
+providers\cite{skycds}. According to Gonzalez et al., the main reason
+for designing and developing SkyCDS was to prevent content providers
+from getting locked into just one ``cloud'' storage provider and to
+minimize loss when a ``cloud'' storage provider goes out of business
+or if there is a temporary outage in the storage service provided by the
+``cloud'' storage provider.
+
+In SkyCDS the content delivery to subscribers of the content is
+segregated into two distinct layers -- Metadata Flow Layer and the
+Content Flow Layer. The publisher of the content largely interacts
+with the Metadata Flow Layer that controls and keeps track of what
+content is published and the subscriber also largely interacts with
+the Metadata Flow Layer to subscribe to content published in the
+content delivery system. The Content Flow Layer is where the content
+is stored across multiple ``cloud'' storage providers. The publisher
+is responsible for publishing the content using the ``delivery
+workflow'' (part of the Content Flow Layer) and the subscriber uses
+the ``retrieve workflow'' to get access to the subscribed content.
+
+When content has to be dispersed to $k$ ``cloud'' storage providers,
+the content is split into $n$ chunks, $n > k$, this file splitting
+seems to produce 66.7\% of redundancy overhead\cite{skycds}; this file
+splitting scheme looks very similar to erasure coding, but Gonzalez et
+al. don't explicitly state that the content splitting scheme is indeed
+``erasure coding''. The splitting of content is done by the ``delivery
+workflow'' engine which is invoked when the publisher triggers the
+action to publish the respective content to subscribers.
+
+To evaluate the effectiveness of SkyCDS, Gonzalez et al. state that
+they've done a case study using the data (content) obtained from
+European Space Astronomy Center (ESAC) for the Soil Moisture Ocean
+Salinity. In this study, a group of organizations, in two different
+continents, used SkyCDS to share satellite images with each
+other. According to Gonzalez et al. this study attested SkyCDS as a
+viable option for content delivery with respect to performance,
+cost of ``cloud'' storage space and reliability.
+
+\section{git-annex}\label{3-gitannex-sec}
+
+\verb+git-annex+ allows one to version control large files that are
+not usually feasible to version control under
+\verb+git+\cite{program:git}. \verb+git-annex+ checks in the names
+and other meta-data about the files in git and stores the actual
+content under the \verb+.git/annex+ directory. When a file is added to
+\verb+git-annex+, a symlink to the file is created in place of the file
+and the content of the file itself is stored under the
+\verb+.git/annex+ directory.
+
+For instance, say a file called
+\verb+deb-nicholson-80s.medium.webm+ was downloaded from the Internet
+to the \verb+git-annex+ directory:
+
+\begin{verbatim}
+↳ git status
+On branch master
+Untracked files:
+ (use "git add <file>..." to include in what will be committed)
+
+ deb-nicholson-80s.medium.webm
+
+↳ ls -l
+total 105708
+...
+-rw-r--r-- 1 rsd rsd 108196923 May 5 2015 deb-nicholson-80s.medium.webm
+...
+\end{verbatim}
+
+When this file is added to \verb+git-annex+ with \verb+git annex add+,
+the file turns into a symlink to a file under the \verb+.git/annex+
+directory:
+
+{\small
+\begin{verbatim}
+↳ git annex add deb-nicholson-80s.medium.webm
+add deb-nicholson-80s.medium.webm ok
+(recording state in git...)
+
+↳ ls -l
+...
+lrwxrwxrwx 1 rsd rsd 207 May 5 2015 deb-nicholson-80s.medium.webm -> ../.git/an
+nex/objects/3j/vG/SHA256E-s108196923--7de9484ee96908268e21b451eb9805552c32b44da08e7
+0ee861332c87352944f.webm/SHA256E-s108196923--7de9484ee96908268e21b451eb9805552c32b4
+4da08e70ee861332c87352944f.webm
+
+↳ git commit -m "Added video/deb-nicholson-80s.medium.webm"
+[master efa1775] Added video/deb-nicholson-80s.medium.webm
+ 1 file changed, 1 insertion(+)
+ create mode 120000 video/deb-nicholson-80s.medium.webm
+\end{verbatim}
+}
+
+Now, the file \verb+deb-nicholson-80s.medium.webm+ is checked into
+\verb+git-annex+ and we can now do a \verb+git annex sync+ to sync the
+repository to other \verb+git-annex+ repositories. It must be noted
+here that when the repository is synced, the file content itself
+is not transferred to the other \verb+git-annex+ repositories; only
+the file's name and its meta-data that is stored in a separate git
+branch called \verb+git-annex+ are
+transferred\cite{documentation:git-annex-hworks}. In order to create a
+copy of a given file in another git annex repository,
+\verb+git annex get /path/to/filename.ext+ has to be done.
+
+\verb+git-annex+ has a feature called ``special
+remotes''\cite{documentation:git-annex-sremotes} that allows one to
+push/copy data checked into \verb+git-annex+ to storage provided by
+``cloud'' storage providers. At the time of writing this report,
+\verb+git-annex+ supports pushing data to the following file storage
+services:
+
+{\scriptsize
+\begin{itemize}
+\item Amazon S3
+\item Amazon Glacier
+\item Internet Archive via S3
+\item Box.com
+\item Google drive
+\item Google Cloud Storage
+\item Mega.co.nz
+\item SkyDrive
+\item OwnCloud
+\item Flickr
+\item IMAP
+\item Usenet
+\item chef-vault
+\item hubiC
+\item pCloud
+\item ipfs
+\item Ceph
+\item Backblaze's B2
+\end{itemize}
+}
+
+All data pushed to file storage provider's servers can be optionally
+encrypted using one's GPG key. For instance, to encrypt data that is
+pushed to the Amazon S3 special remote, the following command is
+used\cite{docs:git-annex-as3}:
+
+\begin{verbatim}
+$ git annex initremote cloud type=S3 keyid=2512E3C7
+initremote cloud (encryption setup with gpg key C910D9222512E3C7) (checking bucket) (creating bucket in US) (gpg) ok
+$ git annex describe cloud "at Amazon's US datacenter"
+describe cloud ok
+\end{verbatim}
+
+where \verb+2512E3C7+ is the id of the GPG key to use for encrypting
+data pushed to the Amazon S3 special remote. It is also possible to
+store each file that is pushed to the remotes as a set of chunks of
+size \verb+N+, to do that we do:
+
+\begin{verbatim}
+$ git annex initremote cloud type=S3 chunk=1MiB keyid=2512E3C7
+initremote cloud (encryption setup with gpg key C910D9222512E3C7) (checking bucket) (creating bucket in US) (gpg) ok
+$ git annex describe cloud "at Amazon's US datacenter"
+describe cloud ok
+\end{verbatim}
+
+With that, each file that has to be pushed to the Amazon S3 special
+remote is divided into 1MiB chunks, each chunk is encrypted using the
+GPG key \verb+2512E3C7+ and the encrypted chunks are finally pushed to
+the Amazon S3 remote. It must be noted here that unlike the Multi
+Cloud Storage Prototype or SkyCDS or combox, in \verb+git-annex+ when
+we are using file chunking all the chunks go to the same location --
+in this case, the Amazon S3 remote.
diff --git a/report/combox-report.pdf b/report/combox-report.pdf
index 66eb538..b378c9a 100644
--- a/report/combox-report.pdf
+++ b/report/combox-report.pdf
Binary files differ
diff --git a/report/combox.tex b/report/combox.tex
index 576947c..3b5f943 100644
--- a/report/combox.tex
+++ b/report/combox.tex
@@ -356,7 +356,7 @@
\chapter{Background}
%% 3
-\chapter{Literature Review}
+\include{chapters/3-lit-r}
%% 4
\include{chapters/4-arch-d}