combox

splits and encrypts files between online file storage providers
git clone git://git.ricketyspace.net/combox.git
Log | Files | Refs

commit 37385a90f90cb9d4dfd13d9d2e3cbcace8011e9e
parent 6e1133f5c6a3769a8303a3dcabcee20aaf40426e
Author: Siddharth Ravikumar <sravik@bgsu.edu>
Date:   Sun,  6 Sep 2015 19:38:41 -0400

Wrote a fix for Google Drive client behavior for file modification.

The unit test that simulates the behavior of Google Drive for file
modification runs successfully, but I need to test this fix, with the
Google Drive client monitoring a node directory, to confirm if the fix
actually works.

	modified:   combox/events.py
	modified:   tests/events_test.py

Diffstat:
combox/events.py | 67+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
tests/events_test.py | 104++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
2 files changed, 157 insertions(+), 14 deletions(-)

diff --git a/combox/events.py b/combox/events.py @@ -21,6 +21,7 @@ import logging from os import path from threading import Lock +from threading import Timer from watchdog.events import LoggingEventHandler @@ -205,6 +206,26 @@ class NodeDirMonitor(LoggingEventHandler): return False + def delete_later(self, file_cb_path): + """`file_cb_path' deleted if it is still under 'file_deleted'. + + This is used by the on_deleted method. + + This is a workaround to make combox predict official Google + Drive client's behavior. + """ + with self.lock: + num = self.silo.node_get('file_deleted', file_cb_path) + + if num == self.num_nodes: + # remove the corresponding file under the combox + # directory. + rm_path(file_cb_path) + # remove file info from silo. + self.silo.remove(file_cb_path) + self.silo.node_rem('file_deleted', file_cb_path) + + def housekeep(self): """Recursively traverses node directory, discovers changes and updates silo and combox directory. @@ -420,6 +441,32 @@ class NodeDirMonitor(LoggingEventHandler): if num == self.num_nodes: os.mkdir(file_cb_path) self.silo.node_rem('file_created', file_cb_path) + elif (not event.is_directory) and path.exists(file_cb_path): + # This can either mean the file was create on this + # computer or if this is a Google Drive node directory and + # the official Google Drive client is in use this means + # the file was modified. + # + # Google Drive client's behavior when a file (shard) is + # modified in the Google Drive node directory: + # + # - First it deletes the file. + # - Creates the latest version the file. + with self.lock: + num = self.silo.node_get('file_deleted', file_cb_path) + if num: + # This means we're in the Google Drive node + # directory and the official Google Drive client + # is in use and the file was actually modified on + # another computer. 
+ self.silo.node_rem('file_deleted', file_cb_path) + self.silo.node_set('file_modified', file_cb_path) + num = self.silo.node_get('file_modified', file_cb_path) + if num == self.num_nodes: + decrypt_and_glue(file_cb_path, self.config) + # update db. + self.silo.update(file_cb_path) + self.silo.node_rem('file_modified', file_cb_path) elif (not event.is_directory) and (not path.exists(file_cb_path)): # shard created. @@ -460,14 +507,18 @@ class NodeDirMonitor(LoggingEventHandler): with self.lock: self.silo.node_set('file_deleted', file_cb_path) num = self.silo.node_get('file_deleted', file_cb_path) - - if num == self.num_nodes: - # remove the corresponding file under the combox - # directory. - rm_path(file_cb_path) - # remove file info from silo. - self.silo.remove(file_cb_path) - self.silo.node_rem('file_deleted', file_cb_path) + # If we are in a Google Drive node directory and + # the official Google Drive client is in use, at + # this point we cannot tell if the file was + # deleted; it can be a file modification or rename + # or deletion. + # + # Therefore, wait for 2secs and then delete the + # file_cb_path iff the file_cb_path was really + # removed on the another computer. 
+ delayed_thread = Timer(3, self.delete_later, + [file_cb_path]) + delayed_thread.start() def on_modified(self, event): diff --git a/tests/events_test.py b/tests/events_test.py @@ -30,12 +30,14 @@ from nose.tools import * from watchdog.observers import Observer from combox.config import get_nodedirs -from combox.crypto import decrypt_and_glue, split_and_encrypt +from combox.crypto import (decrypt_and_glue, split_and_encrypt, + encrypt_shards) from combox.events import ComboxDirMonitor, NodeDirMonitor from combox.file import (relative_path, purge_dir, hash_file, read_file, write_file, move_shards, rm_shards, mk_nodedir, rm_nodedir, - move_nodedir, node_paths) + move_nodedir, node_paths, rm_path, + split_data, write_shards) from combox.silo import ComboxSilo from tests.utils import (get_config, shardedp, dirp, renamedp, @@ -400,7 +402,7 @@ class TestEvents(object): # Test - Shard deletion. rm_shards(the_guide, self.config) - time.sleep(1) + time.sleep(4) assert not path.exists(the_guide) ## check if the new file's info is removed from silo @@ -416,6 +418,99 @@ class TestEvents(object): observers[i].join() + def test_GoogleDrive_file_modify(self): + """Simulates Google Drive client's file modification behavior and + checks if combox is interpreting it properly. + """ + + nodes = get_nodedirs(self.config) + num_nodes = len(get_nodedirs(self.config)) + + nmonitors = [] + observers = [] + + # create an observer for each node directory and make it + # monitor them. 
+ for node in nodes: + nmonitor = NodeDirMonitor(self.config, self.silo_lock, + self.nodem_lock) + observer = Observer() + observer.schedule(nmonitor, node, recursive=True) + observer.start() + + nmonitors.append(nmonitor) + observers.append(observer) + + # Test - shard modification + lorem_content = read_file(self.lorem) + self.lorem_copy = "%s.copy" % self.lorem + + copyfile(self.lorem, self.lorem_copy) + split_and_encrypt(self.lorem_copy, self.config, + lorem_content) + self.silo.update(self.lorem_copy) + shardedp(self.lorem_copy) + + lorem_copy_hash = self.silo.db.get(self.lorem_copy) + + ipsum_content = read_file(self.ipsum) + lorem_copy_content = "%s\n%s" % (lorem_content, ipsum_content) + + time.sleep(2) + + # Modify shards in the first n-1 node directories in the usual + # way. For the nth node directory simulate Google Drive + # official client's way of modifiying the shard. + + rel_path = relative_path(self.lorem_copy, self.config) + + # no. of shards = no. of nodes. + SHARDS = len(self.config['nodes_info'].keys()) + + f_shards = split_data(lorem_copy_content, SHARDS) + + # encrypt shards + ciphered_shards = encrypt_shards(f_shards, self.config['topsecret']) + + # write ciphered shards to disk + f_basename = rel_path + # gets the list of node' directories. + nodes = get_nodedirs(self.config) + last_node_index = len(nodes) - 1 + + nodes_subset = nodes[:last_node_index] + last_node = nodes[last_node_index] + + # write n-1 shards to the first n-1 node directories + write_shards(ciphered_shards, nodes_subset, f_basename) + + + # now for the nth shard, simulate Google Drive's client + # behavior. 
+ last_shard_path = "%s.shard%d" % (path.join(last_node, f_basename), + last_node_index) + # remove the shard first + rm_path(last_shard_path) + # write the latest version of the shard + write_file(last_shard_path, ciphered_shards[last_node_index]) + time.sleep(3) + + self.silo.reload() + assert lorem_copy_content == read_file(self.lorem_copy) + + ## check if the lorem_copy's info is updated in silo + assert lorem_copy_hash != self.silo.db.get(self.lorem_copy) + assert_equal(None, self.silo.node_get('file_modified', + self.lorem_copy)) + + self.purge_list.append(self.lorem_copy) + + # stop the zarking observers. + for i in range(num_nodes): + observers[i].stop() + observers[i].join() + + def untest_NDM(self): """ Tests the NodeDirMonitor class. @@ -726,9 +821,6 @@ class TestEvents(object): rm_shards(self.TEST_FILE, self.config) - os.remove(self.lorem_ipsum) - rm_shards(self.lorem_ipsum, self.config) - rm_shards(self.lorem, self.config) rm_shards(self.ipsum, self.config)