remote and local file diff

This commit is contained in:
Amber 2024-06-12 13:52:52 +02:00
parent 0090a67c41
commit e399f2e9f9
14 changed files with 453 additions and 176 deletions

View File

@ -43,7 +43,7 @@ def recursive_diff_snap(last_tree, current_tree, path='./', bres={}):
item = { item = {
'name' : key_added, 'name' : key_added,
'path' : '%s' % (path,), 'path' : '%s' % (path,),
'type' : 'dir' if isinstance(current_tree[key_added], dict) else 'file', 'type' : 'd' if isinstance(current_tree[key_added], dict) else 'f',
'hash' : current_tree[key_added], 'hash' : current_tree[key_added],
} }
added.append(item) added.append(item)
@ -51,7 +51,7 @@ def recursive_diff_snap(last_tree, current_tree, path='./', bres={}):
for name_last, hsh_last in last_tree.items(): for name_last, hsh_last in last_tree.items():
hsh_current = current_tree.get(name_last) hsh_current = current_tree.get(name_last)
last_type = 'dir' if isinstance(hsh_last, dict) else 'file' last_type = 'd' if isinstance(hsh_last, dict) else 'f'
item = { item = {
'name' : name_last, 'name' : name_last,
@ -69,7 +69,7 @@ def recursive_diff_snap(last_tree, current_tree, path='./', bres={}):
removed.append(item) removed.append(item)
continue continue
current_type = 'dir' if isinstance(hsh_current, dict) else 'file' current_type = 'd' if isinstance(hsh_current, dict) else 'f'
if type(hsh_last) != type(hsh_current): if type(hsh_last) != type(hsh_current):
print(f'{path}{name_last} changed his type in {current_type}') print(f'{path}{name_last} changed his type in {current_type}')
@ -80,6 +80,7 @@ def recursive_diff_snap(last_tree, current_tree, path='./', bres={}):
changed.append(item) changed.append(item)
continue continue
# current node is a file
if isinstance(hsh_last, str): if isinstance(hsh_last, str):
if hsh_last != hsh_current: if hsh_last != hsh_current:
print(f'file {path}{name_last} changed his hash') print(f'file {path}{name_last} changed his hash')
@ -87,11 +88,12 @@ def recursive_diff_snap(last_tree, current_tree, path='./', bres={}):
'last_type' : last_type, 'last_type' : last_type,
'current_type' : current_type, 'current_type' : current_type,
'cur_hash' : hsh_current, 'cur_hash' : hsh_current,
'last_hash' : hsh_last,
}) })
changed.append(item) changed.append(item)
continue continue
# name is dir # current node is dir
if (hsh_last == hsh_current): if (hsh_last == hsh_current):
print(f'file {path}{name_last} subtree unchanged ') print(f'file {path}{name_last} subtree unchanged ')
continue continue
@ -108,6 +110,8 @@ def diff_snap(last_tree, current_tree, path='./'):
res = {} res = {}
if not path.endswith(os.sep): path = path + os.sep if not path.endswith(os.sep): path = path + os.sep
recursive_diff_snap(last_tree, current_tree, path=path, bres=res) recursive_diff_snap(last_tree, current_tree, path=path, bres=res)
# return res
## Managing moved files is too difficult - for now skip this step
# compute moved could save bandwidth # compute moved could save bandwidth
res['moved'] = [] res['moved'] = []
for n, r in enumerate(res['removed']): for n, r in enumerate(res['removed']):

View File

@ -6,6 +6,9 @@ from pathlib import Path
from lib.snapshot.generate import local as _genlocal from lib.snapshot.generate import local as _genlocal
from lib.snapshot import dump as _dump from lib.snapshot import dump as _dump
from lib.snapshot.generate.remote import RHAgent
from lib.diff import fdiff
from iface import snap from iface import snap
@ -23,13 +26,21 @@ class Manager():
if not local_path or not remote_path: if not local_path or not remote_path:
raise Exception('Please specify a local path and a remote path to sync') raise Exception('Please specify a local path and a remote path to sync')
self.local_path = self.normalize_path(local_path) self.local_path = self.normalize_path(local_path)
self.remote_path = self.normalize_path(remote_path) # self.remote_path = self.normalize_path(remote_path)
self.init_remote_params(remote_path)
# self.check_conf() # self.check_conf()
# conf_file = conf_file or DEFAULT_MANAGER_CONF # conf_file = conf_file or DEFAULT_MANAGER_CONF
# self.conf = _parse_conf.read_conf(conf_file) # self.conf = _parse_conf.read_conf(conf_file)
def init_remote_params(self, remote_path):
    '''
    Parse an scp-style remote spec ``username@hostname:path`` and store
    its parts on the instance as `username`, `hostname` and `remote_path`
    (the path goes through `normalize_path` before being stored).

    Only the first '@' and the first ':' following it are treated as
    separators, so the path component may itself contain '@' or ':'.

    Raises ValueError when the spec has no '@', no ':' after the host,
    or an empty username/hostname.
    '''
    username, at_sign, rest = remote_path.partition('@')
    hostname, colon, path = rest.partition(':')
    if not (at_sign and colon and username and hostname):
        raise ValueError(
            'remote path must look like username@hostname:path, got %r' % (remote_path,)
        )
    self.username = username
    self.hostname = hostname
    self.remote_path = self.normalize_path(path)
def check_init_conf(self): def check_init_conf(self):
local_path = Path(self.local_path) local_path = Path(self.local_path)
if not local_path.exists(): if not local_path.exists():
@ -47,7 +58,7 @@ class Manager():
if path.endswith(os.sep): return path if path.endswith(os.sep): return path
return '%s%s' % (path, os.sep) return '%s%s' % (path, os.sep)
def mirror(self, dry_run=True): def mirror(self, dry_run=False):
''' '''
do an itial rsync do an itial rsync
@ -62,26 +73,20 @@ class Manager():
local_path = self.local_path local_path = self.local_path
remote_path = self.remote_path remote_path = self.remote_path
dry_run_flag = ''
username = self.username
hostname = self.hostname
dry_run_flag = ''
if dry_run: if dry_run:
dry_run_flag = ' --dry-run' dry_run_flag = ' --dry-run'
cmd = ["rsync"] cmd_str = f'rsync -i -aPu --progress{dry_run_flag} -e ssh {username}@{hostname}:{remote_path} {local_path}'
cmd.extend(["-i"]) cmd = cmd_str.split(' ')
cmd.extend(["-aPu"]) print(f'cmd: {cmd}')
cmd.extend(["--progress"])
cmd.extend(["--delete"])
# cmd.extend(["--out-format=\"%i %n%L$ %''b\""])
cmd.extend(["-e"])
cmd.extend(["ssh"])
remote_path = self.remote_path
cmd.extend(["%s" % (remote_path,)])
local_path = self.local_path
cmd.extend(["%s" % (local_path,)])
subprocess.run(cmd) subprocess.run(cmd_str.split(' '))
def compute_local_hash(self): def compute_local_hash(self):
local_path = self.local_path local_path = self.local_path
@ -94,11 +99,31 @@ class Manager():
snapshot_dump_path = self.normalize_path(snapshot_dump_path) snapshot_dump_path = self.normalize_path(snapshot_dump_path)
return snapshot_dump_path return snapshot_dump_path
def dump_local_hash(self, hsh): def get_unmerged_path(self):
local_path = self.local_path
unmerged_path = '%s%s%s%s' % (local_path, '.masy', os.sep, 'unmerged')
unmerged_path = self.normalize_path(unmerged_path)
return unmerged_path
def get_tree_diff_path(self, node):
    '''
    Return the directory, under ``<local_path>.masy/diff/``, that mirrors
    the relative directory of `node`.

    `node` is a dict whose 'path' entry is a relative directory starting
    with ``./``; that leading marker is swapped for the diff root.
    '''
    diff_root = '%s%s%s%s' % (self.local_path, '.masy', os.sep, 'diff')
    diff_root = self.normalize_path(diff_root)
    # Replace only the first './' so that a directory whose name ends with
    # a dot (e.g. 'docs./') is not rewritten in the middle of the path.
    return node['path'].replace('.%s' % (os.sep,), diff_root, 1)
def dump_local_hash(self, hsh=None):
# local_path = self.local_path # local_path = self.local_path
# snapshot_dump_path = '%s%s%s%s' % (local_path, '.masy', os.sep, 'snapshot') # snapshot_dump_path = '%s%s%s%s' % (local_path, '.masy', os.sep, 'snapshot')
if hsh is None:
hsh = self.compute_local_hash()
snapshot_dump_path = self.get_snapshot_path() snapshot_dump_path = self.get_snapshot_path()
try:
os.makedirs(snapshot_dump_path) os.makedirs(snapshot_dump_path)
except FileExistsError:
print(f'{snapshot_dump_path} already exists skip creation')
_dump.dump_snapshot(hsh, path=snapshot_dump_path) _dump.dump_snapshot(hsh, path=snapshot_dump_path)
def load_local_hash(self): def load_local_hash(self):
@ -108,6 +133,20 @@ class Manager():
snapshot = _dump.load_snapshot(snapshot_dump_path) snapshot = _dump.load_snapshot(snapshot_dump_path)
return snapshot return snapshot
def get_absolute_local_path(self, node):
    '''
    Return the path of `node` inside the local sync folder.

    `node` is a dict with 'path' (relative directory starting with ``./``)
    and 'name'; the leading ``./`` is swapped for `self.local_path`.
    '''
    relative_path = '%s%s' % (node['path'], node['name'])
    # Replace only the first './': later occurrences belong to directory
    # names ending with a dot and must be left untouched.
    return relative_path.replace('.%s' % (os.sep,), self.local_path, 1)
def get_absolute_remote_path(self, node):
    '''
    Return the path of `node` inside the remote sync folder.

    `node` is a dict with 'path' (relative directory starting with ``./``)
    and 'name'; the leading ``./`` is swapped for `self.remote_path`.
    '''
    relative_path = '%s%s' % (node['path'], node['name'])
    # Replace only the first './': later occurrences belong to directory
    # names ending with a dot and must be left untouched.
    return relative_path.replace('.%s' % (os.sep,), self.remote_path, 1)
def add_sync_to_list(self): def add_sync_to_list(self):
home_dir = os.environ.get('HOME') home_dir = os.environ.get('HOME')
home_dir = self.normalize_path(home_dir) home_dir = self.normalize_path(home_dir)
@ -126,13 +165,156 @@ class Manager():
def init_sync(self): def init_sync(self):
self.check_init_conf() self.check_init_conf()
self.mirror() self.mirror()
## maybe it is better to put all in dump_local_hash
local_hash = self.compute_local_hash() local_hash = self.compute_local_hash()
self.dump_local_hash(local_hash) self.dump_local_hash(local_hash)
self.add_sync_to_list() self.add_sync_to_list()
def sync(self): # def init_rh_agent(self):
# return RHAgent(self.hostname, self.username)
def get_agent(self):
    '''
    Return the RHAgent of this manager, creating and caching it in
    `self.rhagent` on first use from `self.hostname` / `self.username`.
    '''
    # getattr with a default only covers the "not created yet" case; any
    # other error raised while reading the attribute is no longer swallowed.
    agent = getattr(self, 'rhagent', None)
    if agent is None:
        agent = self.rhagent = RHAgent(self.hostname, self.username)
    return agent
def get_local_snap_diff(self):
last_tree = self.load_local_hash() last_tree = self.load_local_hash()
current_tree = self.compute_local_hash() current_tree = self.compute_local_hash()
local_snap_diff = snap.diff_snap(last_tree, current_tree) local_snap_diff = snap.diff_snap(last_tree, current_tree)
return local_snap_diff return local_snap_diff
def get_remote_snap_diff(self):
    '''
    Placeholder: nothing is computed yet, the method returns None.
    '''
    pass
def store_unmerged_diff(self, nodes):
    '''
    Write one diff file per conflicting node and dump the list of nodes.

    Every node is a dict providing 'name', 'path' (relative path),
    'cur_hash', 'remote_hash' and 'remote_file' (the remote content handed
    to fdiff.print_diff). 'remote_file' is popped from the node before the
    node is added to the dumped list.
    '''
    unmerged_dir = self.get_unmerged_path()
    try:
        os.makedirs(unmerged_dir)
    except FileExistsError:
        print(f'{unmerged_dir} already exists skip creation of unmerged')

    dumped_nodes = []
    for node in nodes:
        local_file = self.get_absolute_local_path(node)
        local_tag = f"{node['name']}{node['cur_hash']} (local)"
        remote_tag = f"{node['name']}{node['remote_hash']} (remote)"
        remote_buffer = node.pop('remote_file')

        # directory, inside the diff tree, that will hold this node's diff
        diff_dir = self.get_tree_diff_path(node)
        try:
            os.makedirs(diff_dir)
        except FileExistsError:
            print(f'{diff_dir} already exists skip creation')

        diff_file = f"{self.normalize_path(diff_dir)}{node['name']}"
        fdiff.print_diff(
            local_file,
            remote_buffer,
            remove_diff_letters_code=False,
            outfile=diff_file,
            filea_block_tag=local_tag,
            fileb_block_tag=remote_tag,
        )
        dumped_nodes.append(node)

    _dump.dump_snapshot(dumped_nodes, path=unmerged_dir, dump_file_name='.unmerged.json.gz')
def load_unmerged_diff(self):
    '''
    Load and return what was dumped in '.unmerged.json.gz' inside the
    unmerged folder.
    '''
    return _dump.load_snapshot(self.get_unmerged_path(), dump_file_name='.unmerged.json.gz')
#### this part is for generating the diffs tree
# agent = self.get_agent()
##
##
# unmerged_localpath = path.replace('.%s' % (os.sep,), unmerged_path)
# path = path.replace('.%s' % (os.sep,), self.remote_path)
# try:
# # creating local path
# os.makedirs(unmerged_localpath)
# except FileExistsError:
# print(f'{unmerged_localpath} already exists skip creation of unmerged')
# remote_path = '%s%s' % (path, name)
# # print(f'local path where to store: {unmerged_localpath}, remote_path: {remote_path}')
# agent.get(remote_path, '%s%s' % (unmerged_localpath, name), lambda a,b: print('remote file saved in {unmerged_path}'))
def sync(self):
    '''
    Push local changes to the remote, keeping aside what conflicts.

    - compute the local diff against the last stored snapshot
    - for every changed node, hash its remote counterpart and compare it
      with the hash stored in the last snapshot:
        * file, remote unchanged: upload the local file
        * file, remote changed: collect the node (with the remote hash and
          the remote content) and hand all collected nodes to
          store_unmerged_diff at the end
        * directory: only report whether the remote tree changed

    Returns the local snapshot diff.
    '''
    local_snap_diff = self.get_local_snap_diff()
    agent = self.get_agent()
    changes = local_snap_diff.get('changed') or []

    unmerged = []
    for node in changes:
        node_name = node['name']
        node_path = node['path']
        node_current_type = node['current_type']
        node_last_type = node['last_type']
        node_last_hash = node['last_hash']

        if node_current_type != node_last_type:
            print(f'node {node_name} change type in local tree')

        if node_last_type == 'f':
            remote_file_path = self.get_absolute_remote_path(node)
            rfile_buf, rhash = agent.generate_file_hash_oversftp(remote_file_path, return_also_buffer=True)
            if node_last_hash == rhash:
                print(f'You can proceed to push {node_name} file it is not changed from the last version')
                local_file_path = self.get_absolute_local_path(node)
                agent.put(local_file_path, remote_file_path, lambda x,y: print(f'{local_file_path} copied correctly to remote'))
            else:
                print(f'{node_path} file it changed local hash: {node_last_hash}, remote hash {rhash}, you can\'t push it directly')
                node['remote_hash'] = rhash
                node['remote_file'] = rfile_buf
                unmerged.append(node)
        elif node_last_type == 'd':
            # The tree hash is computed over sftp, so it needs the REMOTE
            # location of the folder (the local one was used by mistake).
            remote_path = self.get_absolute_remote_path(node)
            rhash = agent.generate_tree_hash_oversftp(remote_path)
            if node_last_hash == rhash:
                print(f'You can proceed to push {node_name} folder it is not changed from the last version')
            else:
                print(f'{node_name} folder it changed from the last version, you can\'t push it directly')

    if unmerged:
        self.store_unmerged_diff(unmerged)

    return local_snap_diff

View File

@ -1,40 +1,67 @@
import io
from pathlib import Path from pathlib import Path
import difflib import difflib
def compute_diff(filea_path, fileb_path): def is_buf(f):
with open(filea_path, 'r') as fa: if isinstance(f, io.BytesIO):
alines = fa.readlines() return True
with open(fileb_path, 'r') as fb: if isinstance(f, io.StringIO):
blines = fb.readlines() return True
return False
def get_lines(file):
    '''
    Return the lines of `file` as a list of str, line endings included.

    `file` is either a path or a buffer (io.StringIO, io.BytesIO or any
    object exposing readlines()). Lines coming from a binary buffer are
    decoded as UTF-8 (undecodable bytes are replaced), so the result can be
    compared with lines read from a path opened in text mode.
    '''
    if hasattr(file, 'readlines'):
        return [
            line.decode('utf-8', errors='replace') if isinstance(line, bytes) else line
            for line in file.readlines()
        ]
    with open(file, 'r') as f:
        return f.readlines()
def compute_diff(filea, fileb):
'''
file* can be a path or a buffer
'''
alines = get_lines(filea)
blines = get_lines(fileb)
differ = difflib.Differ() differ = difflib.Differ()
res_diff = list(differ.compare(alines, blines)) res_diff = list(differ.compare(alines, blines))
return res_diff return res_diff
def format_diff(filea_path, fileb_path, remove_diff_letters_code=False): def get_file_block_tag(file, file_block_tag):
if is_buf(file):
assert file_block_tag
return file_block_tag
if file_block_tag:
return file_block_tag
## file is a path
file_obj = Path(file)
file_block_tag = file_obj.name
return file_block_tag
def format_diff(filea, fileb, remove_diff_letters_code=False, filea_block_tag='', fileb_block_tag=''):
''' '''
"- " line unique to file1 "- " line unique to file1
"+ " line unique to file2 "+ " line unique to file2
" " line common to both files " " line common to both files
"? " line not present in either input file -> to skip it "? " line not present in either input file -> to skip it
''' '''
filea_obj = Path(filea_path)
fileb_obj = Path(fileb_path)
filea_name = filea_obj.name
fileb_name = fileb_obj.name
filea_group_wrap_tmpl = '<<<<<<< '+ filea_name + '\n%s=======\n'
fileb_group_wrap_tmpl = '>>>>>>> ' + fileb_name + '\n%s=======\n'
lines_diff = compute_diff(filea_path, fileb_path) filea_block_tag = get_file_block_tag(filea, filea_block_tag)
fileb_block_tag = get_file_block_tag(fileb, fileb_block_tag)
filea_group_wrap_tmpl = '<<<<<<< '+ filea_block_tag + '\n%s=======\n'
fileb_group_wrap_tmpl = '>>>>>>> ' + fileb_block_tag + '\n%s=======\n'
lines_diff = compute_diff(filea, fileb)
list_blocks = [] list_blocks = []
''' '''
list blocks contains object like this list blocks contains object like this
{ {
'block_source' : `filea_name|fileb_name|common` 'block_source' : `filea_block_tag|fileb_block_tag|common`
'block_lines' : `lines of that block` 'block_lines' : `lines of that block`
} }
''' '''
@ -44,11 +71,11 @@ def format_diff(filea_path, fileb_path, remove_diff_letters_code=False):
if line.startswith('- '): if line.startswith('- '):
## line in filea ## line in filea
block_source = filea_name block_source = filea_block_tag
wrap_tmpl = filea_group_wrap_tmpl wrap_tmpl = filea_group_wrap_tmpl
elif line.startswith('+ '): elif line.startswith('+ '):
## line in fileb ## line in fileb
block_source = fileb_name block_source = fileb_block_tag
wrap_tmpl = fileb_group_wrap_tmpl wrap_tmpl = fileb_group_wrap_tmpl
elif line.startswith('? '): elif line.startswith('? '):
continue continue
@ -84,8 +111,8 @@ def format_diff(filea_path, fileb_path, remove_diff_letters_code=False):
return list_blocks return list_blocks
def print_diff(filea_path, fileb_path, remove_diff_letters_code=False, outfile=''): def print_diff(filea_path, fileb_path, remove_diff_letters_code=False, outfile='', filea_block_tag='', fileb_block_tag=''):
formatted = format_diff(filea_path, fileb_path, remove_diff_letters_code=remove_diff_letters_code) formatted = format_diff(filea_path, fileb_path, remove_diff_letters_code=remove_diff_letters_code, filea_block_tag=filea_block_tag, fileb_block_tag=fileb_block_tag)
out_str = '' out_str = ''

Binary file not shown.

View File

@ -1,13 +1,11 @@
import stat import stat
import paramiko from lib.sclient.base_agent import BaseAgent
import paramiko.sftp_client as _sftp_client
HOSTS_KEYS_PATH = '/home/luca/.ssh/known_hosts' # import paramiko.sftp_client as _sftp_client
PKEY_PATH = '/home/luca/.ssh/notanamber_rsa'
HOSTNAME = '107.152.32.78' # HOSTNAME = '107.152.32.78'
USERNAME = 'notanamber' # USERNAME = 'notanamber'
''' '''
@ -21,23 +19,16 @@ def downLoadFile(sftp, remotePath, localPath):
sftp.get(fileattr.filename, os.path.join(localPath, fileattr.filename)) sftp.get(fileattr.filename, os.path.join(localPath, fileattr.filename))
''' '''
class SyncAgent(): class SyncAgent(BaseAgent):
'''
it give to you a sftp client for using to compute remote hash
and basic ftp command to synchronize local and remote
'''
def __init__(self): def __init__(self, hostname, username):
self.pkey = paramiko.RSAKey.from_private_key_file(PKEY_PATH) BaseAgent.__init__(self, hostname, username)
self.client = paramiko.SSHClient()
self.client.load_host_keys(filename=HOSTS_KEYS_PATH)
self.sftpc = None self.sftpc = None
def connect(self):
'''
this method generates the underlying `Trasport`
'''
self.client.connect(hostname=HOSTNAME, username=USERNAME, pkey=self.pkey, look_for_keys=False)
def close(self):
self.client.close()
def compute_hash(self, name): def compute_hash(self, name):
self.connect() self.connect()
stdin, stdout, stderr = self.client.exec_command("sha256sum %s | awk '{print $1}'" % (name, )) stdin, stdout, stderr = self.client.exec_command("sha256sum %s | awk '{print $1}'" % (name, ))
@ -68,6 +59,26 @@ class SyncAgent():
continue continue
print('name: %s is regular FILE' % (attr.filename)) print('name: %s is regular FILE' % (attr.filename))
def put(self, localpath, remotepath, callback=None):
    '''
    Upload the local file `localpath` to the SFTP server as `remotepath`.

    Thin wrapper around the `put` of the sftp client returned by
    get_sftp_client(): `callback` is forwarded as is and any exception
    raised by the client is passed through.
    '''
    self.get_sftp_client().put(localpath, remotepath, callback=callback)
synca = SyncAgent() def get(self, localpath, remotepath, callback=None):
sftpc = self.get_sftp_client()
sftpc.get(localpath, remotepath, callback=callback)
def open(self, remotepath):
    '''
    Read the remote file `remotepath` through the sftp client (opened with
    mode 'r') and return its whole content.
    '''
    client = self.get_sftp_client()
    with client.open(remotepath, mode='r') as remote_file:
        return remote_file.read()
# synca = SyncAgent()
# sftpc = a.get_sftp_client() # sftpc = a.get_sftp_client()

View File

@ -0,0 +1,36 @@
import paramiko
from paramiko import SSHConfig
HOSTS_KEYS_PATH = '/home/luca/.ssh/known_hosts'
PKEY_PATH = '/home/luca/.ssh/notanamber_rsa'
class BaseAgent():
    '''
    Basic ssh connection layer

    Holds a paramiko SSHClient configured with the private key read from
    PKEY_PATH and the known hosts read from HOSTS_KEYS_PATH.
    '''

    def __init__(self, hostname, username):
        '''
        @param hostname string, host alias looked up in the ssh config at connect time
        @param username string, user used to open the ssh connection
        '''
        self.hostname = hostname
        self.username = username
        self.pkey = paramiko.RSAKey.from_private_key_file(PKEY_PATH)
        self.client = paramiko.SSHClient()
        self.client.load_host_keys(filename=HOSTS_KEYS_PATH)

    def get_hostname_from_sshconfig(self):
        '''
        Look `self.hostname` up in the ssh config file and return the
        'hostname' entry of the matching configuration.
        '''
        ssh_config_path = "/home/luca/.ssh/config"
        config = SSHConfig()
        # the config file is only needed while parsing: close it right after
        with open(ssh_config_path) as config_file:
            config.parse(config_file)
        return config.lookup(self.hostname)['hostname']

    def connect(self):
        '''
        this method generates the underlying `Transport`

        The host name to connect to is resolved through the ssh config;
        authentication uses only the private key loaded in __init__
        (look_for_keys is disabled).
        '''
        hostname = self.get_hostname_from_sshconfig()
        self.client.connect(hostname=hostname, username=self.username, pkey=self.pkey, look_for_keys=False)

    def close(self):
        '''
        Close the underlying ssh client.
        '''
        self.client.close()

View File

@ -35,6 +35,9 @@ def generate_tree_hash(root_path :str):
items = os.listdir(root_path) items = os.listdir(root_path)
for item in items: for item in items:
## exclude folder for the tree
if item in ['.masy']:
continue
absolute_item_path = root_path + item absolute_item_path = root_path + item
print('absolute_item_path: %s, item %s, isdir: %s' % (absolute_item_path, item, os.path.isdir(absolute_item_path))) print('absolute_item_path: %s, item %s, isdir: %s' % (absolute_item_path, item, os.path.isdir(absolute_item_path)))
if os.path.isdir(absolute_item_path): if os.path.isdir(absolute_item_path):

View File

@ -1,69 +1,78 @@
import stat import stat
import os import os
import hashlib import hashlib
import time
import json import json
import io
import gzip import gzip
import asyncio import asyncio
import functools import functools
import time from lib.sclient.agent import SyncAgent
from lib.sclient import agent as _agent class RHAgent():
def __init__(self, hostname, username):
    # Store the connection parameters and build the SyncAgent that the
    # other methods use to reach the remote host.
    ## load ssh config and read the ip of hostname
    # NOTE(review): nothing here reads the ssh config; presumably the
    # lookup happens inside the agent when it connects - confirm.
    self.hostname = hostname
    self.username = username
    self.agent = SyncAgent(hostname, username)
def get_sftp_client(self):
    '''
    Return the sftp client provided by the wrapped agent.
    '''
    # `self` was missing from the signature although the body reads
    # self.agent, which made the method impossible to call.
    return self.agent.get_sftp_client()
def generate_rfile_hash(file_path, hexdigest=True, client=None): def generate_rfile_hash(self, file_path, hexdigest=True, return_also_buffer=False):
if not client: a = self.agent
a = _agent.synca
client = a.get_sftp_client() client = a.get_sftp_client()
print(f'Trying to get {file_path}')
with client.open(file_path, "rb") as f: with client.open(file_path, "rb") as f:
buf = f.read() buf = f.read()
if hexdigest: return hashlib.md5(buf).hexdigest() if hexdigest:
return hashlib.md5(buf).digest() dig = hashlib.md5(buf).hexdigest()
else:
dig = hashlib.md5(buf).digest()
# def check_isdir(path: str): if not return_also_buffer:
# if not os.path.isdir(path): return dig
# raise Exception('Provide a valid folder to start the hashing')
#
# if not path.endswith(os.path.sep):
# path = path + os.path.sep
# return path
def generate_tree_hash_oversftp(root_path :str): return (io.BytesIO(buf), dig)
def generate_tree_hash_oversftp(self, root_path :str):
''' '''
@param root_path string, root_path in remote server @param root_path string, root_path in remote server
generate a map of hashes starting from `root_path` recursively generate a map of hashes starting from `root_path` recursively
''' '''
# if not os.path.isdir(root_path):
# raise Exception('Provide a valid folder to start the hashing')
#
# if not root_path.endswith(os.path.sep):
# root_path = root_path + os.path.sep
# root_path = check_isdir(root_path)
rtreemap = {} rtreemap = {}
if not root_path.endswith(os.path.sep): if not root_path.endswith(os.path.sep):
root_path = root_path + os.path.sep root_path = root_path + os.path.sep
a = _agent.synca a = self.agent
sftpc = a.get_sftp_client() sftpc = a.get_sftp_client()
for item in sftpc.listdir_attr(root_path): for item in sftpc.listdir_attr(root_path):
absolute_item_path = root_path + item.filename absolute_item_path = root_path + item.filename
print('absolute_item_path: %s, item %s, isdir: %s' % (absolute_item_path, item.filename, stat.S_ISDIR(item.st_mode))) print('absolute_item_path: %s, item %s, isdir: %s' % (absolute_item_path, item.filename, stat.S_ISDIR(item.st_mode)))
if stat.S_ISDIR(item.st_mode): if stat.S_ISDIR(item.st_mode):
rtreemap[item.filename] = generate_tree_hash_oversftp(absolute_item_path) rtreemap[item.filename] = self.generate_tree_hash_oversftp(absolute_item_path)
else: else:
rtreemap[item.filename] = generate_rfile_hash(absolute_item_path, client=sftpc) rtreemap[item.filename] = self.generate_rfile_hash(absolute_item_path)
return rtreemap return rtreemap
async def generate_rfile_hash_async(file_path, hexdigest=True, client=None): def generate_file_hash_oversftp(self, file_path: str, return_also_buffer: bool=False):
if not client: a = self.agent
a = _agent.synca sftpc = a.get_sftp_client()
return self.generate_rfile_hash(file_path, return_also_buffer=return_also_buffer)
async def generate_rfile_hash_async(self, file_path, hexdigest=True):
a = self.agent
client = a.get_sftp_client() client = a.get_sftp_client()
with client.open(file_path, "rb") as f: with client.open(file_path, "rb") as f:
@ -72,7 +81,7 @@ async def generate_rfile_hash_async(file_path, hexdigest=True, client=None):
if hexdigest: return hashlib.md5(buf).hexdigest() if hexdigest: return hashlib.md5(buf).hexdigest()
return hashlib.md5(buf).digest() return hashlib.md5(buf).digest()
async def generate_tree_hash_oversftp_async(root_path :str): async def generate_tree_hash_oversftp_async(self, root_path :str):
''' '''
@param root_path string, root_path in remote server @param root_path string, root_path in remote server
generate a map of hashes starting from `root_path` recursively generate a map of hashes starting from `root_path` recursively
@ -89,7 +98,7 @@ async def generate_tree_hash_oversftp_async(root_path :str):
if not root_path.endswith(os.path.sep): if not root_path.endswith(os.path.sep):
root_path = root_path + os.path.sep root_path = root_path + os.path.sep
a = _agent.synca a = self.agent
sftpc = a.get_sftp_client() sftpc = a.get_sftp_client()
def dtask_done_cback(fname, f): def dtask_done_cback(fname, f):
@ -107,12 +116,12 @@ async def generate_tree_hash_oversftp_async(root_path :str):
print('absolute_item_path: %s, item %s, isdir: %s' % (absolute_item_path, item.filename, stat.S_ISDIR(item.st_mode))) print('absolute_item_path: %s, item %s, isdir: %s' % (absolute_item_path, item.filename, stat.S_ISDIR(item.st_mode)))
if stat.S_ISDIR(item.st_mode): if stat.S_ISDIR(item.st_mode):
# rtreemap[item.filename] = await generate_tree_hash_oversftp_async(absolute_item_path) # rtreemap[item.filename] = await generate_tree_hash_oversftp_async(absolute_item_path)
dtask = asyncio.create_task(generate_tree_hash_oversftp_async(absolute_item_path)) dtask = asyncio.create_task(self.generate_tree_hash_oversftp_async(absolute_item_path))
dtask.add_done_callback(functools.partial(dtask_done_cback, item.filename)) dtask.add_done_callback(functools.partial(dtask_done_cback, item.filename))
tasks.append(dtask) tasks.append(dtask)
else: else:
# rtreemap[item.filename] = await generate_rfile_hash_async(absolute_item_path, client=sftpc) # rtreemap[item.filename] = await generate_rfile_hash_async(absolute_item_path, client=sftpc)
ftask = asyncio.create_task(generate_rfile_hash_async(absolute_item_path, client=sftpc)) ftask = asyncio.create_task(self.generate_rfile_hash_async(absolute_item_path))
ftask.add_done_callback(functools.partial(ftask_done_cback, item.filename)) ftask.add_done_callback(functools.partial(ftask_done_cback, item.filename))
tasks.append(ftask) tasks.append(ftask)
# item.filename # item.filename
@ -123,7 +132,8 @@ async def generate_tree_hash_oversftp_async(root_path :str):
def test_sync_rtree(path='/home/notanamber/notes_dev/'): def test_sync_rtree(path='/home/notanamber/notes_dev/'):
start = time.time() start = time.time()
print('Start task') print('Start task')
rtree = generate_tree_hash_oversftp(path) rhagent = RHAgent('myvps', 'notanamber')
rtree = rhagent.generate_tree_hash_oversftp(path)
end = time.time() end = time.time()
print('task done in %.2f' % (end - start)) print('task done in %.2f' % (end - start))
return rtree return rtree
@ -131,11 +141,16 @@ def test_sync_rtree(path='/home/notanamber/notes_dev/'):
def test_async_rtree(path='/home/notanamber/notes_dev/'): def test_async_rtree(path='/home/notanamber/notes_dev/'):
start = time.time() start = time.time()
print('Start task') print('Start task')
rtree = asyncio.run(generate_tree_hash_oversftp_async(path)) rhagent = RHAgent('myvps', 'notanamber')
rtree = asyncio.run(rhagent.generate_tree_hash_oversftp_async(path))
end = time.time() end = time.time()
print('task done in %.2f' % (end - start)) print('task done in %.2f' % (end - start))
return rtree return rtree
def get_test_agent():
    '''
    Build and return an RHAgent for host 'myvps' and user 'notanamber'.
    '''
    return RHAgent('myvps', 'notanamber')
''' '''
@ -159,7 +174,6 @@ tree['a'] = {
''' '''
root_path = '/home/luca/rsyn_test_fap'