From caacaa5dd5670ad2f8db2c35070157a065dba9c9 Mon Sep 17 00:00:00 2001 From: JoelShepard <57812697+JoelShepard@users.noreply.github.com> Date: Tue, 10 Jun 2025 15:01:21 +0200 Subject: [PATCH] v1.2 - a better uploading feature with file merging --- .gitignore | 1 + README.md | 10 +++++ main.py | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 122 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index ba4d2d0..a4c0ae9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ kindle_books/* __pycache__/ .ropeproject +.claude/ # Configuration files with sensitive data config.json diff --git a/README.md b/README.md index 104df62..ffb80a4 100644 --- a/README.md +++ b/README.md @@ -93,3 +93,13 @@ Each book generates a Markdown file with citations formatted as blockquotes: ## License This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. + +## v1.2 + +Novità in questa release: + +- Merge avanzato di citazioni locali e remote con risoluzione carattere-per-carattere +- Risoluzione dei conflitti: si mantiene la versione remota per modifiche minori e quella locale per modifiche maggiori +- Preservazione delle righe vuote e rimozione automatica di backslash indesiderati attorno a `[[…]]` +- Gestione dei nomi file URL-encoded per corretto matching +- Nuovo flag CLI `--clear` per eliminare esplicitamente i file remoti prima dell'upload diff --git a/main.py b/main.py index a6630b1..1be1af2 100644 --- a/main.py +++ b/main.py @@ -491,6 +491,113 @@ def upload_files_to_webdav(file_paths): print("Error: WebDAV username or password not configured") return [], file_paths + auth = HTTPBasicAuth(WEBDAV_USERNAME, WEBDAV_PASSWORD) + + # Merge local and remote changes before upload + print("Merging local and remote changes...") + from urllib.parse import urlparse + from pathlib import Path + + # Retrieve list of remote files + remote_urls = get_webdav_directory_contents(WEBDAV_BASE_URL, auth) + parsed_base = urlparse(WEBDAV_BASE_URL) + + # Build mapping of sanitized stems to remote URLs + remote_map = {} + from urllib.parse import urlparse, unquote + for url in remote_urls: + if url.startswith('/'): + full_url = f"{parsed_base.scheme}://{parsed_base.netloc}{url}" + else: + full_url = url + # Decode percent-encoding for correct filename comparison + raw_name = unquote(Path(url).name) + stem = Path(raw_name).stem + sanitized_stem = sanitize_filename(stem) + remote_map[sanitized_stem] = full_url + + # Build mapping of local stems to file paths + local_map = {Path(p).stem: p for p in file_paths} + + # Determine local output directory from first file path + output_dir = Path(file_paths[0]).parent if file_paths else Path('.') + + # Download files present remotely but missing locally + for stem, full_url in remote_map.items(): + if stem not in local_map: + try: + resp = requests.get(full_url, auth=auth, timeout=30) + if resp.status_code == 200: + new_path = output_dir / f"{stem}.md" + with open(new_path, 'wb') as f: + f.write(resp.content) + print(f"Downloaded remote-only file: {new_path.name}") + file_paths.append(str(new_path)) + local_map[stem] = str(new_path) + else: + print(f"Warning: Failed to download remote file {full_url} - Status: {resp.status_code}") + except Exception as e: + print(f"Warning: Could not download {full_url}: {e}") + + # Merge existing local and remote content by matching sanitized stems + from difflib import SequenceMatcher + for stem, full_url in remote_map.items(): + if stem in local_map: + file_path = local_map[stem] + try: + resp = requests.get(full_url, auth=auth, timeout=30) + if resp.status_code == 200: + # Preserve blank lines when splitting for merge + remote_lines = resp.text.splitlines() + with open(file_path, 'r', encoding='utf-8') as f: + # Preserve blank lines when splitting for merge + local_lines = f.read().splitlines() + matcher = SequenceMatcher(None, local_lines, remote_lines) + merged_lines = [] + for tag, i1, i2, j1, j2 in matcher.get_opcodes(): + if tag in ('equal', 'delete'): + merged_lines.extend(local_lines[i1:i2]) + if tag == 'insert': + merged_lines.extend(remote_lines[j1:j2]) + if tag == 'replace': + # Character-level check: if single-line replace + local_block = local_lines[i1:i2] + remote_block = remote_lines[j1:j2] + if len(local_block) == len(remote_block) == 1: + from difflib import SequenceMatcher as CharMatcher + ratio = CharMatcher(None, local_block[0], remote_block[0]).ratio() + # If remote change is minor (ratio>=0.9), keep remote; if major, keep local + if ratio >= 0.9: + merged_lines.append(remote_block[0]) + else: + merged_lines.append(local_block[0]) + else: + merged_lines.extend(local_block) + merged_lines.extend(remote_block) + # Post-process: collapse near-duplicate lines + from difflib import SequenceMatcher as LineMatcher + filtered = [] + for line in merged_lines: + if not filtered: + filtered.append(line) + else: + prev = filtered[-1] + if LineMatcher(None, prev, line).ratio() >= 0.9: + filtered[-1] = line + else: + filtered.append(line) + merged_lines = filtered + # Remove any escape backslashes before square brackets + import re + merged_lines = [re.sub(r'\\([\[\]])', r'\1', l) for l in merged_lines] + with open(file_path, 'w', encoding='utf-8') as f: + f.write('\n'.join(merged_lines)) + print(f"Merged file: {Path(file_path).name}") + else: + print(f"Warning: Failed to fetch remote file {full_url} for merging - Status: {resp.status_code}") + except Exception as e: + print(f"Warning: Could not merge {full_url}: {e}") + successful_uploads = [] failed_uploads = [] @@ -569,9 +676,9 @@ def main(): help='Upload the markdown files to WebDAV server' ) parser.add_argument( - '--no-clear', + '--clear', action='store_true', - help='Skip clearing WebDAV directory before upload (files will be added/overwritten)' + help='Clear WebDAV directory before upload (default: merge with existing files)' ) parser.add_argument( '--direct', @@ -610,8 +717,8 @@ def main(): # Upload to WebDAV if requested if args.upload: - # Clear WebDAV directory first (unless --no-clear is specified) - if not args.no_clear: + # Clear WebDAV directory first only if explicitly requested + if args.clear: if not clear_webdav_directory(): print("Warning: Some files could not be deleted from WebDAV directory.") print("Continuing with upload (files will be overwritten)...")