v1.2 - improved upload feature with file merging

This commit is contained in:
JoelShepard
2025-06-10 15:01:21 +02:00
parent cfea84ca25
commit caacaa5dd5
3 changed files with 122 additions and 4 deletions

1
.gitignore vendored
View File

@@ -1,6 +1,7 @@
kindle_books/*
__pycache__/
.ropeproject
.claude/
# Configuration files with sensitive data
config.json

View File

@@ -93,3 +93,13 @@ Each book generates a Markdown file with citations formatted as blockquotes:
## License
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
## v1.2
What's new in this release:
- Advanced merging of local and remote citations with character-by-character conflict resolution
- Conflict handling: the remote version is kept for minor edits, the local version for major edits
- Preservation of blank lines and automatic removal of unwanted backslashes around `[[…]]`
- Handling of URL-encoded filenames for correct matching
- New CLI flag `--clear` to explicitly delete remote files before upload

115
main.py
View File

@@ -491,6 +491,113 @@ def upload_files_to_webdav(file_paths):
print("Error: WebDAV username or password not configured")
return [], file_paths
auth = HTTPBasicAuth(WEBDAV_USERNAME, WEBDAV_PASSWORD)
# Merge local and remote changes before upload
print("Merging local and remote changes...")
from urllib.parse import urlparse
from pathlib import Path
# Retrieve list of remote files
remote_urls = get_webdav_directory_contents(WEBDAV_BASE_URL, auth)
parsed_base = urlparse(WEBDAV_BASE_URL)
# Build mapping of sanitized stems to remote URLs
remote_map = {}
from urllib.parse import urlparse, unquote
for url in remote_urls:
if url.startswith('/'):
full_url = f"{parsed_base.scheme}://{parsed_base.netloc}{url}"
else:
full_url = url
# Decode percent-encoding for correct filename comparison
raw_name = unquote(Path(url).name)
stem = Path(raw_name).stem
sanitized_stem = sanitize_filename(stem)
remote_map[sanitized_stem] = full_url
# Build mapping of local stems to file paths
local_map = {Path(p).stem: p for p in file_paths}
# Determine local output directory from first file path
output_dir = Path(file_paths[0]).parent if file_paths else Path('.')
# Download files present remotely but missing locally
for stem, full_url in remote_map.items():
if stem not in local_map:
try:
resp = requests.get(full_url, auth=auth, timeout=30)
if resp.status_code == 200:
new_path = output_dir / f"{stem}.md"
with open(new_path, 'wb') as f:
f.write(resp.content)
print(f"Downloaded remote-only file: {new_path.name}")
file_paths.append(str(new_path))
local_map[stem] = str(new_path)
else:
print(f"Warning: Failed to download remote file {full_url} - Status: {resp.status_code}")
except Exception as e:
print(f"Warning: Could not download {full_url}: {e}")
# Merge existing local and remote content by matching sanitized stems
from difflib import SequenceMatcher
for stem, full_url in remote_map.items():
if stem in local_map:
file_path = local_map[stem]
try:
resp = requests.get(full_url, auth=auth, timeout=30)
if resp.status_code == 200:
# Preserve blank lines when splitting for merge
remote_lines = resp.text.splitlines()
with open(file_path, 'r', encoding='utf-8') as f:
# Preserve blank lines when splitting for merge
local_lines = f.read().splitlines()
matcher = SequenceMatcher(None, local_lines, remote_lines)
merged_lines = []
for tag, i1, i2, j1, j2 in matcher.get_opcodes():
if tag in ('equal', 'delete'):
merged_lines.extend(local_lines[i1:i2])
if tag == 'insert':
merged_lines.extend(remote_lines[j1:j2])
if tag == 'replace':
# Character-level check: if single-line replace
local_block = local_lines[i1:i2]
remote_block = remote_lines[j1:j2]
if len(local_block) == len(remote_block) == 1:
from difflib import SequenceMatcher as CharMatcher
ratio = CharMatcher(None, local_block[0], remote_block[0]).ratio()
# If remote change is minor (ratio>=0.9), keep remote; if major, keep local
if ratio >= 0.9:
merged_lines.append(remote_block[0])
else:
merged_lines.append(local_block[0])
else:
merged_lines.extend(local_block)
merged_lines.extend(remote_block)
# Post-process: collapse near-duplicate lines
from difflib import SequenceMatcher as LineMatcher
filtered = []
for line in merged_lines:
if not filtered:
filtered.append(line)
else:
prev = filtered[-1]
if LineMatcher(None, prev, line).ratio() >= 0.9:
filtered[-1] = line
else:
filtered.append(line)
merged_lines = filtered
# Remove any escape backslashes before square brackets
import re
merged_lines = [re.sub(r'\\([\[\]])', r'\1', l) for l in merged_lines]
with open(file_path, 'w', encoding='utf-8') as f:
f.write('\n'.join(merged_lines))
print(f"Merged file: {Path(file_path).name}")
else:
print(f"Warning: Failed to fetch remote file {full_url} for merging - Status: {resp.status_code}")
except Exception as e:
print(f"Warning: Could not merge {full_url}: {e}")
successful_uploads = []
failed_uploads = []
@@ -569,9 +676,9 @@ def main():
help='Upload the markdown files to WebDAV server'
)
parser.add_argument(
'--no-clear',
'--clear',
action='store_true',
help='Skip clearing WebDAV directory before upload (files will be added/overwritten)'
help='Clear WebDAV directory before upload (default: merge with existing files)'
)
parser.add_argument(
'--direct',
@@ -610,8 +717,8 @@ def main():
# Upload to WebDAV if requested
if args.upload:
# Clear WebDAV directory first (unless --no-clear is specified)
if not args.no_clear:
# Clear WebDAV directory first only if explicitly requested
if args.clear:
if not clear_webdav_directory():
print("Warning: Some files could not be deleted from WebDAV directory.")
print("Continuing with upload (files will be overwritten)...")