v1.1 - Direct parsing with flag --direct

When invoked with --direct while a Kindle is attached and mounted, the script now parses clippings automatically, without having to copy My Clippings.txt into the program's folder manually.
2025-05-31 16:02:28 +02:00
parent 720cf32d8f
commit bf438159bf
2 changed files with 120 additions and 1 deletions
--- a/README.md
+++ b/README.md
@@ -84,7 +84,7 @@ Each book generates a Markdown file with citations formatted as blockquotes:
 - `requests` library for WebDAV functionality

 # To-Do
-- [ ] Auto-parse Kindle clippings directly from Kindle device
+- [x] Auto-parse Kindle clippings directly from Kindle device
 - [ ] GUI interface

 ## License
--- a/main.py
+++ b/main.py
@@ -14,9 +14,115 @@ import argparse
 import requests
 from requests.auth import HTTPBasicAuth
 from xml.etree import ElementTree as ET
+import os
+import shutil
 from config import get_webdav_config


+def find_kindle_folder():
+    """
+    Locate a mounted Kindle by walking the tree under /run/media.
+
+    A directory matches when it is named 'Kindle' and holds a 'documents'
+    subdirectory. Progress and errors are reported with print().
+
+    Returns:
+        str: Path to Kindle folder if found, None otherwise
+    """
+    media_path = Path('/run/media')
+
+    if not media_path.exists():
+        print("Error: /run/media directory not found")
+        return None
+
+    def search_recursive(path, max_depth=5, current_depth=0):
+        """Recursively search for Kindle folder with depth limit."""
+        if current_depth > max_depth:
+            return None
+
+        try:
+            for item in path.iterdir():
+                if not item.is_dir():
+                    continue
+
+                # A 'Kindle' directory only counts when it also contains a
+                # 'documents' folder (basic validation); is_dir() is False
+                # for a missing path, so no separate exists() check.
+                if item.name == 'Kindle' and (item / 'documents').is_dir():
+                    print(f"Found Kindle folder: {item}")
+                    return str(item)
+
+                # Continue searching in subdirectories
+                result = search_recursive(item, max_depth, current_depth + 1)
+                if result:
+                    return result
+        except OSError:
+            # Unreadable or vanished directories (PermissionError and other
+            # OSError subclasses) are skipped so the caller's loop moves on;
+            # anything that is not an I/O error propagates instead of hiding.
+            pass
+
+        return None
+
+    print("Searching for Kindle folder in /run/media...")
+    kindle_path = search_recursive(media_path)
+    if not kindle_path:
+        print("Error: Kindle folder not found in /run/media")
+    return kindle_path
+
+
+def extract_clippings_from_kindle(kindle_path, output_file='My Clippings.txt'):
+    """
+    Copy My Clippings.txt out of the Kindle's documents folder.
+
+    Args:
+        kindle_path (str): Path to Kindle folder
+        output_file (str): Destination path for the copy; a bare filename
+            lands in the current working directory
+
+    Returns:
+        str: Path to extracted file if successful, None otherwise
+    """
+    documents_path = Path(kindle_path) / 'documents'
+    clippings_source = documents_path / 'My Clippings.txt'
+    output_path = Path(output_file)
+    if not documents_path.exists():
+        print(f"Error: documents folder not found in {kindle_path}")
+        return None
+    if not clippings_source.exists():
+        print(f"Error: My Clippings.txt not found in {documents_path}")
+        return None
+    try:
+        shutil.copy2(clippings_source, output_path)
+    except shutil.SameFileError:
+        # Destination already is the Kindle's own file; it is usable as-is.
+        return str(output_path)
+    except OSError as e:
+        print(f"Error extracting clippings file: {e}")
+        return None
+    print(f"Extracted: {clippings_source} -> {output_path}")
+    return str(output_path)
+
+
+def auto_parse():
+    """
+    Locate a connected Kindle and pull My Clippings.txt off it.
+
+    Returns:
+        str: Path to extracted clippings file if successful, None otherwise
+    """
+    print("Auto-parsing: Searching for Kindle device...")
+
+    # Step 1: discover where the Kindle is mounted.
+    kindle_dir = find_kindle_folder()
+
+    # Step 2: copy the clippings file out, unless discovery failed, in which
+    # case there is nothing to copy and None is handed back to the caller.
+    if kindle_dir:
+        return extract_clippings_from_kindle(kindle_dir)
+    return None
+
+
 def clean_title(title):
    """Clean and normalize book title."""
    return title.strip().replace('\r', '').replace('\n', '')
@@ -467,9 +573,22 @@ def main():
        action='store_true',
        help='Skip clearing WebDAV directory before upload (files will be added/overwritten)'
    )
+    parser.add_argument(
+        '--direct',
+        action='store_true',
+        help='Auto-parse: search for Kindle device in /run/media and extract My Clippings.txt automatically'
+    )

    args = parser.parse_args()

+    # Handle --direct flag for auto-parsing
+    if args.direct:
+        clippings_file = auto_parse()
+        if not clippings_file:
+            print("Auto-parse failed. Could not find or extract My Clippings.txt")
+            return 1
+        args.input_file = clippings_file
+
    # Check if input file exists
    input_path = Path(args.input_file)
    if not input_path.exists():