diff --git a/cpdf.ml b/cpdf.ml index 6f54dbd..c8a1db4 100644 --- a/cpdf.ml +++ b/cpdf.ml @@ -2849,9 +2849,9 @@ let xmltree_of_bytes b = and data d = D d in Xmlm.input_doc_tree ~el ~data i -(*let rec string_of_xmltree = function +let rec string_of_xmltree = function D d -> - Printf.sprintf "DATA **%s**" d + Printf.sprintf "DATA {%s}" d | E (tag, trees) -> Printf.sprintf "ELT (%s, %s)" (string_of_tag tag) @@ -2863,7 +2863,7 @@ and string_of_tag ((n, n'), attributes) = (string_of_attributes attributes) and string_of_attribute ((n, n'), str) = - Printf.sprintf "ATTRNAME |%s| |%s|, STR **%s**" n n' str + Printf.sprintf "ATTRNAME |%s| |%s|, STR {%s}" n n' str and string_of_attributes attrs = fold_left @@ -2871,15 +2871,46 @@ and string_of_attributes attrs = and string_of_xmltrees trees = fold_left - (fun a b -> a ^ " " ^ b) "" (map string_of_xmltree trees)*) + (fun a b -> a ^ " " ^ b) "" (map string_of_xmltree trees) + +let adobe = "http://ns.adobe.com/pdf/1.3/" + +let xmp = "http://ns.adobe.com/xap/1.0/" + +let dc = "http://purl.org/dc/elements/1.1/" + +let rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" + +let combine_with_spaces strs = + String.trim + (fold_left (fun x y -> x ^ (if x <> "" then ", " else "") ^ y) "" strs) + +(* Collect all
rdf:li elements inside a seq, bag, or alt. Combine with commas. If +none found, return empty string instead. *) +let collect_list_items = function + E (((n, n'), _), elts) when + n = rdf && (n' = "Alt" || n' = "Seq" || n' = "Bag") + -> + combine_with_spaces + (option_map + (function + E (((n, n'), _), [D d]) when n = rdf && n' = "li" -> + Some d + | _ -> None) + elts) + | _ -> "" + +let collect_list_items_all all = + match keep (function E _ -> true | _ -> false) all with + h::_ -> Some (collect_list_items h) + | [] -> None let rec get_data_for namespace name = function D _ -> None - | E (((n, n'), children), [D d]) when n = namespace && n' = name -> + | E (((n, n'), _), [D d]) when n = namespace && n' = name -> Some d - (*| E (((n, n'), l), [D d]) -> - if n' <> "image" then Printf.printf "%s %s %s\n" n n' d; - None*) + | E (((n, n'), _), e) when n = namespace && n' = name -> + collect_list_items_all e | E (_, l) -> match option_map (get_data_for namespace name) l with x :: _ -> Some x @@ -2897,13 +2928,6 @@ let output_xmp_info encoding pdf = None -> () | Some metadata -> let dtd, tree = xmltree_of_bytes metadata in - (*flprint "***************** ORIGINAL\n"; - print_endline (string_of_bytes metadata); - flprint "***************** TREE\n"; - print_endline (string_of_xmltree tree);*) - let adobe = "http://ns.adobe.com/pdf/1.3/" - and xmp = "http://ns.adobe.com/xap/1.0/" - and dc = "http://purl.org/dc/elements/1.1/" in print_out tree "XMP pdf:Keywords" adobe "Keywords"; print_out tree "XMP pdf:PDFVersion" adobe "PDFVersion"; print_out tree "XMP pdf:Producer" adobe "Producer"; @@ -2913,7 +2937,8 @@ let output_xmp_info encoding pdf = print_out tree "XMP xmp:MetadataDate" xmp "MetadataDate"; print_out tree "XMP xmp:ModifyDate" xmp "ModifyDate"; print_out tree "XMP dc:title" dc "title"; - print_out tree "XMP dc:creator" dc "creator" + print_out tree "XMP dc:creator" dc "creator"; + print_out tree "XMP dc:subject" dc "subject" (* \section{Blacken text} *)