From c87ce65f51562d4758b3fd96b50e489c86429bec Mon Sep 17 00:00:00 2001 From: John Whitington Date: Thu, 27 Feb 2020 15:14:51 +0100 Subject: [PATCH] moved ocg stuff into cpdf.ml:" --- cpdf.ml | 108 ++++++++++++++++++++++++++++++++++++++++++++ cpdf.mli | 6 +++ cpdfcommand.ml | 119 +++---------------------------------------------- 3 files changed, 119 insertions(+), 114 deletions(-) diff --git a/cpdf.ml b/cpdf.ml index a6c839d..a73c2be 100644 --- a/cpdf.ml +++ b/cpdf.ml @@ -4218,3 +4218,111 @@ let append_page_content s before fast range pdf = process_pages (append_page_content_page fast s before pdf) pdf range +(* [ocg_coalesce pdf] merges optional content groups (OCGs) that have the same /Name, updating [pdf] in place and returning unit. It does nothing when the catalog has no /OCProperties entry, and raises Failure on malformed input (see the failwith calls below). Steps: 1. Get list of indirects of all OCGs from the /OCProperties, and their textual names + * 2. Calculate a change list to coalesce them: within each set of equal names, every object number after the first is mapped to the first + * 3. Remove any changed ones from the /OCGs and /Order and /ON and /OFF in /OCProperties + * 4. Do the changes to all indirect references in the whole pdf *) +(*FIXME Pre-existing nulls - what to do? *) +let ocg_coalesce pdf = + match Pdf.lookup_direct pdf "/OCProperties" (Pdf.catalog_of_pdf pdf) with + None -> () + | Some ocpdict -> + let number_name_pairs = + match Pdf.lookup_direct pdf "/OCGs" ocpdict with + Some (Pdf.Array ocgs) -> + begin let numbers = + map (function Pdf.Indirect i -> i | _ -> failwith "Malformed /OCG entry") ocgs + in + let names = + map + (fun i -> + try + begin match Pdf.lookup_obj pdf i with + Pdf.Dictionary d -> + begin match Pdf.lookup_direct pdf "/Name" (Pdf.Dictionary d) with + Some (Pdf.String s) -> s + | _ -> failwith "ocg: missing name" + end + | _ -> + failwith "ocg: not a dictionary" + end + with _ -> failwith "OCG object missing") (* NOTE(review): this catch-all handler also intercepts the two Failure exceptions raised inside the try, so all three cases are reported as OCG object missing *) + numbers + in + combine numbers names + end + | _ -> failwith "Malformed or missing /OCGs" + in + (*iter (fun (num, name) -> Printf.printf "%i = %s\n" num name) number_name_pairs;*) + let changes = + let cf (_, name) (_, name') = compare name name' in + let sets = collate cf (List.stable_sort cf number_name_pairs) in (* pairs are ordered by name only; collate is defined elsewhere and presumably groups the equal-named neighbours - TODO confirm *) + flatten (option_map (function [] -> None | 
(hnum, _)::t -> Some (map (function (tnum, _) -> (tnum, hnum)) t)) sets) (* each change is a pair: (object number to retire, object number to keep) *) + in + (*Printf.printf "\nChanges are:\n"; + List.iter (fun (f, t) -> Printf.printf "%i -> %i\n" f t) changes;*) + let new_ocproperties = + let remove_from_array key nums dict = (* rebuild the array stored at [key] in [dict] without the indirect references whose number is in [nums]; NOTE(review): elements that are not indirect references are dropped as well *) + match Pdf.lookup_direct pdf key dict with + | Some (Pdf.Array elts) -> + let elts' = option_map (function Pdf.Indirect i -> if mem i nums then None else Some (Pdf.Indirect i) | _ -> None) elts in + Pdf.add_dict_entry dict key (Pdf.Array elts') + | _ -> dict + in + let remove_from_array_inside_d key nums dict = (* same filtering, applied to the array at [key] inside the /D dictionary of [dict]; fails when [dict] has no /D dictionary *) + match Pdf.lookup_direct pdf "/D" dict with + | Some (Pdf.Dictionary ddict) -> + begin match Pdf.lookup_direct pdf key (Pdf.Dictionary ddict) with + | Some (Pdf.Array elts) -> + let elts' = option_map (function Pdf.Indirect i -> if mem i nums then None else Some (Pdf.Indirect i) | _ -> None) elts in + Pdf.add_dict_entry dict "/D" (Pdf.add_dict_entry (Pdf.Dictionary ddict) key (Pdf.Array elts')) + | _ -> dict + end + | _ -> failwith "No /D dict in OCGProperties" + in + let nums = map fst changes in + (*Printf.printf "\nto remove:\n"; + List.iter (Printf.printf "%i ") nums;*) + remove_from_array "/OCGs" nums + (remove_from_array_inside_d "/ON" nums + (remove_from_array_inside_d "/OFF" nums + (remove_from_array_inside_d "/Order" nums ocpdict))) + in + (*flprint (Pdfwrite.string_of_pdf new_ocproperties);*) + let ocp_objnum = Pdf.addobj pdf new_ocproperties in (* the edited /OCProperties and a catalog referring to it are added as new objects; the trailer /Root and pdf.root are then pointed at the new catalog *) + let new_catalog = Pdf.addobj pdf (Pdf.add_dict_entry (Pdf.catalog_of_pdf pdf) "/OCProperties" (Pdf.Indirect ocp_objnum)) in + pdf.Pdf.trailerdict <- Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect new_catalog); + pdf.Pdf.root <- new_catalog; + Pdf.objselfmap (Pdf.renumber_object_parsed pdf (hashtable_of_dictionary changes)) pdf (* step 4: apply the retire-to-keep renumbering built from changes *) + +let ocg_list pdf = (* [ocg_list pdf] prints on standard output, one per line, the /Name string of every indirect entry of the /OCGs array in /OCProperties. Entries that are not indirect references, or whose object has no string /Name, are skipped silently; nothing is printed when /OCProperties or a /OCGs array is absent. *) + match Pdf.lookup_direct pdf "/OCProperties" (Pdf.catalog_of_pdf pdf) with + None -> () + | Some ocpdict -> + match Pdf.lookup_direct pdf "/OCGs" ocpdict with + Some (Pdf.Array elts) -> + List.iter 
+ (function + Pdf.Indirect i -> + (match Pdf.lookup_direct pdf "/Name" (Pdf.lookup_obj pdf i) with + Some (Pdf.String s) -> Printf.printf "%s\n" s | _ -> ()) + | _ -> ()) + elts + | _ -> () + +let ocg_rename f t pdf = (* [ocg_rename f t pdf] maps over the objects of [pdf] with Pdf.objselfmap (presumably rewriting them in place - TODO confirm): a dictionary whose /Type is the name /OCG and whose /Name is the string [f] gets its /Name replaced by the string [t]; every other object is returned unchanged. *) + Pdf.objselfmap + (function + Pdf.Dictionary d -> + begin match Pdf.lookup_direct pdf "/Type" (Pdf.Dictionary d) with + Some (Pdf.Name "/OCG") -> + begin match Pdf.lookup_direct pdf "/Name" (Pdf.Dictionary d) with + Some (Pdf.String s) when s = f -> + Pdf.add_dict_entry (Pdf.Dictionary d) "/Name" (Pdf.String t) + | _ -> Pdf.Dictionary d + end + | _ -> Pdf.Dictionary d + end + | x -> x + ) + pdf diff --git a/cpdf.mli b/cpdf.mli index 5176a02..95dd36c 100644 --- a/cpdf.mli +++ b/cpdf.mli @@ -450,3 +450,9 @@ val extract_text : float option -> Pdf.t -> int list -> string val append_page_content : string -> bool -> bool -> int list -> Pdf.t -> Pdf.t +val ocg_coalesce : Pdf.t -> unit (** Merge optional content groups which share a name. The document is modified in place. Raises Failure if the optional content data is malformed. *) + +val ocg_list : Pdf.t -> unit (** Print the name of each optional content group, one per line, on standard output. *) + +val ocg_rename : string -> string -> Pdf.t -> unit (** [ocg_rename f t pdf] renames each optional content group named [f] to [t]. The document is modified in place. *) + diff --git a/cpdfcommand.ml b/cpdfcommand.ml index bf9c3fc..66c9cc0 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -3585,117 +3585,6 @@ let write_json output pdf = CpdfwriteJSON.write f args.jsonparsecontentstreams args.jsonnostreamdata pdf; close_out f -(* 1. Get list of indirects of all OCGs from the /OCProperties, and their textual names - * 2. Calculate a change list to coalesce them - * 3. Remove any changed ones from the /OCGs and /Order and /ON and /OFF in /OCProperties - * 4. Do the changes to all indirect references in the whole pdf *) -(*FIXME Pre-existing nulls - what to do? 
*) -let ocg_coalesce pdf = - match Pdf.lookup_direct pdf "/OCProperties" (Pdf.catalog_of_pdf pdf) with - None -> pdf - | Some ocpdict -> - let number_name_pairs = - match Pdf.lookup_direct pdf "/OCGs" ocpdict with - Some (Pdf.Array ocgs) -> - begin let numbers = - map (function Pdf.Indirect i -> i | _ -> failwith "Malformed /OCG entry") ocgs - in - let names = - map - (fun i -> - try - begin match Pdf.lookup_obj pdf i with - Pdf.Dictionary d -> - begin match Pdf.lookup_direct pdf "/Name" (Pdf.Dictionary d) with - Some (Pdf.String s) -> s - | _ -> failwith "ocg: missing name" - end - | _ -> - failwith "ocg: not a dictionary" - end - with _ -> failwith "OCG object missing") - numbers - in - combine numbers names - end - | _ -> failwith "Malformed or missing /OCGs" - in - (*iter (fun (num, name) -> Printf.printf "%i = %s\n" num name) number_name_pairs;*) - let changes = - let cf (_, name) (_, name') = compare name name' in - let sets = collate cf (List.stable_sort cf number_name_pairs) in - flatten (option_map (function [] -> None | (hnum, _)::t -> Some (map (function (tnum, _) -> (tnum, hnum)) t)) sets) - in - (*Printf.printf "\nChanges are:\n"; - List.iter (fun (f, t) -> Printf.printf "%i -> %i\n" f t) changes;*) - let new_ocproperties = - let remove_from_array key nums dict = - match Pdf.lookup_direct pdf key dict with - | Some (Pdf.Array elts) -> - let elts' = option_map (function Pdf.Indirect i -> if mem i nums then None else Some (Pdf.Indirect i) | _ -> None) elts in - Pdf.add_dict_entry dict key (Pdf.Array elts') - | _ -> dict - in - let remove_from_array_inside_d key nums dict = - match Pdf.lookup_direct pdf "/D" dict with - | Some (Pdf.Dictionary ddict) -> - begin match Pdf.lookup_direct pdf key (Pdf.Dictionary ddict) with - | Some (Pdf.Array elts) -> - let elts' = option_map (function Pdf.Indirect i -> if mem i nums then None else Some (Pdf.Indirect i) | _ -> None) elts in - Pdf.add_dict_entry dict "/D" (Pdf.add_dict_entry (Pdf.Dictionary ddict) key 
(Pdf.Array elts')) - | _ -> dict - end - | _ -> failwith "No /D dict in OCGProperties" - in - let nums = map fst changes in - (*Printf.printf "\nto remove:\n"; - List.iter (Printf.printf "%i ") nums;*) - remove_from_array "/OCGs" nums - (remove_from_array_inside_d "/ON" nums - (remove_from_array_inside_d "/OFF" nums - (remove_from_array_inside_d "/Order" nums ocpdict))) - in - (*flprint (Pdfwrite.string_of_pdf new_ocproperties);*) - let ocp_objnum = Pdf.addobj pdf new_ocproperties in - let new_catalog = Pdf.addobj pdf (Pdf.add_dict_entry (Pdf.catalog_of_pdf pdf) "/OCProperties" (Pdf.Indirect ocp_objnum)) in - pdf.Pdf.trailerdict <- Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect new_catalog); - pdf.Pdf.root <- new_catalog; - Pdf.objselfmap (Pdf.renumber_object_parsed pdf (hashtable_of_dictionary changes)) pdf; - pdf - -let ocg_list pdf = - match Pdf.lookup_direct pdf "/OCProperties" (Pdf.catalog_of_pdf pdf) with - None -> () - | Some ocpdict -> - match Pdf.lookup_direct pdf "/OCGs" ocpdict with - Some (Pdf.Array elts) -> - List.iter - (function - Pdf.Indirect i -> - (match Pdf.lookup_direct pdf "/Name" (Pdf.lookup_obj pdf i) with - Some (Pdf.String s) -> Printf.printf "%s\n" s | _ -> ()) - | _ -> ()) - elts - | _ -> () - -let ocg_rename f t pdf = - Pdf.objselfmap - (function - Pdf.Dictionary d -> - begin match Pdf.lookup_direct pdf "/Type" (Pdf.Dictionary d) with - Some (Pdf.Name "/OCG") -> - begin match Pdf.lookup_direct pdf "/Name" (Pdf.Dictionary d) with - Some (Pdf.String s) when s = f -> - Pdf.add_dict_entry (Pdf.Dictionary d) "/Name" (Pdf.String t) - | _ -> Pdf.Dictionary d - end - | _ -> Pdf.Dictionary d - end - | x -> x - ) - pdf; - pdf - (* Main function *) let go () = match args.op with @@ -4586,13 +4475,15 @@ let go () = write_json args.out pdf | Some OCGCoalesce -> let pdf = get_single_pdf args.op false in - write_pdf false (ocg_coalesce pdf) + Cpdf.ocg_coalesce pdf; (* Cpdf.ocg_coalesce returns unit, so the same pdf value is written on the next line *) + write_pdf false pdf | Some OCGList -> let pdf = get_single_pdf args.op 
true in - ocg_list pdf + Cpdf.ocg_list pdf | Some OCGRename -> let pdf = get_single_pdf args.op false in - write_pdf false (ocg_rename args.ocgrenamefrom args.ocgrenameto pdf) + Cpdf.ocg_rename args.ocgrenamefrom args.ocgrenameto pdf; (* Cpdf.ocg_rename returns unit, so the same pdf value is written on the next line *) + write_pdf false pdf let parse_argv () = if args.debug then