Beginning -ocg-coalesce-on-name

This commit is contained in:
John Whitington 2020-02-26 17:24:27 +01:00
parent 9744ccb372
commit 78908569be
1 changed files with 54 additions and 5 deletions

View File

@ -190,6 +190,7 @@ type op =
| Prepend of string | Prepend of string
| Postpend of string | Postpend of string
| OutputJSON | OutputJSON
| OCGCoalesce
let string_of_op = function let string_of_op = function
| CopyFont _ -> "CopyFont" | CopyFont _ -> "CopyFont"
@ -313,6 +314,7 @@ let string_of_op = function
| Prepend _ -> "Prepend" | Prepend _ -> "Prepend"
| Postpend _ -> "Postpend" | Postpend _ -> "Postpend"
| OutputJSON -> "OutputJSON" | OutputJSON -> "OutputJSON"
| OCGCoalesce -> "OCGCoalesce"
(* Inputs: filename, pagespec. *) (* Inputs: filename, pagespec. *)
type input_kind = type input_kind =
@ -682,7 +684,7 @@ let banned banlist = function
| SetAuthor _|SetTitle _|SetSubject _|SetKeywords _|SetCreate _ | SetAuthor _|SetTitle _|SetSubject _|SetKeywords _|SetCreate _
| SetModify _|SetCreator _|SetProducer _|RemoveDictEntry _ | SetMetadata _ | SetModify _|SetCreator _|SetProducer _|RemoveDictEntry _ | SetMetadata _
| ExtractText | ExtractImages | ExtractFontFile | ExtractText | ExtractImages | ExtractFontFile
| AddPageLabels | RemovePageLabels | OutputJSON | AddPageLabels | RemovePageLabels | OutputJSON | OCGCoalesce
-> false (* Always allowed *) -> false (* Always allowed *)
(* Combine pages is not allowed because we would not know where to get the (* Combine pages is not allowed because we would not know where to get the
-recrypt from -- the first or second file? *) -recrypt from -- the first or second file? *)
@ -2095,10 +2097,12 @@ and specs =
("-squeeze-log-to", Arg.String setsqueezelogto, " Squeeze log location"); ("-squeeze-log-to", Arg.String setsqueezelogto, " Squeeze log location");
(* Just for error reporting *) (* Just for error reporting *)
("-gs-malformed-force", Arg.Unit whingemalformed, ""); ("-gs-malformed-force", Arg.Unit whingemalformed, "");
(* These items are undocumented *) (* These items are not documented yet, but will be soon *)
("-output-json", Arg.Unit (setop OutputJSON), ""); ("-output-json", Arg.Unit (setop OutputJSON), "");
("-output-json-parse-content-streams", Arg.Unit setjsonparsecontentstreams, ""); ("-output-json-parse-content-streams", Arg.Unit setjsonparsecontentstreams, "");
("-output-json-no-stream-data", Arg.Unit setjsonnostreamdata, ""); ("-output-json-no-stream-data", Arg.Unit setjsonnostreamdata, "");
("-ocg-coalesce-on-name", Arg.Unit (setop OCGCoalesce), "");
(* These items are undocumented *)
("-remove-unused-resources", Arg.Unit (setop RemoveUnusedResources), ""); ("-remove-unused-resources", Arg.Unit (setop RemoveUnusedResources), "");
("-stay-on-error", Arg.Unit setstayonerror, ""); ("-stay-on-error", Arg.Unit setstayonerror, "");
("-extract-fontfile", Arg.Unit (setop ExtractFontFile), ""); ("-extract-fontfile", Arg.Unit (setop ExtractFontFile), "");
@ -2116,9 +2120,6 @@ and specs =
("-fix-prince", Arg.Unit (setop RemoveUnusedResources), ""); ("-fix-prince", Arg.Unit (setop RemoveUnusedResources), "");
("-extract-text", Arg.Unit (setop ExtractText), ""); ("-extract-text", Arg.Unit (setop ExtractText), "");
("-extract-text-font-size", Arg.Float setextracttextfontsize, ""); ("-extract-text-font-size", Arg.Float setextracttextfontsize, "");
(*("-change-font-size-to", Arg.Float setchangefontsizeto, "");
("-change-font-size-shift", Arg.String setchangefontsizeshift, "");
("-change-font-size-color", Arg.String setchangefontsizecolor, "")*)
] ]
and usage_msg = and usage_msg =
@ -3563,6 +3564,51 @@ let write_json output pdf =
CpdfwriteJSON.write f args.jsonparsecontentstreams args.jsonnostreamdata pdf; CpdfwriteJSON.write f args.jsonparsecontentstreams args.jsonnostreamdata pdf;
close_out f close_out f
(* Coalesce optional content groups (OCGs) which share the same /Name.
 *
 * Plan:
 * 1. Get list of indirects of all OCGs from the /OCProperties, and their textual names
 * 2. Calculate a change list to coalesce them
 * 3. Make the changes in /Properties lists of pages / form xobjects. Anywhere else?
 * 4. Remove the old ones by making null everywhere else in the document.
 *
 * Only steps 1 and 2 are implemented so far: the (object number, name) pairs
 * and the computed change list are printed, and the document is returned
 * unchanged.
 *
 * Returns [] pairs (and so no changes) when the catalog has no /OCProperties.
 * Raises Failure when /OCProperties is present but /OCGs is missing or is not
 * an array, when an /OCGs element is not an indirect reference, or when a
 * referenced object is not a dictionary carrying a string /Name. *)
(*FIXME Pre-existing nulls - what to do? *)
let ocg_coalesce pdf =
  (* Textual name of the OCG stored in object number [i]. Only the object
     lookup itself is guarded, so that the more specific failures raised
     below are reported as they are instead of being rewritten. *)
  let name_of_ocg i =
    let obj =
      try Pdf.lookup_obj pdf i with _ -> failwith "OCG object missing"
    in
      match obj with
      | Pdf.Dictionary _ as dict ->
          begin match Pdf.lookup_direct pdf "/Name" dict with
          | Some (Pdf.String s) -> s
          | _ -> failwith "ocg: missing name"
          end
      | _ -> failwith "ocg: not a dictionary"
  in
  (* Step 1: pair every OCG object number with its name, in /OCGs order. *)
  let number_name_pairs =
    match Pdf.lookup_direct pdf "/OCProperties" (Pdf.catalog_of_pdf pdf) with
    | None -> []
    | Some ocproperties ->
        begin match Pdf.lookup_direct pdf "/OCGs" ocproperties with
        | Some (Pdf.Array ocgs) ->
            let numbers =
              map
                (function
                 | Pdf.Indirect i -> i
                 | _ -> failwith "Malformed /OCG entry")
                ocgs
            in
              combine numbers (map name_of_ocg numbers)
        | _ -> failwith "Malformed or missing /OCGs"
        end
  in
    (* Diagnostic output while the feature is under construction.
       NOTE(review): this is written to standard output - confirm it cannot
       be interleaved with a PDF which is itself being written there. *)
    iter (fun (num, name) -> Printf.printf "%i = %s\n" num name) number_name_pairs;
    (* Step 2: group the pairs by name. The sort is stable, so within each
       group the pair appearing first in /OCGs stays at the head; every other
       member of the group is mapped onto that head, giving (from, to). *)
    let changes =
      let cf (_, name) (_, name') = compare name name' in
      let sets = collate cf (List.stable_sort cf number_name_pairs) in
        flatten
          (option_map
             (function
              | [] -> None
              | (hnum, _)::t -> Some (map (fun (tnum, _) -> (tnum, hnum)) t))
             sets)
    in
      Printf.printf "\nChanges are:\n";
      iter (fun (f, t) -> Printf.printf "%i -> %i\n" f t) changes;
      (* Steps 3 and 4 are not written yet, so the document is unchanged. *)
      pdf
(* Main function *) (* Main function *)
let go () = let go () =
match args.op with match args.op with
@ -4451,6 +4497,9 @@ let go () =
| Some OutputJSON -> | Some OutputJSON ->
let pdf = get_single_pdf args.op false in let pdf = get_single_pdf args.op false in
write_json args.out pdf write_json args.out pdf
| Some OCGCoalesce ->
let pdf = get_single_pdf args.op false in
write_pdf false (ocg_coalesce pdf)
let parse_argv () = let parse_argv () =
if args.debug then if args.debug then