From a8a102692c7028d4ea0bd1846d11c24e1b7a4fc4 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Mon, 24 Jun 2024 16:21:28 +0100 Subject: [PATCH] Enhance -obj with page numbers, chains --- Changes | 2 +- cpdfcommand.ml | 34 +++++++++++++++++++++++++++------- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/Changes b/Changes index 97668ff..780cd8d 100644 --- a/Changes +++ b/Changes @@ -12,7 +12,7 @@ o Set the natural language of a file Extended features: -o Allow -obj/-extract-stream to lookup nested PDF information +o Allow -obj to look up nested PDF information o Merge structure trees better when merging files o Report top-level natural language on -info o Report mark information dictionary contents on -info diff --git a/cpdfcommand.ml b/cpdfcommand.ml index 8878557..3d9733a 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -3412,10 +3412,8 @@ let build_enc () = Pdfwrite.user_password = args.user; Pdfwrite.permissions = banlist_of_args ()} -let objnum_of_objspec s = - int_of_string s - let extract_stream pdf decomp objnum = + let objnum = int_of_string objnum in (* maybe objspec in the future... *) let obj = Pdf.lookup_obj pdf objnum in Pdf.getstream obj; if decomp then Pdfcodec.decode_pdfstream_until_unknown pdf obj; @@ -3434,9 +3432,31 @@ let extract_stream pdf decomp objnum = | Stdout -> output_string stdout (Pdfio.string_of_bytes data) -let print_obj pdf objnum = - let obj = if objnum = 0 then pdf.Pdf.trailerdict else Pdf.lookup_obj pdf objnum in +(* Empty string is trailerdict. Begins with / and it's a chain separated by + commas. Begins with P and it's a page number then a (possibly empty) chain. + Otherwise it's an object number (0 = trailerdict). *) +let print_obj pdf objspec = + let simple_obj obj = + let obj = if obj = 0 then pdf.Pdf.trailerdict else Pdf.lookup_obj pdf obj in Printf.printf "%S\n" (Pdfwrite.string_of_pdf obj) + in + let chain_obj objnum chain = + let obj = if objnum = 0 then pdf.Pdf.trailerdict else Pdf.lookup_obj pdf objnum in + match Pdf.lookup_chain pdf obj chain with + | Some x -> Printf.printf "%S\n" (Pdfwrite.string_of_pdf x) + | None -> () + in + match explode objspec with + | 'P'::more -> + let number, chain = + let digits, rest = cleavewhile isdigit more in + List.nth (Pdf.page_reference_numbers pdf) (int_of_string (implode digits) - 1), + begin match String.split_on_char ',' (implode rest) with [""] -> [] | x -> x end + in + chain_obj number chain + | '/'::more -> chain_obj 0 (String.split_on_char ',' (implode ('/'::more))) + | [] -> simple_obj 0 + | _ -> simple_obj (int_of_string objspec) (* Main function *) let go () = @@ -4458,10 +4478,10 @@ let go () = write_pdf false pdf | Some (ExtractStream s) -> let pdf = get_single_pdf args.op true in - extract_stream pdf args.extract_stream_decompress (objnum_of_objspec s) + extract_stream pdf args.extract_stream_decompress s | Some (PrintObj s) -> let pdf = get_single_pdf args.op true in - print_obj pdf (objnum_of_objspec s) + print_obj pdf s | Some (Verify standard) -> begin match standard with | "PDF/UA-1(matterhorn)" ->