This commit is contained in:
John Whitington 2023-03-22 19:29:12 +00:00
parent 06a0553568
commit 3af1730e14
2 changed files with 66 additions and 33 deletions

View File

@ -4,10 +4,10 @@ NONDOC = cpdfyojson cpdfxmlm cpdfutil
DOC = cpdfunicodedata cpdferror cpdfdebug cpdfjson cpdfstrftime cpdfcoord \ DOC = cpdfunicodedata cpdferror cpdfdebug cpdfjson cpdfstrftime cpdfcoord \
cpdfattach cpdfpagespec cpdfposition cpdfpresent cpdfmetadata \ cpdfattach cpdfpagespec cpdfposition cpdfpresent cpdfmetadata \
cpdfbookmarks cpdfpage cpdftruetype cpdfremovetext cpdfextracttext \ cpdfbookmarks cpdfpage cpdftruetype cpdfremovetext cpdfextracttext \
cpdfembed cpdfaddtext cpdfimage cpdffont cpdftype cpdfpad cpdfocg \ cpdfembed cpdfaddtext cpdffont cpdftype cpdfpad cpdfocg \
cpdfsqueeze cpdfdraft cpdfspot cpdfpagelabels cpdfcreate cpdfannot \ cpdfsqueeze cpdfdraft cpdfspot cpdfpagelabels cpdfcreate cpdfannot \
cpdfxobject cpdfimpose cpdftweak cpdftexttopdf cpdftoc cpdfjpeg \ cpdfxobject cpdfimpose cpdftweak cpdftexttopdf cpdftoc cpdfjpeg \
cpdfpng cpdfdraw cpdfcommand cpdfpng cpdfimage cpdfdraw cpdfcommand
MODS = $(NONDOC) $(DOC) MODS = $(NONDOC) $(DOC)

View File

@ -22,6 +22,7 @@ let pnm_to_channel_24 channel w h s =
done done
done done
(* FIXME do this all at once *)
let write_stream name stream = let write_stream name stream =
let fh = open_out_bin name in let fh = open_out_bin name in
for x = 0 to bytes_size stream - 1 do for x = 0 to bytes_size stream - 1 do
@ -29,8 +30,34 @@ let write_stream name stream =
done; done;
close_out fh close_out fh
(* Detect images we can write directly as PNGs, to avoid going through pnm2png
   or imagemagick. This is when BPC = 8, colourspace = DeviceRGB or CalRGB, and
   the only filter is /FlateDecode.

   Returns [true] if [name ^ ".png"] was written, and [false] if the image does
   not match the pattern above (nothing is written in that case). Raises [Exit]
   if /Width or /Height is not an integer, or if the stream data is not
   available after [Pdf.getstream]; any exception from opening or writing the
   file is propagated.

   NOTE(review): the stream bytes are handed to [Cpdfpng.write_png] unchanged
   as [idat]; presumably this relies on the data already being laid out as PNG
   scanlines - confirm against the image's /DecodeParms. *)
let write_image_png pdf resources name dict =
  (*Printf.printf "%s\n" (Pdfwrite.string_of_pdf (Pdf.direct pdf dict));*)
  match
    Pdfimage.colspace pdf dict resources,
    Pdfimage.bpc pdf dict,
    Pdf.lookup_direct pdf "/Filter" dict
  with
  | (Pdfspace.DeviceRGB | Pdfspace.CalRGB _),
    Some (Pdf.Integer 8),
    Some (Pdf.Name "/FlateDecode" | Pdf.Array [Pdf.Name "/FlateDecode"]) ->
      (*Printf.printf "Direct to png...\n";*)
      Pdf.getstream (Pdf.direct pdf dict);
      (* Validate everything we need BEFORE creating the output file, so that
         a failure here neither leaks a channel nor leaves an empty .png. *)
      let width = match Pdf.lookup_direct pdf "/Width" dict with Some (Pdf.Integer x) -> x | _ -> raise Exit in
      let height = match Pdf.lookup_direct pdf "/Height" dict with Some (Pdf.Integer x) -> x | _ -> raise Exit in
      let idat = match Pdf.direct pdf dict with Pdf.Stream {contents = (_, Got bytes)} -> bytes | _ -> raise Exit in
      (*Printf.printf "all ok...\n";*)
      let ch = open_out_bin (name ^ ".png") in
      let o = Pdfio.output_of_channel ch in
      (* Make sure the channel is released even if writing fails; the caller
         still sees the original exception. *)
      (try Cpdfpng.write_png {width; height; idat} o
       with e -> close_out_noerr ch; raise e);
      close_out ch;
      true
  | _ -> false
let write_image path_to_p2p path_to_im pdf resources name image = let write_image path_to_p2p path_to_im pdf resources name image =
match Pdfimage.get_image_24bpp pdf resources image with let main () =
begin match Pdfimage.get_image_24bpp pdf resources image with
| Pdfimage.JPEG (stream, _) -> write_stream (name ^ ".jpg") stream | Pdfimage.JPEG (stream, _) -> write_stream (name ^ ".jpg") stream
| Pdfimage.JPEG2000 (stream, _) -> write_stream (name ^ ".jpx") stream | Pdfimage.JPEG2000 (stream, _) -> write_stream (name ^ ".jpx") stream
| Pdfimage.JBIG2 (stream, _) -> write_stream (name ^ ".jbig2") stream | Pdfimage.JBIG2 (stream, _) -> write_stream (name ^ ".jbig2") stream
@ -66,6 +93,12 @@ let write_image path_to_p2p path_to_im pdf resources name image =
end end
| _ -> | _ ->
Printf.eprintf "Unsupported image type when extracting image %s %!" name Printf.eprintf "Unsupported image type when extracting image %s %!" name
end
in
match write_image_png pdf resources name image with
| true -> ()
| exception x -> Printf.printf "Failed to write PNG directly (%s)\n" (Printexc.to_string x); main ()
| _ -> main ()
let written = ref [] let written = ref []