mirror of
https://github.com/johnwhitington/cpdf-source.git
synced 2025-03-02 10:38:03 +01:00
Plumbing for -process-images -dpi-threshold
This commit is contained in:
parent
3a39b5497a
commit
ecbccd4567
@ -4321,13 +4321,13 @@ let go () =
|
|||||||
if args.format_json then
|
if args.format_json then
|
||||||
flprint
|
flprint
|
||||||
(Cpdfyojson.Safe.pretty_to_string
|
(Cpdfyojson.Safe.pretty_to_string
|
||||||
(`List (map (fun (pagenum, xobject, w, h, wdpi, hdpi) ->
|
(`List (map (fun (pagenum, xobject, w, h, wdpi, hdpi, objnum) ->
|
||||||
`Assoc [("Page", `Int pagenum); ("XObject", `String xobject); ("W", `Int w); ("H", `Int h); ("Xdpi", `Float wdpi); ("Ydpi", `Float hdpi)]) images)))
|
`Assoc [("Object", `Int objnum); ("Page", `Int pagenum); ("XObject", `String xobject); ("W", `Int w); ("H", `Int h); ("Xdpi", `Float wdpi); ("Ydpi", `Float hdpi)]) images)))
|
||||||
else
|
else
|
||||||
iter
|
iter
|
||||||
(function (pagenum, xobject, w, h, wdpi, hdpi) ->
|
(function (pagenum, xobject, w, h, wdpi, hdpi, objnum) ->
|
||||||
if wdpi < f || hdpi < f then
|
if wdpi < f || hdpi < f then
|
||||||
Printf.printf "%i, %s, %i, %i, %f, %f\n" pagenum xobject w h wdpi hdpi)
|
Printf.printf "%i, %s, %i, %i, %f, %f, %i\n" pagenum xobject w h wdpi hdpi objnum)
|
||||||
images
|
images
|
||||||
| Some ListImages ->
|
| Some ListImages ->
|
||||||
let pdf = get_single_pdf args.op true in
|
let pdf = get_single_pdf args.op true in
|
||||||
|
23
cpdfimage.ml
23
cpdfimage.ml
@ -187,11 +187,7 @@ type xobj =
|
|||||||
|
|
||||||
let image_results = ref []
|
let image_results = ref []
|
||||||
|
|
||||||
let add_image_result i =
|
let rec image_resolution_page pdf page pagenum dpi images =
|
||||||
image_results := i::!image_results
|
|
||||||
|
|
||||||
(* Given a page and a list of (pagenum, name, thing) *)
|
|
||||||
let rec image_resolution_page pdf page pagenum dpi (images : (int * string * xobj) list) =
|
|
||||||
try
|
try
|
||||||
let pageops = Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content
|
let pageops = Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content
|
||||||
and transform = ref [ref Pdftransform.i_matrix] in
|
and transform = ref [ref Pdftransform.i_matrix] in
|
||||||
@ -215,11 +211,11 @@ let rec image_resolution_page pdf page pagenum dpi (images : (int * string * xob
|
|||||||
(*i Printf.printf "o = %f, %f, x = %f, %f, y = %f, %f\n" (fst o) (snd o) (fst x) (snd x) (fst y) (snd y); i*)
|
(*i Printf.printf "o = %f, %f, x = %f, %f, y = %f, %f\n" (fst o) (snd o) (fst x) (snd x) (fst y) (snd y); i*)
|
||||||
let rec lookup_image k = function
|
let rec lookup_image k = function
|
||||||
| [] -> assert false
|
| [] -> assert false
|
||||||
| (_, a, _) as h::_ when a = k -> h
|
| (_, a, _, _) as h::_ when a = k -> h
|
||||||
| _::t -> lookup_image k t
|
| _::t -> lookup_image k t
|
||||||
in
|
in
|
||||||
begin match lookup_image xobject images with
|
begin match lookup_image xobject images with
|
||||||
| (pagenum, name, Form (xobj_matrix, content, resources)) ->
|
| (pagenum, name, Form (xobj_matrix, content, resources), objnum) ->
|
||||||
let content =
|
let content =
|
||||||
(* Add in matrix etc. *)
|
(* Add in matrix etc. *)
|
||||||
let total_matrix = Pdftransform.matrix_compose xobj_matrix !(hd !transform) in
|
let total_matrix = Pdftransform.matrix_compose xobj_matrix !(hd !transform) in
|
||||||
@ -238,12 +234,12 @@ let rec image_resolution_page pdf page pagenum dpi (images : (int * string * xob
|
|||||||
in
|
in
|
||||||
let newpdf = Pdfpage.change_pages false pdf [page] in
|
let newpdf = Pdfpage.change_pages false pdf [page] in
|
||||||
image_resolution newpdf [pagenum] dpi
|
image_resolution newpdf [pagenum] dpi
|
||||||
| (pagenum, name, Image (w, h)) ->
|
| (pagenum, name, Image (w, h), objnum) ->
|
||||||
let lx = Pdfunits.points (distance_between o x) Pdfunits.Inch in
|
let lx = Pdfunits.points (distance_between o x) Pdfunits.Inch in
|
||||||
let ly = Pdfunits.points (distance_between o y) Pdfunits.Inch in
|
let ly = Pdfunits.points (distance_between o y) Pdfunits.Inch in
|
||||||
let wdpi = float w /. lx
|
let wdpi = float w /. lx
|
||||||
and hdpi = float h /. ly in
|
and hdpi = float h /. ly in
|
||||||
add_image_result (pagenum, xobject, w, h, wdpi, hdpi)
|
image_results := (pagenum, xobject, w, h, wdpi, hdpi, objnum)::!image_results
|
||||||
(*Printf.printf "%i, %s, %i, %i, %f, %f\n" pagenum xobject w h wdpi hdpi*)
|
(*Printf.printf "%i, %s, %i, %i, %f, %f\n" pagenum xobject w h wdpi hdpi*)
|
||||||
(*i else
|
(*i else
|
||||||
Printf.printf "S %i, %s, %i, %i, %f, %f\n" pagenum xobject (int_of_float w) (int_of_float h) wdpi hdpi i*)
|
Printf.printf "S %i, %s, %i, %i, %f, %f\n" pagenum xobject (int_of_float w) (int_of_float h) wdpi hdpi i*)
|
||||||
@ -275,6 +271,7 @@ and image_resolution pdf range dpi =
|
|||||||
| Some (Pdf.Dictionary xobjects) ->
|
| Some (Pdf.Dictionary xobjects) ->
|
||||||
iter
|
iter
|
||||||
(function (name, xobject) ->
|
(function (name, xobject) ->
|
||||||
|
let objnum = match xobject with Pdf.Indirect i -> i | _ -> 0 in
|
||||||
match Pdf.lookup_direct pdf "/Subtype" xobject with
|
match Pdf.lookup_direct pdf "/Subtype" xobject with
|
||||||
| Some (Pdf.Name "/Image") ->
|
| Some (Pdf.Name "/Image") ->
|
||||||
let width =
|
let width =
|
||||||
@ -286,7 +283,7 @@ and image_resolution pdf range dpi =
|
|||||||
| Some x -> Pdf.getnum pdf x
|
| Some x -> Pdf.getnum pdf x
|
||||||
| None -> 1.
|
| None -> 1.
|
||||||
in
|
in
|
||||||
images := (pagenum, name, Image (int_of_float width, int_of_float height))::!images
|
images := (pagenum, name, Image (int_of_float width, int_of_float height), objnum)::!images
|
||||||
| Some (Pdf.Name "/Form") ->
|
| Some (Pdf.Name "/Form") ->
|
||||||
let resources =
|
let resources =
|
||||||
match Pdf.lookup_direct pdf "/Resources" xobject with
|
match Pdf.lookup_direct pdf "/Resources" xobject with
|
||||||
@ -301,7 +298,7 @@ and image_resolution pdf range dpi =
|
|||||||
Pdftransform.d = Pdf.getnum pdf d; Pdftransform.e = Pdf.getnum pdf e; Pdftransform.f = Pdf.getnum pdf f}
|
Pdftransform.d = Pdf.getnum pdf d; Pdftransform.e = Pdf.getnum pdf e; Pdftransform.f = Pdf.getnum pdf f}
|
||||||
| _ -> Pdftransform.i_matrix
|
| _ -> Pdftransform.i_matrix
|
||||||
in
|
in
|
||||||
images := (pagenum, name, Form (matrix, contents, resources))::!images
|
images := (pagenum, name, Form (matrix, contents, resources), objnum)::!images
|
||||||
| _ -> ()
|
| _ -> ()
|
||||||
)
|
)
|
||||||
xobjects
|
xobjects
|
||||||
@ -311,8 +308,8 @@ and image_resolution pdf range dpi =
|
|||||||
(* Now, split into differing pages, and call [image_resolution_page] on each one *)
|
(* Now, split into differing pages, and call [image_resolution_page] on each one *)
|
||||||
let pagesplits =
|
let pagesplits =
|
||||||
map
|
map
|
||||||
(function (a, _, _)::_ as ls -> (a, ls) | _ -> assert false)
|
(function (a, _, _, _)::_ as ls -> (a, ls) | _ -> assert false)
|
||||||
(collate (fun (a, _, _) (b, _, _) -> compare a b) (rev !images))
|
(collate (fun (a, _, _, _) (b, _, _, _) -> compare a b) (rev !images))
|
||||||
and pages =
|
and pages =
|
||||||
Pdfpage.pages_of_pagetree pdf
|
Pdfpage.pages_of_pagetree pdf
|
||||||
in
|
in
|
||||||
|
@ -9,7 +9,7 @@ val extract_images :
|
|||||||
Cpdfmetadata.encoding -> bool -> bool -> Pdf.t -> int list -> string -> unit
|
Cpdfmetadata.encoding -> bool -> bool -> Pdf.t -> int list -> string -> unit
|
||||||
|
|
||||||
(** Report image resolutions. *)
|
(** Report image resolutions. *)
|
||||||
val image_resolution : Pdf.t -> int list -> float -> (int * string * int * int * float * float) list
|
val image_resolution : Pdf.t -> int list -> float -> (int * string * int * int * float * float * int) list
|
||||||
|
|
||||||
(** List images in JSON format *)
|
(** List images in JSON format *)
|
||||||
val images : Pdf.t -> int list -> Cpdfyojson.Safe.t
|
val images : Pdf.t -> int list -> Cpdfyojson.Safe.t
|
||||||
|
Loading…
x
Reference in New Issue
Block a user