mirror of
				https://github.com/johnwhitington/cpdf-source.git
				synced 2025-06-05 22:09:39 +02:00 
			
		
		
		
	Obet -draw-struct-trees properly
This commit is contained in:
		
							
								
								
									
										44
									
								
								cpdfdraw.ml
									
									
									
									
									
								
							
							
						
						
									
										44
									
								
								cpdfdraw.ml
									
									
									
									
									
								
							| @@ -207,9 +207,9 @@ type structdata = | |||||||
|  |  | ||||||
| let structdata = ref [] | let structdata = ref [] | ||||||
|  |  | ||||||
| let rec ops_of_drawop dryrun pdf endpage filename bates batespad num page = function | let rec ops_of_drawop struct_tree dryrun pdf endpage filename bates batespad num page = function | ||||||
|   | Qq ops -> |   | Qq ops -> | ||||||
|       [Pdfops.Op_q] @ ops_of_drawops dryrun pdf endpage filename bates batespad num page ops @ [Pdfops.Op_Q] |       [Pdfops.Op_q] @ ops_of_drawops struct_tree dryrun pdf endpage filename bates batespad num page ops @ [Pdfops.Op_Q] | ||||||
|   | Matrix m -> [Pdfops.Op_cm m]  |   | Matrix m -> [Pdfops.Op_cm m]  | ||||||
|   | Rect (x, y, w, h) -> [Pdfops.Op_re (x, y, w, h)] |   | Rect (x, y, w, h) -> [Pdfops.Op_re (x, y, w, h)] | ||||||
|   | Bezier (a, b, c, d, e, f) -> [Pdfops.Op_c (a, b, c, d, e, f)] |   | Bezier (a, b, c, d, e, f) -> [Pdfops.Op_c (a, b, c, d, e, f)] | ||||||
| @@ -245,7 +245,7 @@ let rec ops_of_drawop dryrun pdf endpage filename bates batespad num page = func | |||||||
|   | SetMiterLimit m -> [Pdfops.Op_M m] |   | SetMiterLimit m -> [Pdfops.Op_M m] | ||||||
|   | SetDashPattern (x, y) -> [Pdfops.Op_d (x, y)] |   | SetDashPattern (x, y) -> [Pdfops.Op_d (x, y)] | ||||||
|   | FormXObject (a, b, c, d, n, ops) -> |   | FormXObject (a, b, c, d, n, ops) -> | ||||||
|       create_form_xobject dryrun a b c d pdf endpage filename bates batespad num page n ops; |       create_form_xobject struct_tree dryrun a b c d pdf endpage filename bates batespad num page n ops; | ||||||
|       [] |       [] | ||||||
|   | Use n -> |   | Use n -> | ||||||
|       let pdfname = try fst (Hashtbl.find (res ()).form_xobjects n) with _ -> error ("Form XObject not found: " ^ n) in |       let pdfname = try fst (Hashtbl.find (res ()).form_xobjects n) with _ -> error ("Form XObject not found: " ^ n) in | ||||||
| @@ -256,7 +256,9 @@ let rec ops_of_drawop dryrun pdf endpage filename bates batespad num page = func | |||||||
|         if not dryrun then structdata := StDataMCID ("/Figure", m, t)::!structdata; |         if not dryrun then structdata := StDataMCID ("/Figure", m, t)::!structdata; | ||||||
|       let pdfname = try fst (Hashtbl.find (res ()).images s) with _ -> error ("Image not found: " ^ s) in |       let pdfname = try fst (Hashtbl.find (res ()).images s) with _ -> error ("Image not found: " ^ s) in | ||||||
|         (res ()).page_names <- pdfname::(res ()).page_names; |         (res ()).page_names <- pdfname::(res ()).page_names; | ||||||
|         [Pdfops.Op_BDC ("/Figure", Pdf.Dictionary ["/MCID", Pdf.Integer m]); Pdfops.Op_Do pdfname; Pdfops.Op_EMC] |             (if struct_tree then [Pdfops.Op_BDC ("/Figure", Pdf.Dictionary ["/MCID", Pdf.Integer m])] else []) | ||||||
|  |           @ [Pdfops.Op_Do pdfname] | ||||||
|  |           @ (if struct_tree then [Pdfops.Op_EMC] else []) | ||||||
|   | ImageXObject (s, obj) -> |   | ImageXObject (s, obj) -> | ||||||
|       Hashtbl.replace (res ()).images s (fresh_name "/I", Pdf.addobj pdf obj);  |       Hashtbl.replace (res ()).images s (fresh_name "/I", Pdf.addobj pdf obj);  | ||||||
|       [] |       [] | ||||||
| @@ -313,11 +315,11 @@ let rec ops_of_drawop dryrun pdf endpage filename bates batespad num page = func | |||||||
|   | TextSection ops -> |   | TextSection ops -> | ||||||
|       let m = mcid () in |       let m = mcid () in | ||||||
|         if not dryrun then structdata := StDataMCID ("/P", m, None)::!structdata; |         if not dryrun then structdata := StDataMCID ("/P", m, None)::!structdata; | ||||||
|         [Pdfops.Op_BDC ("/P", Pdf.Dictionary ["/MCID", Pdf.Integer m]); |           (if struct_tree then [Pdfops.Op_BDC ("/P", Pdf.Dictionary ["/MCID", Pdf.Integer m])] else []) | ||||||
|          Pdfops.Op_BT] |         @ [Pdfops.Op_BT] | ||||||
|         @ ops_of_drawops dryrun pdf endpage filename bates batespad num page ops @ |         @ ops_of_drawops struct_tree dryrun pdf endpage filename bates batespad num page ops | ||||||
|         [Pdfops.Op_ET; |         @ [Pdfops.Op_ET]  | ||||||
|          Pdfops.Op_EMC] |         @ (if struct_tree then [Pdfops.Op_EMC] else []) | ||||||
|   | Text s -> |   | Text s -> | ||||||
|       if dryrun then iter (fun c -> Hashtbl.replace (res ()).current_fontpack_codepoints c ()) (Pdftext.codepoints_of_utf8 s); |       if dryrun then iter (fun c -> Hashtbl.replace (res ()).current_fontpack_codepoints c ()) (Pdftext.codepoints_of_utf8 s); | ||||||
|       runs_of_utf8 s |       runs_of_utf8 s | ||||||
| @@ -333,14 +335,14 @@ let rec ops_of_drawop dryrun pdf endpage filename bates batespad num page = func | |||||||
|   | Rise f -> [Pdfops.Op_Ts f] |   | Rise f -> [Pdfops.Op_Ts f] | ||||||
|   | Newline -> [Pdfops.Op_T'] |   | Newline -> [Pdfops.Op_T'] | ||||||
|  |  | ||||||
| and ops_of_drawops dryrun pdf endpage filename bates batespad num page drawops = | and ops_of_drawops draw_struct dryrun pdf endpage filename bates batespad num page drawops = | ||||||
|   flatten (map (ops_of_drawop dryrun pdf endpage filename bates batespad num page) drawops) |   flatten (map (ops_of_drawop draw_struct dryrun pdf endpage filename bates batespad num page) drawops) | ||||||
|  |  | ||||||
| and create_form_xobject dryrun a b c d pdf endpage filename bates batespad num page n ops = | and create_form_xobject struct_tree dryrun a b c d pdf endpage filename bates batespad num page n ops = | ||||||
|   respush (); |   respush (); | ||||||
|   reset_state (); |   reset_state (); | ||||||
|   let data = |   let data = | ||||||
|     Pdfio.bytes_of_string (Pdfops.string_of_ops (ops_of_drawops dryrun pdf endpage filename bates batespad num page ops)) |     Pdfio.bytes_of_string (Pdfops.string_of_ops (ops_of_drawops struct_tree dryrun pdf endpage filename bates batespad num page ops)) | ||||||
|   in |   in | ||||||
|   let obj = |   let obj = | ||||||
|     Pdf.Stream |     Pdf.Stream | ||||||
| @@ -420,14 +422,14 @@ let add_artifacts ops = | |||||||
|   in |   in | ||||||
|     loop [] ops |     loop [] ops | ||||||
|  |  | ||||||
| let draw_single ~fast ~underneath ~filename ~bates ~batespad range pdf drawops = | let draw_single ~struct_tree ~fast ~underneath ~filename ~bates ~batespad range pdf drawops = | ||||||
|   (res ()).num <- max (res ()).num (minimum_resource_number pdf range); |   (res ()).num <- max (res ()).num (minimum_resource_number pdf range); | ||||||
|   let endpage = Pdfpage.endpage pdf in |   let endpage = Pdfpage.endpage pdf in | ||||||
|   let pages = Pdfpage.pages_of_pagetree pdf in |   let pages = Pdfpage.pages_of_pagetree pdf in | ||||||
|   let ops = |   let ops = | ||||||
|     if contains_specials drawops |     if contains_specials drawops | ||||||
|       then None |       then None | ||||||
|       else Some (ops_of_drawops false pdf endpage filename bates batespad 0 (hd pages) drawops) |       else Some (ops_of_drawops struct_tree false pdf endpage filename bates batespad 0 (hd pages) drawops) | ||||||
|   in |   in | ||||||
|   let ss = |   let ss = | ||||||
|     map2 |     map2 | ||||||
| @@ -436,7 +438,7 @@ let draw_single ~fast ~underneath ~filename ~bates ~batespad range pdf drawops = | |||||||
|            then |            then | ||||||
|              (match ops with |              (match ops with | ||||||
|               | Some x -> x |               | Some x -> x | ||||||
|               | None -> ops_of_drawops false pdf endpage filename bates batespad n p drawops) |               | None -> ops_of_drawops struct_tree false pdf endpage filename bates batespad n p drawops) | ||||||
|            else []) |            else []) | ||||||
|       (ilist 1 endpage) |       (ilist 1 endpage) | ||||||
|       pages |       pages | ||||||
| @@ -445,7 +447,7 @@ let draw_single ~fast ~underneath ~filename ~bates ~batespad range pdf drawops = | |||||||
|     map3 |     map3 | ||||||
|       (fun n p ops -> |       (fun n p ops -> | ||||||
|         if not (mem n range) then p else |         if not (mem n range) then p else | ||||||
|           let ops = add_artifacts ops in |           let ops = if struct_tree then add_artifacts ops else ops in | ||||||
|           let page = {p with Pdfpage.resources = update_resources pdf p.Pdfpage.resources} in |           let page = {p with Pdfpage.resources = update_resources pdf p.Pdfpage.resources} in | ||||||
|             (if underneath then Pdfpage.prepend_operators else Pdfpage.postpend_operators) pdf ops ~fast page) |             (if underneath then Pdfpage.prepend_operators else Pdfpage.postpend_operators) pdf ops ~fast page) | ||||||
|       (ilist 1 endpage) |       (ilist 1 endpage) | ||||||
| @@ -455,7 +457,7 @@ let draw_single ~fast ~underneath ~filename ~bates ~batespad range pdf drawops = | |||||||
|     Pdfpage.change_pages true pdf pages |     Pdfpage.change_pages true pdf pages | ||||||
|  |  | ||||||
| (* Do a dry run of all the drawing to collect subset information. *) | (* Do a dry run of all the drawing to collect subset information. *) | ||||||
| let dryrun ~filename ~bates ~batespad range pdf chunks = | let dryrun ~struct_tree ~filename ~bates ~batespad range pdf chunks = | ||||||
|   let endpage = Pdfpage.endpage pdf in |   let endpage = Pdfpage.endpage pdf in | ||||||
|   let pages = Pdfpage.pages_of_pagetree pdf in |   let pages = Pdfpage.pages_of_pagetree pdf in | ||||||
|   let r = save_whole_stack () in |   let r = save_whole_stack () in | ||||||
| @@ -463,7 +465,7 @@ let dryrun ~filename ~bates ~batespad range pdf chunks = | |||||||
|   let pagenum = ref (hd range) in |   let pagenum = ref (hd range) in | ||||||
|     iter |     iter | ||||||
|       (fun chunk -> |       (fun chunk -> | ||||||
|          ignore (ops_of_drawops true pdf endpage filename bates batespad !pagenum (hd pages) chunk); |          ignore (ops_of_drawops struct_tree true pdf endpage filename bates batespad !pagenum (hd pages) chunk); | ||||||
|          match range with |          match range with | ||||||
|          | [x] when endpage > x -> pagenum := x + 1 |          | [x] when endpage > x -> pagenum := x + 1 | ||||||
|          | _ -> pagenum := endpage + 1) |          | _ -> pagenum := endpage + 1) | ||||||
| @@ -554,14 +556,14 @@ let draw ~struct_tree ~fast ~underneath ~filename ~bates ~batespad range pdf dra | |||||||
|   (* Double up a trailing NewPage so it actually does something... *) |   (* Double up a trailing NewPage so it actually does something... *) | ||||||
|   let drawops = match rev drawops with NewPage::t -> rev (NewPage::NewPage::t) | _ -> drawops in |   let drawops = match rev drawops with NewPage::t -> rev (NewPage::NewPage::t) | _ -> drawops in | ||||||
|   let chunks = ref (split_around (eq NewPage) drawops) in |   let chunks = ref (split_around (eq NewPage) drawops) in | ||||||
|   dryrun ~filename ~bates ~batespad !range !pdf !chunks; |   dryrun ~struct_tree ~filename ~bates ~batespad !range !pdf !chunks; | ||||||
|   mcpage := 0; |   mcpage := 0; | ||||||
|     while !chunks <> [] do |     while !chunks <> [] do | ||||||
|       mcidr := -1; |       mcidr := -1; | ||||||
|       mcpage += 1; |       mcpage += 1; | ||||||
|       structdata =| StDataPage !mcpage; |       structdata =| StDataPage !mcpage; | ||||||
|       reset_state (); |       reset_state (); | ||||||
|       if hd !chunks <> [] then pdf := draw_single ~fast ~underneath ~filename ~bates ~batespad !range !pdf (hd !chunks); |       if hd !chunks <> [] then pdf := draw_single ~struct_tree ~fast ~underneath ~filename ~bates ~batespad !range !pdf (hd !chunks); | ||||||
|       chunks := tl !chunks; |       chunks := tl !chunks; | ||||||
|       if !chunks <> [] then begin |       if !chunks <> [] then begin | ||||||
|         (* If the range is just a single page, and there is a next page, move to it. Otherwise, |         (* If the range is just a single page, and there is a next page, move to it. Otherwise, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user