mirror of
				https://github.com/johnwhitington/cpdf-source.git
				synced 2025-06-05 22:09:39 +02:00 
			
		
		
		
	Obey -draw-struct-trees properly
This commit is contained in:
		
							
								
								
									
										44
									
								
								cpdfdraw.ml
									
									
									
									
									
								
							
							
						
						
									
										44
									
								
								cpdfdraw.ml
									
									
									
									
									
								
							| @@ -207,9 +207,9 @@ type structdata = | ||||
|  | ||||
| let structdata = ref [] | ||||
|  | ||||
| let rec ops_of_drawop dryrun pdf endpage filename bates batespad num page = function | ||||
| let rec ops_of_drawop struct_tree dryrun pdf endpage filename bates batespad num page = function | ||||
|   | Qq ops -> | ||||
|       [Pdfops.Op_q] @ ops_of_drawops dryrun pdf endpage filename bates batespad num page ops @ [Pdfops.Op_Q] | ||||
|       [Pdfops.Op_q] @ ops_of_drawops struct_tree dryrun pdf endpage filename bates batespad num page ops @ [Pdfops.Op_Q] | ||||
|   | Matrix m -> [Pdfops.Op_cm m]  | ||||
|   | Rect (x, y, w, h) -> [Pdfops.Op_re (x, y, w, h)] | ||||
|   | Bezier (a, b, c, d, e, f) -> [Pdfops.Op_c (a, b, c, d, e, f)] | ||||
| @@ -245,7 +245,7 @@ let rec ops_of_drawop dryrun pdf endpage filename bates batespad num page = func | ||||
|   | SetMiterLimit m -> [Pdfops.Op_M m] | ||||
|   | SetDashPattern (x, y) -> [Pdfops.Op_d (x, y)] | ||||
|   | FormXObject (a, b, c, d, n, ops) -> | ||||
|       create_form_xobject dryrun a b c d pdf endpage filename bates batespad num page n ops; | ||||
|       create_form_xobject struct_tree dryrun a b c d pdf endpage filename bates batespad num page n ops; | ||||
|       [] | ||||
|   | Use n -> | ||||
|       let pdfname = try fst (Hashtbl.find (res ()).form_xobjects n) with _ -> error ("Form XObject not found: " ^ n) in | ||||
| @@ -256,7 +256,9 @@ let rec ops_of_drawop dryrun pdf endpage filename bates batespad num page = func | ||||
|         if not dryrun then structdata := StDataMCID ("/Figure", m, t)::!structdata; | ||||
|       let pdfname = try fst (Hashtbl.find (res ()).images s) with _ -> error ("Image not found: " ^ s) in | ||||
|         (res ()).page_names <- pdfname::(res ()).page_names; | ||||
|         [Pdfops.Op_BDC ("/Figure", Pdf.Dictionary ["/MCID", Pdf.Integer m]); Pdfops.Op_Do pdfname; Pdfops.Op_EMC] | ||||
|             (if struct_tree then [Pdfops.Op_BDC ("/Figure", Pdf.Dictionary ["/MCID", Pdf.Integer m])] else []) | ||||
|           @ [Pdfops.Op_Do pdfname] | ||||
|           @ (if struct_tree then [Pdfops.Op_EMC] else []) | ||||
|   | ImageXObject (s, obj) -> | ||||
|       Hashtbl.replace (res ()).images s (fresh_name "/I", Pdf.addobj pdf obj);  | ||||
|       [] | ||||
| @@ -313,11 +315,11 @@ let rec ops_of_drawop dryrun pdf endpage filename bates batespad num page = func | ||||
|   | TextSection ops -> | ||||
|       let m = mcid () in | ||||
|         if not dryrun then structdata := StDataMCID ("/P", m, None)::!structdata; | ||||
|         [Pdfops.Op_BDC ("/P", Pdf.Dictionary ["/MCID", Pdf.Integer m]); | ||||
|          Pdfops.Op_BT] | ||||
|         @ ops_of_drawops dryrun pdf endpage filename bates batespad num page ops @ | ||||
|         [Pdfops.Op_ET; | ||||
|          Pdfops.Op_EMC] | ||||
|           (if struct_tree then [Pdfops.Op_BDC ("/P", Pdf.Dictionary ["/MCID", Pdf.Integer m])] else []) | ||||
|         @ [Pdfops.Op_BT] | ||||
|         @ ops_of_drawops struct_tree dryrun pdf endpage filename bates batespad num page ops | ||||
|         @ [Pdfops.Op_ET]  | ||||
|         @ (if struct_tree then [Pdfops.Op_EMC] else []) | ||||
|   | Text s -> | ||||
|       if dryrun then iter (fun c -> Hashtbl.replace (res ()).current_fontpack_codepoints c ()) (Pdftext.codepoints_of_utf8 s); | ||||
|       runs_of_utf8 s | ||||
| @@ -333,14 +335,14 @@ let rec ops_of_drawop dryrun pdf endpage filename bates batespad num page = func | ||||
|   | Rise f -> [Pdfops.Op_Ts f] | ||||
|   | Newline -> [Pdfops.Op_T'] | ||||
|  | ||||
| and ops_of_drawops dryrun pdf endpage filename bates batespad num page drawops = | ||||
|   flatten (map (ops_of_drawop dryrun pdf endpage filename bates batespad num page) drawops) | ||||
| and ops_of_drawops draw_struct dryrun pdf endpage filename bates batespad num page drawops = | ||||
|   flatten (map (ops_of_drawop draw_struct dryrun pdf endpage filename bates batespad num page) drawops) | ||||
|  | ||||
| and create_form_xobject dryrun a b c d pdf endpage filename bates batespad num page n ops = | ||||
| and create_form_xobject struct_tree dryrun a b c d pdf endpage filename bates batespad num page n ops = | ||||
|   respush (); | ||||
|   reset_state (); | ||||
|   let data = | ||||
|     Pdfio.bytes_of_string (Pdfops.string_of_ops (ops_of_drawops dryrun pdf endpage filename bates batespad num page ops)) | ||||
|     Pdfio.bytes_of_string (Pdfops.string_of_ops (ops_of_drawops struct_tree dryrun pdf endpage filename bates batespad num page ops)) | ||||
|   in | ||||
|   let obj = | ||||
|     Pdf.Stream | ||||
| @@ -420,14 +422,14 @@ let add_artifacts ops = | ||||
|   in | ||||
|     loop [] ops | ||||
|  | ||||
| let draw_single ~fast ~underneath ~filename ~bates ~batespad range pdf drawops = | ||||
| let draw_single ~struct_tree ~fast ~underneath ~filename ~bates ~batespad range pdf drawops = | ||||
|   (res ()).num <- max (res ()).num (minimum_resource_number pdf range); | ||||
|   let endpage = Pdfpage.endpage pdf in | ||||
|   let pages = Pdfpage.pages_of_pagetree pdf in | ||||
|   let ops = | ||||
|     if contains_specials drawops | ||||
|       then None | ||||
|       else Some (ops_of_drawops false pdf endpage filename bates batespad 0 (hd pages) drawops) | ||||
|       else Some (ops_of_drawops struct_tree false pdf endpage filename bates batespad 0 (hd pages) drawops) | ||||
|   in | ||||
|   let ss = | ||||
|     map2 | ||||
| @@ -436,7 +438,7 @@ let draw_single ~fast ~underneath ~filename ~bates ~batespad range pdf drawops = | ||||
|            then | ||||
|              (match ops with | ||||
|               | Some x -> x | ||||
|               | None -> ops_of_drawops false pdf endpage filename bates batespad n p drawops) | ||||
|               | None -> ops_of_drawops struct_tree false pdf endpage filename bates batespad n p drawops) | ||||
|            else []) | ||||
|       (ilist 1 endpage) | ||||
|       pages | ||||
| @@ -445,7 +447,7 @@ let draw_single ~fast ~underneath ~filename ~bates ~batespad range pdf drawops = | ||||
|     map3 | ||||
|       (fun n p ops -> | ||||
|         if not (mem n range) then p else | ||||
|           let ops = add_artifacts ops in | ||||
|           let ops = if struct_tree then add_artifacts ops else ops in | ||||
|           let page = {p with Pdfpage.resources = update_resources pdf p.Pdfpage.resources} in | ||||
|             (if underneath then Pdfpage.prepend_operators else Pdfpage.postpend_operators) pdf ops ~fast page) | ||||
|       (ilist 1 endpage) | ||||
| @@ -455,7 +457,7 @@ let draw_single ~fast ~underneath ~filename ~bates ~batespad range pdf drawops = | ||||
|     Pdfpage.change_pages true pdf pages | ||||
|  | ||||
| (* Do a dry run of all the drawing to collect subset information. *) | ||||
| let dryrun ~filename ~bates ~batespad range pdf chunks = | ||||
| let dryrun ~struct_tree ~filename ~bates ~batespad range pdf chunks = | ||||
|   let endpage = Pdfpage.endpage pdf in | ||||
|   let pages = Pdfpage.pages_of_pagetree pdf in | ||||
|   let r = save_whole_stack () in | ||||
| @@ -463,7 +465,7 @@ let dryrun ~filename ~bates ~batespad range pdf chunks = | ||||
|   let pagenum = ref (hd range) in | ||||
|     iter | ||||
|       (fun chunk -> | ||||
|          ignore (ops_of_drawops true pdf endpage filename bates batespad !pagenum (hd pages) chunk); | ||||
|          ignore (ops_of_drawops struct_tree true pdf endpage filename bates batespad !pagenum (hd pages) chunk); | ||||
|          match range with | ||||
|          | [x] when endpage > x -> pagenum := x + 1 | ||||
|          | _ -> pagenum := endpage + 1) | ||||
| @@ -554,14 +556,14 @@ let draw ~struct_tree ~fast ~underneath ~filename ~bates ~batespad range pdf dra | ||||
|   (* Double up a trailing NewPage so it actually does something... *) | ||||
|   let drawops = match rev drawops with NewPage::t -> rev (NewPage::NewPage::t) | _ -> drawops in | ||||
|   let chunks = ref (split_around (eq NewPage) drawops) in | ||||
|   dryrun ~filename ~bates ~batespad !range !pdf !chunks; | ||||
|   dryrun ~struct_tree ~filename ~bates ~batespad !range !pdf !chunks; | ||||
|   mcpage := 0; | ||||
|     while !chunks <> [] do | ||||
|       mcidr := -1; | ||||
|       mcpage += 1; | ||||
|       structdata =| StDataPage !mcpage; | ||||
|       reset_state (); | ||||
|       if hd !chunks <> [] then pdf := draw_single ~fast ~underneath ~filename ~bates ~batespad !range !pdf (hd !chunks); | ||||
|       if hd !chunks <> [] then pdf := draw_single ~struct_tree ~fast ~underneath ~filename ~bates ~batespad !range !pdf (hd !chunks); | ||||
|       chunks := tl !chunks; | ||||
|       if !chunks <> [] then begin | ||||
|         (* If the range is just a single page, and there is a next page, move to it. Otherwise, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user