mirror of
				https://github.com/johnwhitington/cpdf-source.git
				synced 2025-06-05 22:09:39 +02:00 
			
		
		
		
	more
This commit is contained in:
		
							
								
								
									
										33
									
								
								cpdfjson.ml
									
									
									
									
									
								
							
							
						
						
									
										33
									
								
								cpdfjson.ml
									
									
									
									
									
								
							| @@ -182,14 +182,14 @@ let mkfloat f = `Assoc [("F", `Float f)] | |||||||
| let mkint i = `Assoc [("I", `Int i)] | let mkint i = `Assoc [("I", `Int i)] | ||||||
| let mkname n = `Assoc [("N", `String n)] | let mkname n = `Assoc [("N", `String n)] | ||||||
|  |  | ||||||
| let rec json_of_object pdf fcs no_stream_data = function | let rec json_of_object pdf fcs no_stream_data pcs = function | ||||||
|   | P.Null -> `Null |   | P.Null -> `Null | ||||||
|   | P.Boolean b -> `Bool b |   | P.Boolean b -> `Bool b | ||||||
|   | P.Integer i -> mkint i |   | P.Integer i -> mkint i | ||||||
|   | P.Real r -> mkfloat r |   | P.Real r -> mkfloat r | ||||||
|   | P.String s -> `String s |   | P.String s -> `String s | ||||||
|   | P.Name n -> mkname n |   | P.Name n -> mkname n | ||||||
|   | P.Array objs -> `List (map (json_of_object pdf fcs no_stream_data) objs) |   | P.Array objs -> `List (map (json_of_object pdf fcs no_stream_data pcs) objs) | ||||||
|   | P.Dictionary elts -> |   | P.Dictionary elts -> | ||||||
|       iter |       iter | ||||||
|         (function |         (function | ||||||
| @@ -201,19 +201,19 @@ let rec json_of_object pdf fcs no_stream_data = function | |||||||
|           | ("/Contents", P.Array elts) -> iter (function P.Indirect i -> fcs i | _ -> ()) elts |           | ("/Contents", P.Array elts) -> iter (function P.Indirect i -> fcs i | _ -> ()) elts | ||||||
|           | _ -> ()) |           | _ -> ()) | ||||||
|         elts; |         elts; | ||||||
|       `Assoc (map (fun (k, v) -> (k, json_of_object pdf fcs no_stream_data v)) elts) |       `Assoc (map (fun (k, v) -> (k, json_of_object pdf fcs no_stream_data pcs v)) elts) | ||||||
|   | P.Stream ({contents = (P.Dictionary dict as d, stream)} as mut) as thestream -> |   | P.Stream ({contents = (P.Dictionary dict as d, stream)} as mut) as thestream -> | ||||||
|       P.getstream thestream; |       P.getstream thestream; | ||||||
|       let str = |       let str = | ||||||
|         match P.lookup_direct pdf "/FunctionType" d with |         match P.lookup_direct pdf "/FunctionType" d, pcs with | ||||||
|         | Some _ -> |         | Some _, true -> | ||||||
|             Pdfcodec.decode_pdfstream_until_unknown pdf thestream; |             Pdfcodec.decode_pdfstream_until_unknown pdf thestream; | ||||||
|             begin match !mut with (_, P.Got b) -> Pdfio.string_of_bytes b | _ -> error "/FunctionType: failure: decomp" end |             begin match !mut with (_, P.Got b) -> Pdfio.string_of_bytes b | _ -> error "/FunctionType: failure: decomp" end | ||||||
|         | None -> |         | _ -> | ||||||
|             if no_stream_data then "<<stream data elided>>" else |             if no_stream_data then "<<stream data elided>>" else | ||||||
|               match !mut with (_, P.Got b) -> Pdfio.string_of_bytes b | _ -> error "failure: toget" |               match !mut with (_, P.Got b) -> Pdfio.string_of_bytes b | _ -> error "failure: toget" | ||||||
|       in |       in | ||||||
|         json_of_object pdf fcs no_stream_data (P.Dictionary [("S", P.Array [P.Dictionary dict; P.String str])]) |         json_of_object pdf fcs no_stream_data pcs (P.Dictionary [("S", P.Array [P.Dictionary dict; P.String str])]) | ||||||
|   | P.Stream _ -> error "error: stream with not-a-dictionary" |   | P.Stream _ -> error "error: stream with not-a-dictionary" | ||||||
|   | P.Indirect i -> |   | P.Indirect i -> | ||||||
|       begin match P.lookup_obj pdf i with |       begin match P.lookup_obj pdf i with | ||||||
| @@ -254,7 +254,7 @@ let json_of_op pdf no_stream_data = function | |||||||
|       `List [mkfloat c; mkfloat m; mkfloat y; mkfloat k; `String "k"] |       `List [mkfloat c; mkfloat m; mkfloat y; mkfloat k; `String "k"] | ||||||
|   | O.Op_m (a, b) -> `List [mkfloat a; mkfloat b; `String "m"] |   | O.Op_m (a, b) -> `List [mkfloat a; mkfloat b; `String "m"] | ||||||
|   | O.Op_l (a, b) -> `List [mkfloat a; mkfloat b; `String "l"] |   | O.Op_l (a, b) -> `List [mkfloat a; mkfloat b; `String "l"] | ||||||
|   | O.Op_BDC (s, obj) -> `List [`String s; json_of_object pdf (fun _ -> ()) no_stream_data obj; `String "BDC"] |   | O.Op_BDC (s, obj) -> `List [`String s; json_of_object pdf (fun _ -> ()) no_stream_data false obj; `String "BDC"] | ||||||
|   | O.Op_gs s -> `List [`String s; `String "gs"] |   | O.Op_gs s -> `List [`String s; `String "gs"] | ||||||
|   | O.Op_Do s -> `List [`String s; `String "Do"] |   | O.Op_Do s -> `List [`String s; `String "Do"] | ||||||
|   | O.Op_CS s -> `List [`String s; `String "CS"] |   | O.Op_CS s -> `List [`String s; `String "CS"] | ||||||
| @@ -299,7 +299,7 @@ let json_of_op pdf no_stream_data = function | |||||||
|          mkfloat t.Pdftransform.d; mkfloat t.Pdftransform.e; mkfloat t.Pdftransform.f; |          mkfloat t.Pdftransform.d; mkfloat t.Pdftransform.e; mkfloat t.Pdftransform.f; | ||||||
|          `String "Tm"] |          `String "Tm"] | ||||||
|   | O.Op_Tj s -> `List [`String s; `String "Tj"] |   | O.Op_Tj s -> `List [`String s; `String "Tj"] | ||||||
|   | O.Op_TJ pdfobject -> `List [json_of_object pdf (fun _ -> ()) no_stream_data pdfobject; `String "TJ"] |   | O.Op_TJ pdfobject -> `List [json_of_object pdf (fun _ -> ()) no_stream_data false pdfobject; `String "TJ"] | ||||||
|   | O.Op_' s -> `List [`String s; `String "'"] |   | O.Op_' s -> `List [`String s; `String "'"] | ||||||
|   | O.Op_'' (k, k', s) -> `List [mkfloat k; mkfloat k'; `String s; `String "''"] |   | O.Op_'' (k, k', s) -> `List [mkfloat k; mkfloat k'; `String s; `String "''"] | ||||||
|   | O.Op_d0 (k, k') -> `List [mkfloat k; mkfloat k'; `String "d0"] |   | O.Op_d0 (k, k') -> `List [mkfloat k; mkfloat k'; `String "d0"] | ||||||
| @@ -325,9 +325,9 @@ let json_of_op pdf no_stream_data = function | |||||||
|   | O.Op_scnName (s, fs) -> |   | O.Op_scnName (s, fs) -> | ||||||
|       `List (map (fun x -> mkfloat x) fs @ [`String s; `String "scnName"]) |       `List (map (fun x -> mkfloat x) fs @ [`String s; `String "scnName"]) | ||||||
|   | O.InlineImage (dict, data) -> |   | O.InlineImage (dict, data) -> | ||||||
|       `List [json_of_object pdf (fun _ -> ()) no_stream_data dict; `String (Pdfio.string_of_bytes data); `String "InlineImage"] |       `List [json_of_object pdf (fun _ -> ()) no_stream_data false dict; `String (Pdfio.string_of_bytes data); `String "InlineImage"] | ||||||
|   | O.Op_DP (s, obj) -> |   | O.Op_DP (s, obj) -> | ||||||
|       `List [`String s; json_of_object pdf (fun _ -> ()) no_stream_data obj; `String "DP"] |       `List [`String s; json_of_object pdf (fun _ -> ()) no_stream_data false obj; `String "DP"] | ||||||
|  |  | ||||||
| (* parse_stream needs pdf and resources. These are for lexing of inline images, | (* parse_stream needs pdf and resources. These are for lexing of inline images, | ||||||
|  * looking up the colourspace. We do not need to worry about inherited |  * looking up the colourspace. We do not need to worry about inherited | ||||||
| @@ -358,19 +358,16 @@ let do_precombine_page_content pdf = | |||||||
|     Pdfpage.change_pages true pdf pages' |     Pdfpage.change_pages true pdf pages' | ||||||
|  |  | ||||||
| let json_of_pdf | let json_of_pdf | ||||||
|   ~parse_content |   ~parse_content ~no_stream_data ~decompress_streams ~precombine_page_content | ||||||
|   ~no_stream_data |  | ||||||
|   ~decompress_streams |  | ||||||
|   ~precombine_page_content |  | ||||||
|   pdf |   pdf | ||||||
| = | = | ||||||
|   let pdf = if parse_content && precombine_page_content then do_precombine_page_content pdf else pdf in |   let pdf = if parse_content && precombine_page_content then do_precombine_page_content pdf else pdf in | ||||||
|   if decompress_streams then |   if decompress_streams then | ||||||
|     Pdf.objiter (fun _ obj -> Pdfcodec.decode_pdfstream_until_unknown pdf obj) pdf; |     Pdf.objiter (fun _ obj -> Pdfcodec.decode_pdfstream_until_unknown pdf obj) pdf; | ||||||
|   Pdf.remove_unreferenced pdf; |   Pdf.remove_unreferenced pdf; | ||||||
|   let trailerdict = (0, json_of_object pdf (fun x -> ()) no_stream_data pdf.P.trailerdict) in |   let trailerdict = (0, json_of_object pdf (fun x -> ()) no_stream_data false pdf.P.trailerdict) in | ||||||
|   let parameters = |   let parameters = | ||||||
|     (-1, json_of_object pdf (fun x -> ()) false |     (-1, json_of_object pdf (fun x -> ()) false false | ||||||
|       (Pdf.Dictionary [("/CPDFJSONformatversion", Pdf.Integer 2); |       (Pdf.Dictionary [("/CPDFJSONformatversion", Pdf.Integer 2); | ||||||
|                        ("/CPDFJSONcontentparsed", Pdf.Boolean parse_content); |                        ("/CPDFJSONcontentparsed", Pdf.Boolean parse_content); | ||||||
|                        ("/CPDFJSONstreamdataincluded", Pdf.Boolean (not no_stream_data)); |                        ("/CPDFJSONstreamdataincluded", Pdf.Boolean (not no_stream_data)); | ||||||
| @@ -387,7 +384,7 @@ let json_of_pdf | |||||||
|     let ps = ref [] in |     let ps = ref [] in | ||||||
|       P.objiter |       P.objiter | ||||||
|         (fun i pdfobj -> |         (fun i pdfobj -> | ||||||
|           ps := (i, json_of_object pdf fcs no_stream_data pdfobj)::!ps) |           ps := (i, json_of_object pdf fcs no_stream_data parse_content pdfobj)::!ps) | ||||||
|         pdf; |         pdf; | ||||||
|       parameters::trailerdict::!ps |       parameters::trailerdict::!ps | ||||||
|   in |   in | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user