mirror of
				https://github.com/johnwhitington/cpdf-source.git
				synced 2025-06-05 22:09:39 +02:00 
			
		
		
		
	more
This commit is contained in:
		
							
								
								
									
										23
									
								
								cpdfjson.ml
									
									
									
									
									
								
							
							
						
						
									
										23
									
								
								cpdfjson.ml
									
									
									
									
									
								
							| @@ -16,9 +16,6 @@ number, and flags used when writing (which may be required when reading): | |||||||
|   round-trip if false). |   round-trip if false). | ||||||
|   o /CPDFJSONmajorpdfversion (CPDFJSON integer) |   o /CPDFJSONmajorpdfversion (CPDFJSON integer) | ||||||
|   o /CPDFJSONminorpdfversion (CPDFJSON integer) |   o /CPDFJSONminorpdfversion (CPDFJSON integer) | ||||||
|   o /CPDFJSONisUTF8 (Optional. Format 3. If true, strings are converted to UTF8 |  | ||||||
|   before conversion to JSON, and converted back to PDFDocEncoding/UTF16BE during |  | ||||||
|   conversion to PDF.) |  | ||||||
|  |  | ||||||
| Object 0: The PDF's trailer dictionary | Object 0: The PDF's trailer dictionary | ||||||
|  |  | ||||||
| @@ -30,7 +27,17 @@ Objects 1..n: The PDF's objects. | |||||||
|   o Names are written as {"N": "/Pages"} |   o Names are written as {"N": "/Pages"} | ||||||
|   o Indirect references are integers |   o Indirect references are integers | ||||||
|   o Streams are {"S": [dict, data]} |   o Streams are {"S": [dict, data]} | ||||||
|   o Strings are converted into JSON strings in a way which is fully reversible.  |   o Strings are converted into JSON strings in a way which is fully reversible. | ||||||
|  |     In original (utf8=false) mode, the bytes of the string in PDF representation | ||||||
|  |     are converted into UTF8, rather than the string itself being converted. In | ||||||
|  |     UTF8 mode (utf8=true), instead: | ||||||
|  |       - If a String contains only PDFDocEncoding characters, it is converted | ||||||
|  |         to UTF8, and stored as {"U" : "..."} | ||||||
|  |       - If a String has a BOM and successfully converts to UTF8, it is converted | ||||||
|  |         to UTF8, and stored as {"V" : "..."} | ||||||
|  |       - If a String has a BOM but fails to convert, or has no BOM, it is stored | ||||||
|  |         in original mode, as an unmarked string. | ||||||
|  |     In all cases, this process is still reversible. | ||||||
|  |  | ||||||
| There are two subformats: parsing content streams or not.  Hello World in CPDF | There are two subformats: parsing content streams or not.  Hello World in CPDF | ||||||
| JSON without parsing content streams: | JSON without parsing content streams: | ||||||
| @@ -285,12 +292,15 @@ let mkfloat f = `Assoc [("F", `Float f)] | |||||||
| let mkint i = `Assoc [("I", `Int i)] | let mkint i = `Assoc [("I", `Int i)] | ||||||
| let mkname n = `Assoc [("N", `String n)] | let mkname n = `Assoc [("N", `String n)] | ||||||
|  |  | ||||||
|  | let json_string_of_pdfstring_utf8 = | ||||||
|  |   Pdftext.utf8_of_pdfdocstring | ||||||
|  |  | ||||||
| let rec json_of_object ~utf8 ?(clean_strings=false) pdf fcs ~no_stream_data ~parse_content = function | let rec json_of_object ~utf8 ?(clean_strings=false) pdf fcs ~no_stream_data ~parse_content = function | ||||||
|   | P.Null -> `Null |   | P.Null -> `Null | ||||||
|   | P.Boolean b -> `Bool b |   | P.Boolean b -> `Bool b | ||||||
|   | P.Integer i -> mkint i |   | P.Integer i -> mkint i | ||||||
|   | P.Real r -> mkfloat r |   | P.Real r -> mkfloat r | ||||||
|   | P.String s -> `String (if clean_strings then Pdftext.simplify_utf16be s else if utf8 then Pdftext.utf8_of_pdfdocstring s else s) |   | P.String s -> `String (if clean_strings then Pdftext.simplify_utf16be s else if utf8 then json_string_of_pdfstring_utf8 s else s) | ||||||
|   | P.Name n -> mkname n |   | P.Name n -> mkname n | ||||||
|   | P.Array objs -> `List (map (json_of_object ~utf8 pdf fcs ~no_stream_data ~parse_content) objs) |   | P.Array objs -> `List (map (json_of_object ~utf8 pdf fcs ~no_stream_data ~parse_content) objs) | ||||||
|   | P.Dictionary elts -> |   | P.Dictionary elts -> | ||||||
| @@ -489,8 +499,7 @@ let json_of_pdf | |||||||
|       (fun _ obj -> match obj with Pdf.Stream _ -> Pdfcodec.decode_pdfstream_until_unknown pdf obj | _ -> ()) |       (fun _ obj -> match obj with Pdf.Stream _ -> Pdfcodec.decode_pdfstream_until_unknown pdf obj | _ -> ()) | ||||||
|       pdf; |       pdf; | ||||||
|   Pdf.remove_unreferenced pdf; |   Pdf.remove_unreferenced pdf; | ||||||
|   (* Not UTF8, because /ID strings are not actually in PDFDocEncoding *) |   let trailerdict = (0, json_of_object ~utf8 pdf (fun x -> ()) ~no_stream_data ~parse_content:false pdf.P.trailerdict) in | ||||||
|   let trailerdict = (0, json_of_object ~utf8:false pdf (fun x -> ()) ~no_stream_data ~parse_content:false pdf.P.trailerdict) in |  | ||||||
|   let parameters = |   let parameters = | ||||||
|     (-1, json_of_object ~utf8 pdf (fun x -> ()) ~no_stream_data:false ~parse_content:false |     (-1, json_of_object ~utf8 pdf (fun x -> ()) ~no_stream_data:false ~parse_content:false | ||||||
|       (Pdf.Dictionary [("/CPDFJSONformatversion", Pdf.Integer 3); |       (Pdf.Dictionary [("/CPDFJSONformatversion", Pdf.Integer 3); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user