From 1f122720d14b6fa048f0309e2a405a47ac1942fa Mon Sep 17 00:00:00 2001 From: John Whitington Date: Sat, 14 Nov 2020 16:40:01 +0000 Subject: [PATCH] Moving things --- cpdf.ml | 64 ++++++++++++++++++++++++++++++++++++++++++++ cpdf.mli | 5 ++++ cpdfcommand.ml | 70 +++---------------------------------------------- cpdfmanual.pdf | Bin 239726 -> 239739 bytes cpdfmanual.tex | 4 +-- 5 files changed, 74 insertions(+), 69 deletions(-) diff --git a/cpdf.ml b/cpdf.ml index b6e343d..ac969ca 100644 --- a/cpdf.ml +++ b/cpdf.ml @@ -4567,3 +4567,67 @@ let trim_marks_page fast pdf n page = let trim_marks ?(fast=false) pdf range = process_pages (trim_marks_page fast pdf) pdf range + +let rec remove_all_text_ops pdf resources content = + let is_textop = function + Pdfops.Op_Tj _ | Pdfops.Op_' _ | Pdfops.Op_'' _ | Pdfops.Op_TJ _ -> true + | _ -> false + in + let content' = + let ops = Pdfops.parse_operators pdf resources content in + Pdfops.stream_of_ops + (option_map (function x -> if is_textop x then None else Some x) ops) + in + [content'] + +let remove_all_text_page pdf p = + let resources = p.Pdfpage.resources in + let content = p.Pdfpage.content in + process_xobjects pdf p remove_all_text_ops; + {p with Pdfpage.content = remove_all_text_ops pdf resources content}, pdf + +let remove_all_text range pdf = + let pages = Pdfpage.pages_of_pagetree pdf in + let pagenums = indx pages in + let pdf = ref pdf in + let pages' = ref [] in + iter2 + (fun p pagenum -> + let p', pdf' = + if mem pagenum range + then remove_all_text_page !pdf p + else p, !pdf + in + pdf := pdf'; + pages' =| p') + pages + pagenums; + Pdfpage.change_pages true !pdf (rev !pages') + +let remove_dict_entry pdf key = + Pdf.objselfmap + (function + (Pdf.Dictionary _ as d) | (Pdf.Stream _ as d) -> + Pdf.remove_dict_entry d key + | x -> x) + pdf; + pdf.Pdf.trailerdict <- Pdf.remove_dict_entry pdf.Pdf.trailerdict key + +let remove_clipping_ops pdf resources content = + let ops = Pdfops.parse_operators pdf resources content in + let rec process a = function + Pdfops.Op_W::Pdfops.Op_n::t -> process (Pdfops.Op_n::a) t + | h::t -> process (h::a) t + | [] -> rev a + in + [Pdfops.stream_of_ops (process [] ops)] + +let remove_clipping pdf range = + let remove_clipping_page _ page = + let content' = + remove_clipping_ops pdf page.Pdfpage.resources page.Pdfpage.content + in + process_xobjects pdf page remove_clipping_ops; + {page with Pdfpage.content = content'} + in + process_pages remove_clipping_page pdf range diff --git a/cpdf.mli b/cpdf.mli index 2a51631..6835fad 100644 --- a/cpdf.mli +++ b/cpdf.mli @@ -431,6 +431,8 @@ val draft : string option -> bool -> int list -> Pdf.t -> Pdf.t (** Squeeze a PDF *) val squeeze : ?logto:string -> ?pagedata:bool -> ?recompress:bool -> Pdf.t -> unit +val remove_all_text : int list -> Pdf.t -> Pdf.t + (**/**) val process_xobjects : Pdf.t -> Pdfpage.t -> (Pdf.t -> Pdf.pdfobject -> Pdf.pdfobject list -> Pdf.pdfobject list) -> unit @@ -464,3 +466,6 @@ val ocg_order_all : Pdf.t -> unit val stamp_as_xobject : Pdf.t -> int list -> Pdf.t -> Pdf.t * string +val remove_dict_entry : Pdf.t -> string -> unit + +val remove_clipping : Pdf.t -> int list -> Pdf.t diff --git a/cpdfcommand.ml b/cpdfcommand.ml index 53fce7f..76ea264 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -3406,26 +3406,6 @@ let list_spot_colours pdf = | _ -> ()) pdf -let remove_clipping_ops pdf resources content = - let ops = Pdfops.parse_operators pdf resources content in - let rec process a = function - Pdfops.Op_W::Pdfops.Op_n::t -> process (Pdfops.Op_n::a) t - | h::t -> process (h::a) t - | [] -> rev a - in - [Pdfops.stream_of_ops (process [] ops)] - -let remove_clipping pdf range = - let remove_clipping_page _ page = - let content' = - remove_clipping_ops pdf page.Pdfpage.resources page.Pdfpage.content - in - Cpdf.process_xobjects pdf page remove_clipping_ops; - {page with Pdfpage.content = content'} - in - Cpdf.process_pages remove_clipping_page pdf range - - (* Indent bookmarks in each file by one and add a title bookmark pointing to the first page. *) let add_bookmark_title filename use_title pdf = let title = @@ -3468,43 +3448,6 @@ let create_pdf pages pagesize = let pdf, pageroot = Pdfpage.add_pagetree (many page args.createpdf_pages) (Pdf.empty ()) in Pdfpage.add_root pageroot [] pdf -let rec remove_all_text_ops pdf resources content = - let is_textop = function - Pdfops.Op_Tj _ | Pdfops.Op_' _ | Pdfops.Op_'' _ | Pdfops.Op_TJ _ -> true - | _ -> false - in - let content' = - let ops = Pdfops.parse_operators pdf resources content in - Pdfops.stream_of_ops - (option_map (function x -> if is_textop x then None else Some x) ops) - in - [content'] - -let remove_all_text_page pdf p = - let resources = p.Pdfpage.resources in - let content = p.Pdfpage.content in - Cpdf.process_xobjects pdf p remove_all_text_ops; - {p with Pdfpage.content = remove_all_text_ops pdf resources content}, pdf - -let remove_all_text range pdf = - let pages = Pdfpage.pages_of_pagetree pdf in - let pagenums = indx pages in - let pdf = ref pdf in - let pages' = ref [] in - iter2 - (fun p pagenum -> - let p', pdf' = - if mem pagenum range - then remove_all_text_page !pdf p - else p, !pdf - in - pdf := pdf'; - pages' =| p') - pages - pagenums; - Pdfpage.change_pages true !pdf (rev !pages') - - let write_json output pdf = match output with | NoOutputSpecified -> @@ -4339,14 +4282,7 @@ let go () = (map Pdfpagelabels.string_of_pagelabel (Pdfpagelabels.read pdf)) | Some (RemoveDictEntry key) -> let pdf = get_single_pdf args.op true in - (* 1. Process all objects *) - Pdf.objselfmap - (function - (Pdf.Dictionary _ as d) | (Pdf.Stream _ as d) -> - Pdf.remove_dict_entry d key - | x -> x) - pdf; - (* FIXME: We might like to do the trailer dictionary too *) + Cpdf.remove_dict_entry pdf key; write_pdf false pdf | Some ListSpotColours -> let pdf = get_single_pdf args.op false in @@ -4354,7 +4290,7 @@ let go () = | Some RemoveClipping -> let pdf = get_single_pdf args.op false in let range = parse_pagespec pdf (get_pagespec ()) in - write_pdf false (remove_clipping pdf range) + write_pdf false (Cpdf.remove_clipping pdf range) | Some CreateMetadata -> let pdf = get_single_pdf args.op false in write_pdf false (Cpdf.create_metadata pdf) @@ -4379,7 +4315,7 @@ let go () = | Some RemoveAllText -> let pdf = get_single_pdf args.op false in let range = parse_pagespec pdf (get_pagespec ()) in - write_pdf false (remove_all_text range pdf) + write_pdf false (Cpdf.remove_all_text range pdf) | Some ShowBoxes -> let pdf = get_single_pdf args.op false in let range = parse_pagespec pdf (get_pagespec ()) in diff --git a/cpdfmanual.pdf b/cpdfmanual.pdf index 29e0092a031ebad74f7458193ff54b0f5a708ea1..7b9c0ddd4d0659255b107f5b03be402de609f5c6 100644 GIT binary patch delta 3729 zcmah`X*d)Nw4TM-hwMU#FjCo#Z5TdFl)X$whRD8+eVH;NMJO$lT~pR<$tX)?Un65{ zXhvr2vgET1x%!^_%Qu8!W%^{0%1eD%Vmtx6a5?s77rlca0I%uf)_?`w8CpDN$obHpm0M)O!wjhe?tS zz8e*`^F^+e*V4>kL#qD!xiAB-ALk%K=PozQY9IN_$raCE@8Za+cHQ3oshY)RJ0`H{ zq1J2kkB=;Qn%m#e{gK#c9lrL@;lwYL=W(90rYYaYn^KWrfEo3b6TZ-lrF4)@Cdy7O zg5mOtwJY(|kN$JcU;k*w(*dno($^jG+K8txkV*lbnPHY(Av24tYW^SlCZ`M_H<^`D z!bqL4?CbWnSss%Y;9L~SM06u+XStfA)X)AXvaC8tiMvEo;?gt^0{=7+Nm(F?ee|($ zm;RH26Z(pLen;?hN9Q=7-1M2@jQ+Q{-LH$j($wr@4$7jz1RSXUK2y% zE9xD~(J+Z4Ir=$Jpd&cE4>EDxTj$f^*R9^D@b7~l0(cW&H%8J;7qGU=Wj z^~mA(y5)UVBzaOalOB0UCrxlBwX4dTSDxUmJMDKc*oO6Hxw?Y536+=6dARd z8wd{$?R$TV!`tOrXP77At*T1(d*S5TVF+d- zfi@Yk=T_>f&FNr!Y+K*n2e*{IVZ0E=-dj{}&&$6DFU2aUtxJA;D|8Y3@K-qd74VBi z-k$aZ(}~;~J&HSYyUhQZ<0z1y{UeUekIs)_K}!Frl>XuItJ(V0q)!>e1ip<ZL@pR+?{`WbbWROKoS*cx%3vSC z1h10})`b=0Fu}Z-ceztTWPi;0e~Y0n?q6gpH`A zW5K{Po;?w4T1@lzY{J|b1t&F^glCpraM!w3^75aAH{BiY(<6!N;vJ!1ybUYkU(4mT z{dpxBZ@1 zMa#1*S_IxUx)tg!2{Y1GQBqM-R#yIBvi9E>U<9j;iHU}qp^*t(OU=+&Rnq`&2shMH zGEy;CSJhB4G0`&ClhplxB~I*$SKR|VL%k$b)HKr1ascJ=G8s-+ogB_|?jB=ch5XUx z-tvXkX!aujyL)XRvL|6aFxqNQR(F4cL>ha?biVJxp3VxNe6ZWprgr}@ys>MS%b;y-0IPbb-y-V-hA(0uQ@Q`+phb8UdB!8k`q8CUv1=Srfl zw1*XyPS$nZc{q_Ovc@Un@gxJWO2~JX`U~8##V}=D$BH*xtV!@MFtgCSoH%jfIiN3& zJp2nPu;8=|gzHY|sTA=OhwDzkytcuAl5WO2-n3(dhg2+HhcWLLSH`)9;NollYy6h& ziQRuAvraji{H1L9#0&pw<Gzz`7Q(!VY zx2=?ccx%9A<8xLChrT~j;mxCrL&q-@(*PNW$+!_WGu;z?{Rca|`JTCHT9d}bi-}px zfl>bG1No(fo|s=>uxLwdjg37__BTEA2E_EmW&ulEdRou(z3958w7Lt^@F{@Y=^(G! zCi<8!4i*&A5%>ADpKg{^yP?eyKh0qN)XUeG!M;9GHv|% zf#H5~lIj(VC_eAIAgNnM(?1e=QaO2JF;|YC1qk-3i#ABNb@^BanNt9!v2HCvM3zO!Zvr1uUkQ-I z#0=!l0a=1Q31?mSg_(CUzQh!mYI0P`-*uMiJOtcr!qiApA%R zrxHg$kV05)(X9e_wFZEGn^){fRWW%@`~rPxLAXF}K-R$A1a+4);{b6`wvpJJ(SsIV z+Qa&ysbtj<#Y5YCA7o|~vo&uyB<=Vopzw1v(jkzAc)ym=%b9!sQ zccAI;v#hng>5^Ds$ku>pf73>(2alfVY-2AiLbT)Tos^!akOv3lnxlh5@Idw5Bj?0_ zj>HN#FA>{m+*3mxXYp(951zdVsjYvN;ss)abk2&0G?Vk^&4=Y;lU_F0D_FjD_&0(~ zqjF5^643ik>xvzAY8Gu;XUxiv?c(wwkhM4Znn?JiUpNEj+@(#H>S6$y6k%XT?@U5w$!E4zE^wdt9 zBpTiSUGO9=AzK8Yc5s-G^sw(f`yGLZ;|0&@#kc}Vl$(8+NzR4@dr>s00UgvA_An={ zM`S=`*UONq?VQJ7kyf8IkVRE5D=E4#=J74YB4=7KOK!^|*TX5$+i;)#V(N37T8D_o zviv2#WA#YlOZaU5G%{R%nll4>$A&Kl3r7cp$I}+{_e$>5KEBZ zbIMuEn0$$5W2CM{jjjB6)e&S4ZD_oA+%Pe(W#PAu@3Mb6rFJl?FTk2J@x)h6YV*K{ zqUTWj#4gLhaxOO5Gq}*PQ4% z5CdK34r+{U8lI0L!Sj;R3yZ)Kyg4jse1QP-gp#=zY`s4)X6sz(--^KAc#oY~YQto> zf_^kgx-AH~M21RwNs6%yW68J|syCjN7BG;9(Ai033$#RH)_M!dL_=mkqYH;Yu_I>I zJyK1~(>~;RNAJqoJc>VA5XbDF-|vrm&f~uB^FGh(+;i{t2Vu?!;rM27{0*>Hj@A=Z z;Hk@*&@Xy|9osGo$bq`^+2#0BJK=BPaHHXGVS24sg_}=KUj6;xv{BCsjnfV?M)07O zg&rld>s$&3T;K*N9DH!=9DpElKU-r6;8w2v(jYOmggNi zHwlRnKAjY4vKTHao4VXv^ECl`x94<9Lf;d??_tQoWy_iAin%&d_^B}aH~{2X zifNpwb0CUN;Cg$^^;Z#Cn2e{N=pXvU?xo1z6^~3B|V-Fr<4F`W``;IL=%PW3A6LZGo z)_OWIdsR2NuJEG%$anD4;&|C>;MxkF>MT=I82p$?G_reR71H=PQGi?@SRQTk8E~+> z_V5jEy=0#&qA#ypjZb63y^zB{Yj}&T`}*?hc0NT2O-s_i^q^>+nq16w+CR}Pb_Y|; zPAGal!~Kk^jgYGxWOg&7&Vj$agvV=IY_er4@4vVQw$l6)s;6!O=1)G0fV>ayg@&2G@oG-G|>5j`q zc8IOsca$_WlNMR)z9#xO@Am^Dwd?2f$o`2gCtM#b6;9Z^(5$G-E4tim0Zk6)XYG>h z$Af+5Kg=Kdk~}{fkSaUhwykcgG4}~r?mT$UO2jNIl8W2)8srt30f>_MJZgi=cedfT z8{Pdp>Q}_RyOdD%YN1GptKnjV6sJfzkkAGl7Wt!e+XCM_X5HY1O0`<~5>7~jd`tf5 zUw{FRjJ6PQ=5geKT*d<{;FYBT{hG?s&t)}aYltbr@xiIjNR@}mhTJy~8A8PelXJkA z8~lyzkTT9dm%0aFDqtc|?zGx9;{XgQKDhK;$+*NQY)D2Z@5i%#DIM|)!C8|B9B=)W z?_*LIif&1sT;A~<=)0L^yL^wNA$$BnoVT_cI0>@qfIXXkFPd;;FM7K!@+sXun};vu z^aT@SoW;)+1t&E~m&VbwZk%VZSsSfj<1%&qlLE|>mA_X8^vC~WQecR6c#5C7cj<|- zvgC)|+1&x-Y5UB~=J*MIc1+r=-03gm>6m=d`%S&j419Rgiyx=dJ}cW7!iMjLE;m^$ zi)bX_8^xw8$&zGzoO z2t^Mf9qpr0=G(O~5&%?~FEsDfF=fG##vq5^chGBMaPRj?cZdd=Z$~tLS2~UFD|VA^ ziZe0YdITcfOWwDfc|Pg+=7>Z0_t3@lMvC>=F%P-pxK^R)w8-0Fk4Oo2F z_LaSQVY@E2G?W7+iE0g2c0_J7vG4+se$RcTdc2pG(mLtKLqW}^PJ*DQs zVRCTckW;gQ#Jomb=O6^-!6D%tv`~agbBS^GFhygPs!V z)nXo=;-?w(o?-aVPS60?-e4wdN}49!`;dX6HIasM#TiOdQ8d-w9}I~pK8kH@BoF8C zlu}iPuP{f9qFz-(Nu>)$kk7(td=Wy}*0NWwi6x12i~A|+=xRpkX3 zWy}6{1A)d{vqwLD-`XveE>u(HC_bI)oG`&k}=Xg_DqD`mz?tUa$<`$Y`9tmJ>_`TSEqIcnvP!`tl|U491N#@~L433S_5{kE3Yx)YAJNpL)T zW-j98Vnw%)=!;DrbWfV4;u3cQKA^h$vnYJ)0QxD2a9fA8%`8T0#r*3zI}+fJ=#zKF zHV8Fji8Z~kLI+0roAk;5d@y#S0C#EaZDUMeB;fDfC%?q->k6QDvxB*@CinB-u(N#D zKP`tlTw0?EN5HV*oorFn9Y30(ZJ)gU?AEL|s!tx#dEdU((YcRkuSN)3oag>`qz*;m zp_U*L{~ppf@_Bflzb#lp!4&3Vbzgv5GM0HCc*y-dK00(&s5@PS%<`WZMMBjw&I7`A z7igID$XWFYIR=&r9v5J1jV+(spX~lh)bc<;8+j66C2;3s5Y$CLzuXbdbxK#Df+P1a~pGy|{e!3U<%mCJB2v^0gi$gQXJw zth|>j4dTIF5B{^{B)LLJHIN|fb)XPR1g$V2kNJE%^Fgsy&ex@BP~QoccqgX(W1c#w z3?r5ae#~#oB@cE#DLa2Mg&{Uo+g82aUDQ?(cOQqM+MS0Y`9~qWz-s*28LXLWkQ6n! zRmP!y5{m3!$PbGe>TpV(o&DF73%ko}qax8}d(s}obBMq6q1r!wD;_O*Xd%iPGyQv9V|k;>}W>DdY_ z+Fq8lHrQf0vG9B6m7g3&_E3ZH0h|(GgzNiPWNAfMW=B*~o6CjaV=up^QS^(piAm_> z0a1gX`=oo4satSGb$k-%jG^tJ~l$y3yWin{WxyJbuXyHO}6`mAQqaN(Gn8 z&L}19CY*a1+qC5t+Cha|eo5 zeq%}ro&&mN*HiLsdTJ-@=+@$}b2SMkn&iI$B@($G^i}76m#hLfaS92Vk+bY`NY7}y z`iKK6INoMMn_MhpI?6SQp`0A@Y06TWb1)4Sv+*sePT+4kGo(K|ldK`WRW03=E`>^9 z@gEnjNl&&ME}wX=_o@4$f>+N;sTw+=waVFIqcR4sPhVFFrkqVuZ(zRgYq^FOherXI zkG=~kiv4~8#`T`9g77=ZS|1@?*lMtC|JAd8cbW5}uj3a2L!t^k`zY_#I&lUOkmy*Y zG7@u`9;6-fye^%<6!}0{Bl`#Qf)3M{AclSTP0Eqw2UjXqC8b}imeVeSOl>#hn_RAR zuZGCLjCSUjy!{G#KT(@T zr6K(yYW2IDW2-?q1YAF&zC<$P5XRMSxyX9h*=?iPT1Uw_`cvg#UEafe>L4ZW>-5NJ zcUmTE>-gKcoke|JHP_A625q&1{Tk@u0(dPH?w*)~N$|GfjnC(HJK*c8;F{pgoh^CV z`TxM+mH9COC2!{Xz%GI=$aNFuL-Y;}3-P%a_?Ln%`NKlPE{23fhWHR*+B*80n%XdB KWh;AY*na>x#pB!n diff --git a/cpdfmanual.tex b/cpdfmanual.tex index 15173ee..2f94805 100644 --- a/cpdfmanual.tex +++ b/cpdfmanual.tex @@ -2343,10 +2343,10 @@ recommended when file size is the sole consideration. \noindent\verb!cpdf -list-spot-colors in.pdf! \vspace{1.5mm} - \noindent\verb!cpdf -remove-dict-entry in.pdf -o out.pdf! + \noindent\verb!cpdf -remove-dict-entry in.pdf -o out.pdf! \vspace{1.5mm} - \noindent\verb!cpdf -remove-clipping in.pdf -o out.pdf! + \noindent\verb!cpdf -remove-clipping [] in.pdf -o out.pdf! \end{framed}} \section{Draft Documents} \index{draft}