From a9f18eddb88bb2ecf7c97804693bbd636078ec4c Mon Sep 17 00:00:00 2001 From: Amanda Cameron Date: Sat, 16 Sep 2023 22:29:44 -0700 Subject: [PATCH] chore: adding test case for odt tables (#1434) ODT table extraction is happening! Just added to an existing example-doc and an accompanying test case. --- CHANGELOG.md | 9 ++++++- example-docs/fake.odt | Bin 8950 -> 14334 bytes test_unstructured/partition/odt/test_odt.py | 25 ++++++++++++++++---- test_unstructured/partition/test_auto.py | 4 ++-- unstructured/__version__.py | 2 +- 5 files changed, 32 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 821e10d9f..a14c4c3af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ -## 0.10.15 +## 0.10.16-dev0 +### Enhancements + +### Features + +### Fixes + +## 0.10.15 ### Enhancements diff --git a/example-docs/fake.odt b/example-docs/fake.odt index 9050499723817a5d4514fe683bd29d7b8c73e3e5..411a785b51d5ce3d9598a4ff75aef7a823e4ba53 100644 GIT binary patch literal 14334 zcmeIZbzEJ$);7AZLV@D$En3{&-QC@tg}ayH?(PnyxVyW%7b))UeDu8UK6{@n=icvk z|Gk5(MUs{AWM*V$Br`LQtOOW13IG5F08|uFsc84n!;%63fM1`tEP$1nm63z1jgg*> zjis4^o`ab+kj4pUNNufWZ)Q(zZDRy9v^H?GG6FhKI~ch*$o>b|oBqG$7odPO(Adnx z(e7U`_VhG*1_nl!MsG=LI~oH=JG);fE>@QR41)1DAa8X$>g(Cj{IeF;-)Pxb+c?_% zQvEM_IDe;SXKi9OU$U*N{H-{@u0#j*G zJWo{!>QpK|5P^isD#aFXeLI*jo~lV4T43p+sEnq|e|`i3;h*B7lv%MKyJoxHj9P2M zmDojgoRu7Gm*HsnXH=q|L3;J#{tDtvwN^XrhJfOVt`8rG1mK(sUP;9B|dI z7FDRoN~8UQrxi4cE=~^Ud*kF=^6cmB2wcz0IjgXol-a6+5Zvs71O+YM6v{m{i9ppt z^|Qqib4fOg_YqlN+lqyINKL2O;^kYPC@{rhz&{)j&8m-garc_7nT0{6AEY=HXktD?HV7Spe+|0Cu){<2wZ(f6giwQ3 zOi^QW{Bak}MAIN-yU`uGi*+p`dXnOlxU$;{uQh`mOee2{iTi54FJ%Yc-bQ?2F7D!l zses>)Ta0UYmk4K|1SisosGu!x_W=BSY7i0KTApdls$>B{lVZu4pg{mbHGcD~r(lL~ zDg2cYy7w;e3}&NzJe0Hwg>Adv!ZTx5)|gah#D85*?6v6jNZj>}f?Q~X+VYl|KTNa#kN)z?-Vo9?6y zy+LFpAfXOO{AA%l0RSU#0N{Vd^WWV>uy41Ml^)Q{*vQ_2+Ro5;GX}LsL?#LgDF52g61LxX{{|HR#`5u$@<+dPGg?u^!GqXjM_CBBQ}) z;vrGf+If~Dk>joiI)}js8UDpqazyBeW0G1r9F+qZ&7L+ph>W-w8#f&thZ8t4`QN% z@&Eux7ytkYfOEv>GuZf9hQ_1{WASc818h$VP7`0B*X{WXcB|r7?Y^eb z4zg{rzsrkn>(7(M(z@Zr<_Q$WDb{YwCUhw+!-3-a!d!Si$2(<}4q2@hJt z{W9&&3w-3xL!eB=>bYtEI*_6G=@tq#Yo2z*?n!Cg?pipkd=29CmP&f%rJ3dPwt<1e zMg?s;xWeU`#}#m0Ut*wjZ9Xt~Jp)aOjd_9sqaO>y9GR!Cm-*!6v~4l+m}`AxbX-{7 zY?^Atqd8Wp(VqOu70SY8Z!PGG?n-Ydd*|ZV%vH1B0Iaoveubdh>z&)SJUd(4_C0HJ z<#XLeqnyP)8pSQ=H{p{LS;bq{z|SWFdpDP6PE_}A<3V?i&Z_P6<5HB7s)9SEzo$Kg z6z&%_{w?7Ba^J*VGv-B{+xqSx?r#0;!RHYU35(OpXHDUyk)dj4?as3?*TDiqm7cz% zEw9aSc}<{I$(#f%x92ih|FBpElIJSEABVR3&3uCDjepclVdL&}Ogxgj@MictX5)s7 z-6Z61CjE1-B=$lDya;X->L;ObU>U6UlI4rGatmA3DeqXRN|E|C$2%$?oA$0kTDh%%B^kMg z?3d4#IusC|_g`Mh@l6$IobNw-Z4~^FVx{8d*m#^M=-V?*;;i+*4&1E!CyK%*A9k`E2ydc=2&=oZsrm~7$xl>lZ6}{;o zYvs(Dw!5sq>&Cc{ZKMc?{R!K*+gv<$C%nV7cXEHfZjW)nM0=$E6tkMNQWB$`@U0-{ zsSVO%^w0?h<1yDjDPG^v!fC0HiTdZv5Q4Wyrs%3pZ=_f{yU!!FxN0d=V*igaDh$o5 zN_y(B>~$CPx8}(%dkmUAFR}pA3RtCVQ$V-ygDOouSA=7CM{5)q%C^24Cy$-JMnq#h zKi~W;2FZLqEvY}3&@H-NxJMxxi}9O5(aGGm%nx7cNh!Ir0~)OuspTE1czAsLH}ZJvFDSG9_puDi_6V3Qi;s@WvPhhL4wh_(i5e|l!$YPF4e$i-V&jRFho26Kt z)MiTh^-al}34J>q-(k%6HtdD-cZc&UzbpK#-QmsPMwLHvR*;-fL(E_r!uh2d&>-3*tdWvydmG@v!5T)UbUQ?yl_>?9vn<=Wo2I+lhH2P1M~=QfgRXhJG>nPh3cNn3y<+#hSctBBL+@dRnf8`Xsp9T(rg8Zko;Ow)k=e`M zOkteo&bgb3r@F069wsNB97>!NuR}JU>J&;VR3?+IKMK(}!A9Cc+SuFUjv_VDE*Uj5 z3XkO%T-+0H%t+qa=D}RsSGbuQ#ifO$ZgDq1!;|VT47I7`F6;P&DOTTPebx-YA0UfDv79Y?hz_+k66kQo}CrJO_1w%m`VwpL}Nm`$F{cgAp6)5qMm zoVlZo6q~skT+Vj{wKwM+NRM7s>gYT-<#YLc#DAPsyF9l4Xv@h|b|#AdQ$jZntow8P z43V<;KwX2;RQYI4x<#E^xPkTcr_pcHX$3pNn&p@5@p|;R%hEU!Obb?2k`#TbP*e~YS$BsUgrGsi*oI()7mn6s#wbjwm>WF<%SG zm2F;t`tL=>lyXb8O0l@#L(i=n$IgazT0?c8#?7cOVW}UyYN%RiuG_M6+O}(^ZWQS3 zpw2zp*H|WmhIvr^yo~=m<-N^-f0^>$a{e;qz2*FI%KP8=pMCW&^#333FDnKdP+BP1 zKLt=Q*gprq4!~am0d2=$!e6l7p#O~ZhW>A?tNoge?d)xLG{4C#9g${n-W{cBYS5EGIX{LZiY<-ae_?Ts89%z!5Lzn=4I zHRSBE*kHZdbtX5sh#6aJ^p+faz9uAqzY}GcXQ`&>Kre4Ka0h#OjBVky>!^Dlnt_y{ zH5ijGleztKI#DQRjq{7+)ZTdf((?2W_C$1WeumfgmGC7}dNSO&~2F-IOo8#~U0g{P(WyQ8Zc(-P=)RrHjW*RKro zX-d@dUaG0y_hv>CFS~+nq5~|}%86o&yU^5)FJyXc=jul5_OISMq-*OH1i~a`-`dZI zQQGB%WhfF0GfSF3X;at1Adi2%)$a4EQ#osn+{GUsdjI;)j(xfPXlZeOzqvaP-o%*O z2g3kXogusMM;&8WxEz7QftxarABDT^IApd_r&6e)irdMJ;WpxY-$SePK^^je;d zE(VMvr+fND=gqQwc57m`RQ-Eilh~E6g;K32tw)=Nl^n&+1`d~6=?GLD9|`T|Lmld$7p1mZ#h{VTI{aVhNiegEj!jcB4<$h4RVbx)G=zfZlaw(nr=G?BV)k_k12YlG&eg z0*jfFsU57vh;_3CFo`#bEyStAW^L<8&-uHQ4@>X?9w$uaR5wM zHlpv^L|+_Z9*BhuHiQ!n-#rsP^~Nsup; z*IC6H__OUiwAK^$VQbE8SoY|N7Q&lZn1RVetOHqsHbwYx+g+Z<~UgE$P>rqrm@Eb|k z&|{+vtV(KnzH1p94wJL*BpIbveFqm{PBstm)LlffpBxbyjA7y)wA3gUSr}2Blr;4& z1=EC;Wt};N98>ZKzA|D^<9c-(Nv&NNpa*u~?*?vqmv|rc z_t)-pEcsnq>N7I%c+9>>f$EQ4F@!gdzoY$5(U)jYzX5FLXF z@d?!8J32#rbLQF4&AGXsxFufLLbG4Q>t^c?H<9nJHR=KAuKlha!QmVBk3UgU2iW1m>6H-hgPw9`>8Dz zBun_>IqvwLxPGOg!b1JAQ5SZ|9G3!F!Ue;WTg#PziZBO%=ZN%2;H>33K)GkLyQoSr zvnwld6n`X)Kc^=U%!6v!%Z58ig>e%>cANxAh@6$IZWG~YioIoqw={XbU}0Evpo5-i zodA-&(x8Ij#5(iM0Rol6{{zoe7n*Xae3O^HGbLem*S(N6eJ6iE=rkjCFKjJ}HEh6y zn$Ez;sqqRD`xpKlMZ0akQF3L;A@2xVj5;l_utU$QqMMiQ#lWTBPSeeIyaEX#7|AVr z42pw{Xn0F=M1-R9Gs>b}60rPHtuG_%0mu6Z^bvl0)_R1`xG5tiI5=$=cX*aMdAfsq z?tz{Co9Z7({f#q*1Lwt)!qwQqyW-bk3Z#i!j+M{&_avoFNYQW-!1OU8GcfdN;`(jR z`F4HFY3hu>z_oWwNutk-(Rp04z;z3Tk7e5vViccM%a7A5--9RzitePZeKf5H9yOBolYv1662xEtX-`^Z=Am18HeYCn*~?nAIA%ecI|>O07zbvUn-CNh8Y9 ziv(b<16?%+sX#T%nK6uU*BN$y8j39*cUe&KoABGF6HNnMq#Hho*Ct9oKc7E69XEx-epOmH{Q9=D1_`Bzs4X202>`G#{kB!~cROvrW_)`GS4*RRP2|T~ z8j)M9PF~~Xw46zRM*;l!aUN|I{KBT$Cs)MLDDTV9EpGAlI{2Q!t?X-;rg9m=Tf zk5ExhHBUazAJ~W-v#H~V)kh~Sitt=yUqANqCuJun4;)7{aj{MA9%7vm%#w#2&tD89+GdGx1 za_EBpQ!YLsB*BugwSc-fEUQ0XE>hwLujyd7sm1`lfC?MJpr)zQn3CDSrOdHbm_Oc} z#)Y-sB;$@wA;q?D$AJqknzE&3EzKa__?^kpeDghvub=Mzm;H3;G~CgvAN0J1weGl_nI4OkYRB9BXX~UE%bPG} zpNTWzPXwpQNg<2zl^&$DyfhGa_^6`DXC^(rXTQu7CLgS&m5r*IFItjko{ZT|OD+(o zI0xwkcBxD!ATvdRT*yQ)$r#0shrp*nLPmovO=tVd@p9`2u~CCOT>Dq5r;!#h;@jnIiLOy(qhL;&2L5EQMV4KI;mRbrUwQL@tqVb|g1&JSu z0yV1o-qWx%q&Q9V$CKpVW=H!!HhugkZ0NDwTmW4t%28)XQOlRjpoy1kwf?@Zp`Ye?j9$hSS0FJu$svUu zD1Y#!F@Z{fL=v>=bNEDyEX$_n?6PHq@6+?f;@W;I&L~bXQ~2m8)wq+)s?U51k!T0gBs()1SbI*10wO?-#wj(`F#Ju_Q%`ZAaA#+|0f(2>rAnkIiDG*f`4Cckx|{eQ99xGR zcVR|t%|q2AT7%}O;CDBgSab6Apa#Bd`y=C9ytS>?n^2;?vvWWA>bi?&EMgo6m|wp8 zE922>?Lsk(cHv2IxQ6re>uuv?pqQiWi96Kclu}?kzbhCy#F}JrSA~;p!{MAS8YA<$ z7PCG7m;)10$-oR3`9nw|pM&f5$}sayIeHDWM$@~h{ya3FKR;qZJ z+&$icTjmC{AbcY8qFq~^ZAO*oVWns2g7T2h+}BR_RTqXjBFdstIGQ0Q|b#--x#`5^(pHyF<)m(D~A3J2B7;w|_wsj!z z%?N7N+4@;KSXg(Zp1Dsg_m1k1CHn#|Xn2uf)%wb>l$Y=MK z81157oY5QTF*ZuxU7M7tl)qk;uBw6-(xSt*xTW=L5Ii|RXxo4>DJIT*zrSq?#?hMM zv}#k8o#>Agv}n70g)!5iGb*}&f-3HgZT1`yH9+WxC%$iFZKc93Jb@Av*U~kiP%4!o zuLl=ivzY*qDlLICF$f_$-Cops0meBCjz9(_Iu@d@@HA9tN6_>{pcMTTjtAYEQrW^Y zeVNnk!qzFr%(eExt#VqbRWsc+Sr?u!y>Dyr*kt~Y!Ox|pz8aC-vA>o#{=BMPEz_## z^c%qgB{-`}1tIcUT%x4!ds3rZhK-xO5Z?8$^Q<)IB1*3nOKk!qcpZvJ=a32SDtQka^#5DBV zV$*$OGDyBFc#QJ6--^*K{qGwc<=>CtDAOKDv0>!*T@@z2l{& z^48U$+#=&`M;ATjH1MuWe=tCtgpDE?b)`lR?aAP%iM4479^K+7?^+Y#R4%cQUkmg0 zhn620I?#CdN#zq!le!gt@`qISMiS+#KrM9VyTWSkoKvzraQD#5EXs(X_RXE2t>Xr5 zk)T~%4d1*!_t7^lO8I#tucA$?+y{xRm{{q`V7?EM1__QVoRR%#n=Uz6f-+upC#(u7 zWw4jt-dR-s-E|Iqs?T`mW|;cFERW@wDDhO^j zn9ww;eC^4f%ki8kJ|*~*6)E3?6HI)8qWRCNx`LhgTD4=0sdb-rIzF^9JH&(m;#-A7 z;+PXm<)ii0`YwBO!a+r(Mm4ih;9zDCFv{e;iYD99ba9mf3{9AtlsgtlOe9l zW=)QQLw}X5OdvnBi@y#ou^dohCMmfP|9)YfL!N3EKN?)pl2F=`Q8a|zIF$O9h%aqP zFB;-#9BK|O`BwbFBKYA+96hurx?-(Fg0wYj*(N_2ZM>#&Az&i2YIm6?zc=1<8&v91 zSjvLd;!FM5TK)y_#k>8tTqPJJE3yfjr#<3sS0d2aa$fa%hBX5^!s1}W(TNyO5F@ywh+ zPTeA~&e>;a$9RVMEum31!oh3h&S1X0Av=wh;)ZT(<;-B-{84T?_VNN*L`uQhr0O85 zp{!8v~dZ&)>(-e?hTYVBbNnAa;1;cH#C_GX5ieHLl z?g2N8`RxZ7rf$ZR_ZN(^*feE({#Zl?&VVHsH6fabk#slsx57$S{8qUqCwg=kG^Km~ zOUmt`nC#lhlQ6ZbLyRYYTDs~GyaqEE$LB8<*AnDJr%w7}{&c~4n>?>mkIm5b(2b|2 zV!iFJGVQ12BNWXFvAk9_d7EpiW;HDvO3l-zuHCI~-;JH6PKA?pe(uhNlY6hf;(M=e z`9ZL+c(^6_y@@TDadJCk z0(R`x>A=mc--h>_iVVdskm-<&qSo`8oHxC-#oJGsP|gB&1Q$ zm9C-eRHla;w`_p!E1{qM$&4|OgOyAm>M@!Rpu92`IAeXzYZ&B4qSZJFr6YZG5jZwnxs>V9PS+oRf)5`64HqxO# z=Y2$Sddo8b^O3Yx=n&Bt?;Gg~(yuq$4^7~PyDcN@dwTXnysw1|5+}BTC)ekazC9qe zIuaJD9bdh#dP^p?I{7n?vV4klFF+=2FQZet?KEs&W76i607grPJ=XOZPU9luLXFlT z#-^J~M?Jh{EaM^@R4y@V-Oi(x0KXOXduSL`9YzeA>y@Mi)FvITS{I6XXyoP;uj)io)Vdgt?XPul%`kxPBdy_bZ6Vkg+=nG9WNFoBF)q8PAbLDtsI7G01F`*t5De42JOm{;>xvbG}LfA zAw!G?smM~hUMfY4>0Gt%rpHdo3`H%QP3m&40-Jp$kef~0U#x_d z0?f=!n@td_4}SCpo(0ps?pQ#@k>JbWASQW6*y9JaqS zx#Ek!N=zXK=WaqhRWp|(k{5T0$JAOj>vocRx^+1?@fA(weWWzQhR6ZB1(!S zfQ~}0oEsx!?P|hA#3v`_A{xIT0*zI~nw@rG*sG+I#AdRH6>LmHCp^CnKZsv!M>}yrKq`0Ao9bid^Yq zfQx`@)wE)asEXFlFg$yq;uwz$B3Aq@D9)I~Jh(We50#l;GLZ2b{isrW6khcN0zk1>hme>0xt>($|;%s$RfzST>h1t zgBP8B979N=Gz7lacX}7z-O!Ws9np;?lo8J~0N$ext1DruBMl`@E-9RlLHZ)fNmCX# zs1{PNYYfZ*sQ;17?n7_IXrE?{T^H!HIW>P`Kuw$z!AVlULa%ZUqu&4?q=RpgcNsCI zHi8TmR5f@9O)~W(87y;uDY(P~>(2qR&R4Ksn~0x2;)#BEJ3G9v_}?}WVcs?o4Xl9< z|9iT3RBai!#f=Q~m3m(GozUn;^)A^^-PgcHpWHFRW^$>*NhEAX5h(8a%Ji>{I zB^V7W&xvhtvQFAi2QrupMB87RCDWRK@XKdt!}oPhcK9;>_$ak?*qOM?ajSo3cGHa- z`3cJh|AIhg%BD2$*C?NS>MA4$8@eAAy5R10b(K=8Tin7|j2lX8 z*jz)JJ2?~D8b!8AO@@L`9{N+0n1iY>F)=YitxSpd*oN!o#jC*^dM64Yu|PhW(Wh3) zWuZhlqCwO!@TI4EINoj`R(iZ>qYHFe)nb@;0+t~;bY@pje!q|iWWTgy7c)=k;klsB zI>eZm-9g8qY`I1%D8+gEuFJhc=WT6^Gz}-u!XcSu2%izfWojlf>*|oa==)V#d55aM$WJ$2s1iy>diGp?H-o` zBFuuOTGHrZu6vG4a6`?arqv$CBPp>ND`EP5PDSe6nh4>Lmq}6IzylJ#2dqn|V-ltb zCM9_LJR$LEzt++z-~w$?Z0yf3$u(bnAz^Fcull-eJx3tFLaa|JERC$6RYcQ|`Egy} z@0q1OWEnF)&v{zy%^DNmCU|jP)JTJgRHBP~B&+Lds(mQ%_?(P=9-+M;GZtA^-NSud zm}KURT=vvVhb9SGZT_yJKH|!p?`yRWb~IoG0hGimx4?)rzY?3YBQ8~6Mu z**f*%)B?BO@Q*@vx|2qH`709xMbKwM>IgNnTKmWFp2PNhe5~HQm0%r_L|YfT0THN2 z{&8mrf=w_Af{6%tpbnXkj6`F|V)>WE`K*q7q!qYLT!5i?#VDNE8d9O2X>!+%=8!p6 z&L2C;BaInZb((Q7tt0)bH>aszO*qr?cD`N)bxw_as=abU5siDf;lFIcDvOKN%*GZ1 zo4+gOnDD5(2%_$HY(Z*iohGNa+%|S8^OHrnHRy%-aP8s3&6x|e5P|n~5+gG3YY+yh zIan>?Jwn*Vs5F7gO(mz8hRZh_Y~{~1li`88$bRG0(7Hg04G%ZE=lyF@=iQMHK9$U} zC`@|7E90W3>xIJIlUMpV=h;{OFq~YP@1mzDOp@)0Oc))w!$xEj#3o!K)IZB)e^%z? zFn#EXpE@OIp3zPX7(Y!2sPA-UU|XfH^|w|}oL;yEDZ7>E&eUib2_mQ@SZq<}^?h0E z;%t~Zh_L6%Q36`TD!W~QA19D%0g96kzV-4nDl)1!B2@=!!>k~#J$)5(fw(H0y7mPg zyg1T7xLLFZMjw(})P#SSp~>bRvxBl`#LCo3)9T8qtXgxGFtI?_-VIlvZaK42;xVPH zrnr3fX=5E{>Uy-6${~y_kKVxW=Y{5m3!G>>1g#)r#jw+DJ3~M+`VK0;0q4-7z2h+6 zF2pP6(xAy>h)Yc-2i9(vayiF+a_xdHDl}uU10rO#J2io=DamEed$z5vXhpW7qJaGM zZ-o@C_zW~j<91F9IHKPyLpQfN1I%2Loy`w}bFP4GY2?@W6a-*7fX3%a|31_f)7}nb zJDIQN57$8zIGVLXHmLhW6!Yl_N0*D%yk`UUw43a{+8kG}5Ieqwq&}N8Q0q{=yM(eU z@0-y5>l10qjV87qeb)}SJxZ%^eYenOct9{Bt@|?wWuM3aH@8!RXCm!c8N91~YIdMx zB-R0&7&nseS*(~RE2+ze;0HHfw+^1bZ8#CmkHMj?8a|bCd8Wj?Fr?Mf&ADSEdtcAI zK%O7*{BqdPph&$g5CDK4-hXr0sBaEiN=T89N=#aq=HCac-%fIrDM|pl2@qCyQP7r$ zCdLc+E31oUknMwAE!^b1c@B)}j`n_Vg4ADUIoICVm(MX+)M2iRj|r7PH$0;UKMskbGjvh!o#7 zH~tZxRut~n-GlAMjrcjS61g}_hTrB4;c5aYfFr*KoZA2+eo4tOIIG6DsiXT`pV!F1 zoM!6#$D@++VT7#d9BC2w4~T2nR3ZYFk+`@fyt|WTJBY0o)St_kQxQlnPododA>JV!vAG|H8k&P zuD1i)S^qlU4g!h-_-E$LTPOTMe&gZDO8lqRABy~ov-5je>Mi>JnsD>apubLw|1L3a z(LaO!PQdwRlwTB_-(}z}`e&3s5_0|-=NBdC5AtTpzu^1_LFb>5{_~31{|4!ggq?rJ z`A_@U{SD6V1fG9J`Om8<`Wuu#5_>;2Im v|5_&hE+Ulwv1tCY;vXH!FL(XB+|m7m^Olu>c=xLq;@h9in>}3^ex?2oDfTd3 delta 8410 zcmZvB1ymi&vi8P9AV`9{2X}XOLXhC@?rs}*-?+QGYjAgW*Wga@N6vZoo%^5l&#YNp zU0qc@Q&la~->dbV2*P(M2uKtF_y+*sI3iCe20;V}{^7%4GG_JMRs1!w!zxgowB(nKOW- za8y2|QyTgLoRf>+sojyhAcL4Q!uQAP_qIKP`67NX2=1^K=qF3gWqEXLo zmZX!5BG+x=Fb~h*xXzP4Tayn2#^lA7#`GLaf;3|c)tMJEtWfk%t>IJ}gjoq0NSbXh zGPg}s`Pf~M7gEWuV4Ng0EV&elpdo?edU++POBE20!9ahGPW(&!P?$fbdD9@fL-vhsA!P}%a7+lp9~;LPXo0`XTYzPtzH?*Pb3 z;RE5`0}u!aLSn=Qw#Nxr^$;M5y!%~GeW_0M$Dk8hyC5C^HoJat&VxXq?OLd|v`x^QASMki;ecLqxh#U* zPm)za5igd6MhI~cn#YAd*1SDUJkd&5XCf6>UpcE9A+!xh5UJc8G+*M|k7ri#xUO1G z9u@YbVQ)8EBS*@YiAKlBrs5D4*PNF9OUJ;T`2E6>_YH&=oD$(%cSO1ClEH+rp%M@H zPuOqFoZ?Z#ZRzOZJh&;|rmg)bjWEZBM_67B>|KOxv*=S&KPzj+*&DdPs?wLV$# zM)|!78ftz9GAHh-dHPlMj%iPO{Ozj$^OU1H#YQ+d05JBRr*QA7s&8%OU})v=CtJsq zc&u0Gk=CxMKF}r`EPo3Uu1d@1FU*tTa-hPvBKfix#4n#PqpclVgF+edB|?)V`hI=w za*b(k?L|Vu0rJ>?3U$3~9>d277znqf*mUh^8#eXwYG!<-Mo-?Xn+|>oXHEBrqQ~M&>S<&-!Va(9oiG2=s z+P6jq+pw_sA1*D4mY z2dl;>UxF^yKO(aF)go?%10`80tY+PFpjHDRg~0|)W`=&teQ@@vA{BB0=kj8>tsDr# zKLLdP^zJ*@6f`*~jvP*_tUDoE+kQb6+-X-0I<6_A@biTL(FO|ITs)%)^>B_?bZg5P zL#f>0x`i9@lkVD2c|%kARt?)!xj~x-7>4OYQgir|$?Bo7Uv9`AALW;Pb3?_0scPfZyDWP|81zS8I85 z*+#X;XRBs&;iEwkm$|HLi*j%<5>7ViOrKfB0Ee=XF-yiF`v+iQ2i zEHx)tfKg(%`rH5+^-Zh!1j!ZR9fR4(l^14jADCa&qj?)b@vEf>(0WgmY_VOO0@*od zeMI-dDdHzLaqVXmv1&!wA}wSX;E~9ZiPRqeB2x^tr}bZ9DqY>H=M^1-a0`{{`L*Ng*!iXXgUoLTpRw{jz0~r5k{jMG{w8q0Xv{8##l8VOhmQ zU`XpQNUdeCE^R9^y8hi-@I9Et5C>1^1!Xe&)vla)q`w5jCH8-V5JhvDp)8SQ_|$cT z#(WykYsi){2BOL=QQpSl+HgCF0Uhq_7Dy~8UkWaX;<=mFg#>h%<1sGWtHfY*n8VQ; zha+w{LaTsqmxtIc78s}c0=qlHqBzkKrd)>#Op~5Y{0P0TDk!iTdNbea9M7|fo4L&< zqsQ1DBWZiB<;zIs%W=>|OAno4szyAOB>N_c=vkCRihtkJigf2<2lkL(vDRq9gs+3~ z;k<0N#<6#iwV4UGGQi;rf+O}Erf?d81`tX{QcZ!^UNVh#(nN46TsCk-bX$qPs6$e? z;CVjcm8tb;}?lFN9-@$+JSrg zrAGy`VuR>*l0+mIW{P~wR}BM@8p~}NRrSQAC%|x~Kic}V%CL6YkH;*P&MuZOb`1|Z zj;9GcnId(kDVSB?3}`ktbb$dtL^cPztoA;d5)cK;Nbrn15`>EQpq~iXa#pNB3-J$` zix&mOFk3M|ccBj<+F6k_R~4Zl1Y+!{p^r+|Ef?x!+WLsY0|?* zw2H?ilQxUlVSX5zE`A)oPHNx~Em zOj553Eu^NYi`w&1GJ>eh@(>UD0q^Ii z8Rxtmz~$gE1*Xv-s(Hz+Vbo7!J#uYL3k4~~#AS1<`rTE?-5=wcIIgWT2{FJbae!B^ zIC_a$1I5KaRO!@3pMSM#K&DUP;sCf z52OcHks!qJX?hm&Pgp1gw)R-$;|ao+W45OAH%tDH20vNL?H(zOBD@3h&CWQvg5kER zGDsEz9#$dL{5;i@Dl>UMEk>NwVs-T0OvYlE={F|fQI;6^Xd1+zPvHBmbRPi&zwlu7 zKxCyRrk~|mS+WL0%Qex#4u36aputq$G1NBT3mwPEq_aXY;)-@=n_;&uskYliP!HM%P`OX{F1a2A$URdlH<10@rLF8Q+9w>|}4kTzJOaT2=cfzI%vB ztv~sEI=f<^v%DCP5rhSn8a7XvkdcJU4zi<9#c@l4wB_RcUvqD}w zH37lSe~Y=YsM7VwwR^{9qHM+8=%eez3DG(+I@(bLn?Hm`chxn}s%IRP@uaO$S9#86 zgO9RGfDQ*msXlp|Lf?1plrurFLSM7nF;7si{TFoAD`{EFVZ)IiVJWSfx}$5skHnYq zWlmvW=_`YvKcC|Vmua@XjzK|`u{esK~=}p}9-ax+jB~7wCO=O%m1S`!A z!!6o%^WT$g3wBG$=qIf+%b(TcDxWJ4p5O~?MHpQc!jxmqL)?5s(-KR}@cr^!DJA6M zndQAb2MxL>?v_G%jT?3=$(ggqM8uL;6>wdYk>|#4TUZ`|U%`2zJwc{re{hmH>9m$Yw~o;|e9#lS)$X~b z8{F;S5B^z@!f_OR!=h3UHGvxf_aA%oNC>E zKoj5K@QK_3xPwk+^ySja@TcSzQs*#>ZZjb|C;_=y9}QZ1Oi`}mY1oTTZ}@F!F2=AZ zH}v;rcyY+w-f)JPO5$y@2tgTC$N))M;gSIJ5W##C4h?RD7jRVpL|2;_ndG|OVp>pH z`;C@s+c&XnN6jiqbKit?EWYM0q%XF$o!bCkGw7rbm|k#P$BZ)tWH1`)xX^|q1>fx5 zFcjCMz>RX(c4FXsV6o+jiQJ3u2-(9cX6)~!2WPbV-n)1iM3w$>&n>BJkQppAxF5^w zMak$Z@|92}4j&9XsdHO{XmdN7OQn1P?ImL*lCrB{Az+ZnQ~PZFhHX`Ch&b>!&tU>M zD+n(inByp>mkxSmz2X+nGYS_`FXXy~taz4mc>q|g*}W>Au55;yhs-pdbumueCjWeT z5~?!Z;AV$udMvDBNwEW-xD{YYeX>oy!tElzTd;79IQudZ8R#ug+ybf?@kErH-H}dt z8zyF5(nmSHE6x_$Z8kPToub&b4!@bDqQeygvhEM6c{&t`rgq&frIn8zSUoSElb-gU z6GlwbMYhiN#02tF6RJMNoew|S*iZ_84$^rb4=_4bW3*90U?Y)NZvnt`PD$2a^>94Qlwf1~xs4Mt8!Riz1$_rm&E*DCk6ewBs< z&V5kqo=H@KWS{s}siqk8Aha4wmqjp9X++Yl*R9o3Ce_@E#&`S^S6LRc*vdYHvFBF; zQR>$f*br=egw-bbnYU>&Z_M;4WkPCg=~~|TwQaUOBe5myAph*jwYb$y>!>0#u|+f_ zoV$EJ->I6EBmAR;OdvImOI@2H2;UI{m~TNzOVCwoNuH)sU+TWsUSV*^_X#DWZMYMW3nl-t~b zp=$0~4k?<@fjMs|shRD1 z2`QuJ?M=67OrjZ%>z&Y8OC5x-c_{Y3Vwt%9?BD)yBGusB8AgEJhEJniguFdbxc3=j z%x&6EA=IwG+KfVcgp@wzJ}(W9f-~MjSYAYp)+c6;sylV0p3o1O##UOyv>8$~ObeqN z?Kl};R4QG+n208^(_{&P7zCUyjz!Vq-JUFqT>w}pdt&)7(^Y$P41yD0qvbL_0)gh?w#22!djmG*RROj2s zL6qbODV~nUt*`8Kege>E=XSy7I3KO)r}0_k7Ns44e$RygXUu`ais*p9jG^5>~C0eN5{Qm6rZKSa9+O0>veEkq; zKcL3DH}f zEJ3D4i|TokWwjo7d)n)0&r%tMZ+`5i)Z`rmlIimp@S#jc7_>Fb#BkShW8z8t@)fcr zku6sa6tm+=Y|SdCDc$Hq+uPFV*?2*_B{2-re7#+)0^?u)EJ>6^>{taJefVt3O`8fd)3DP zx$8-=@P?8JT_a1`V`a68#|$SzQN*r(ndgaYF+ES94;5C%B*-U%X2q_G%}JCjhczbU z@c%I3UkrQyl+scSqVm`hP-c0})p71kzahWh700NpZ|<|mAB?P^m&Clzw~8_<1rGD_ z2uC&BtbGfJApsHp0;~8s6(+Gb=JhtpJc5~7d;#m1yttNmUUqcbytojdYkADOAvNMp0lW30 zSkOc7GB~jdSf=#knU)c|#Na|>qObF7A{a>M54mLmwLD0HjXN}g{x7(dQEV%;Tiq$a zcjzG+2u_+&pX~6kTERBfiK&&0fa#e$GXWCvGD0-`*&_w5xG*|j&G7DT6$U03n-FSp zA#NlBPhdCsH-@T%IDjt%UXFi!kR76X|EWSaUshXBXm+T-d3{tT2F_W+AynLV zmicud`03Driv#np6w@HL+ejuLchZB_{oh=;?qU!}^Z`K+a&4;HG$?P8*U{E$`0?&bz(FDAJ&&U6Hxysj z)QLD8b3~t>n|*f+HM7;LF(T0?vkBpX!#HX~L#1RC7o{cQo1E0m)i@T-riEW$mfgjviQmRC_31@Dn0ZdFf5!iRkF$utFXktp0Dw?30092ak(Hr?gQ=DAUx!xpYMS=ToJgJ>9Yj0# zanZZ1Y|?@-;-;n&I^L}IPh=-h4#eg_o>-jCjg5GC(~s==4YzS2d`|guXBqbMs~#$S z+q;-?`{dSskFB+vkOslXiPtNeD|zmZvKQG83!lb~xcx=_b+P=I3$enp#cp8RX&s4M zJ+bp7S))xWuw(nOu*ocR!0n#Yc5Y`r9Zdo9HcqTRdD}Fo9aPTdKulR-w`kmGTVfx4 zO|yfjju;!-uhCq#mpfPXE^phvoZDEUy`xh=)P2X zqG!Sz<-HoDMp)vh)YjCBuYJJ;{`Ro7nYyh7R?voF;?vo1_y%9Wal(-uWI5wVBSX<+ zGDk>eCq-pgP-&Po4ScvNn@E}zOXZ7DwHr2sqzb;%cVur~X**G1xwQNy8;DVFK?tv7x*#SyiiQRT|O{{qtoF!dB2U=;N;cz#KX&$i2{N% z*4-WqPO#C6~zxj zn4P2{oN+}iGFp=&HfOohZ3{!Fse)iWM>#C~r5)}`G6%s{44FqfjXKi@`+(x`K1vEsd7TWi8KR*9~9j9GV_a_7}+< zTfP|!f+4Ern@ux4Vz%x*WpB+N>Z;)3o!_=}(6y-4e&YFL0Uuq+G}noXV<$?artu0V zdE}*}ZD`u& zbntg^*;E$|1IFsP4@bUsLK<@!P#_m3#F^l%@2}8_e=owKK5uvSpG~z`PP^lov<>8) zHf6HeFv0sctE_U~LMU_lS~EtfaJ+K^Qoru%zYi*Kvk&>$qG77v=LrqV^XrFn4pJU? zCo=BJsRiQAEIB{ViR@b>dP(H*limZ5#FFK5QE-a|0k9L>m=x!=n4D)USbTZOHe#;0 zx8sIit~`Pul!mQrQA-2^4`I>b} zt3lM$8c?UG>4CS#M0m+p{zO3O$Jd9U{a5Tkb&p$3F1Py!%skQ}sAd_YFQ@dFz89S< zjgrgVwR}lc`A-s?aqS*+)uU4*HpdI9H7{KKiUECL96R(%D-8y-XyNlRgteDP^vDI> z3+#s%iqD-{6{9fLFy+JqP-ZIXBSg91*1kXJ zILPl$82zyRrf>;|ssZIB0r|7y?6rzRYV=|Hn27-{^vz4RF8U57japBfv0Bb}Z{FPa zOk?7ONB^m3wBD((xBg1G<>{>=?zQ0Md8PgC`BC9H>L@j#cd1}vMfL6XQyQieCwHr8 z2(nmP3P75Sr9=3R3%b6;#Z*8`&o7VN6oKMXg73&QRA=7!=C7lJnsJJ3*mwK}>>X+R z2Qmi*Gf`^&&G`T0=I>wszn9E=`5zShA2a_3djSB{|0w)bj(~xt*f7QZ)9c^j|3*Cj z(SUu2>;DV?{C7W~P*yD9&-W$TQrF7V$k5(_*3Q7_-$MUJp8m0Z3jNZv+6q OzTb53aj7QwQ~MvB2+;ii diff --git a/test_unstructured/partition/odt/test_odt.py b/test_unstructured/partition/odt/test_odt.py index 2747f9b43..9fe9b4b99 100644 --- a/test_unstructured/partition/odt/test_odt.py +++ b/test_unstructured/partition/odt/test_odt.py @@ -2,7 +2,7 @@ import os import pathlib from unstructured.chunking.title import chunk_by_title -from unstructured.documents.elements import Title +from unstructured.documents.elements import Table, Title from unstructured.partition.json import partition_json from unstructured.partition.odt import partition_odt from unstructured.staging.base import elements_to_json @@ -14,7 +14,16 @@ EXAMPLE_DOCS_DIRECTORY = os.path.join(DIRECTORY, "..", "..", "..", "example-docs def test_partition_odt_from_filename(): filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake.odt") elements = partition_odt(filename=filename) - assert elements == [Title("Lorem ipsum dolor sit amet.")] + assert elements == [ + Title("Lorem ipsum dolor sit amet."), + Table( + text="\nHeader row Mon Wed" + " Fri\nColor Blue" + " Red Green\nTime 1pm" + " 2pm 3pm\nLeader " + "Sarah Mark Ryan", + ), + ] for element in elements: assert element.metadata.filename == "fake.odt" @@ -29,8 +38,16 @@ def test_partition_odt_from_file(): filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake.odt") with open(filename, "rb") as f: elements = partition_odt(file=f) - - assert elements == [Title("Lorem ipsum dolor sit amet.")] + assert elements == [ + Title("Lorem ipsum dolor sit amet."), + Table( + text="\nHeader row Mon Wed" + " Fri\nColor Blue" + " Red Green\nTime 1pm" + " 2pm 3pm\nLeader " + "Sarah Mark Ryan", + ), + ] def test_partition_odt_from_file_with_metadata_filename(): diff --git a/test_unstructured/partition/test_auto.py b/test_unstructured/partition/test_auto.py index 9137f2ae4..4b1d7cb2c 100644 --- a/test_unstructured/partition/test_auto.py +++ b/test_unstructured/partition/test_auto.py @@ -554,7 +554,7 @@ def test_auto_partition_works_with_unstructured_jsons_from_file(): def test_auto_partition_odt_from_filename(): filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake.odt") elements = partition(filename=filename, strategy="hi_res") - assert elements == [Title("Lorem ipsum dolor sit amet.")] + assert elements[0] == Title("Lorem ipsum dolor sit amet.") def test_auto_partition_odt_from_file(): @@ -562,7 +562,7 @@ def test_auto_partition_odt_from_file(): with open(filename, "rb") as f: elements = partition(file=f, strategy="hi_res") - assert elements == [Title("Lorem ipsum dolor sit amet.")] + assert elements[0] == Title("Lorem ipsum dolor sit amet.") @pytest.mark.parametrize( diff --git a/unstructured/__version__.py b/unstructured/__version__.py index ce219a9bf..9048b6de1 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.10.15" # pragma: no cover +__version__ = "0.10.16-dev0" # pragma: no cover