From 2546d41781f24ee58125d0a5b13c8dbae56e4baf Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Thu, 24 Nov 2022 16:33:58 +0100 Subject: [PATCH 1/6] test: add test and toydata for loading multi page tiff file Signed-off-by: anna-charlotte --- tests/unit/document/test_converters.py | 12 ++++++++++++ tests/unit/document/toydata/multi-page.tif | Bin 0 -> 31736 bytes 2 files changed, 12 insertions(+) create mode 100644 tests/unit/document/toydata/multi-page.tif diff --git a/tests/unit/document/test_converters.py b/tests/unit/document/test_converters.py index f1ddf50b3d6..6c010809e0c 100644 --- a/tests/unit/document/test_converters.py +++ b/tests/unit/document/test_converters.py @@ -56,6 +56,18 @@ def test_uri_to_tensor(): assert doc.mime_type == 'image/png' +def test_uri_to_tensors_with_multi_page_tiff(): + doc = Document(uri=os.path.join(cur_dir, 'toydata/multi-page.tif')) + doc.load_uri_to_image_tensor() + + assert doc.tensor is None + assert len(doc.chunks) == 3 + for chunk in doc.chunks: + assert isinstance(chunk.tensor, np.ndarray) + assert chunk.tensor.ndim == 3 + assert chunk.tensor.shape[-1] == 3 + + def test_datauri_to_tensor(): doc = Document(uri=os.path.join(cur_dir, 'toydata/test.png')) doc.convert_uri_to_datauri() diff --git a/tests/unit/document/toydata/multi-page.tif b/tests/unit/document/toydata/multi-page.tif new file mode 100644 index 0000000000000000000000000000000000000000..66b9ee395a4aa656dd2898ba42fbe25ecd5e2bf5 GIT binary patch literal 31736 zcmeI5bzD?m_wUcp-QA7EfOJWBcXvs5D<~)>-AF2p(n^;oARr9gB}hvnDWG=-eZGF5 zU)=k9p8L9gU53{Ti!=M2IkV@?+H39oULRCd0agH@<_>0EQa>9f03ikev>L_t_=&M2 zWvny>3-!RnF>=~@$wpvX#^`@N9RqksG6;8ScWUJX}H?5uq ziKn$~N+6#Q05XNLz<4zRs@~5BIuh8zMQ|1R<$6xK&1=4G=G6wydLoQz5W89<7wx+) z^;z(y^Ad3VMU}gLt!8feEo-OOk2@^f4el`!DZ3!eZS7E40IKnwHlBv}iuJ+i1NXfQ zTi1Q-A_2@}^<$Yv#+sev1xk4@aD*6H9SucNgznt0(l7Csb|$Q zz@*(yAZKV(wPU~G4uCrz!zW^#*mBMM8Q{LB84-EM8SsB{bxm#8u&{p~_{dzm$eZfK z769$L4Vc$Dio|{EI3&fUA2=3OFDFENpWsZ1`~fH0=<*%s-1lhl1N3&qR!%aGqt<=p zDO8z6c2)dtToqly!r_ABA0ixcGJBowJLuRQhXjU{J&8L()V+Af=xGqfO{8$ zU%o3cggVkF z;EyRW8A4;JU{&o2LX|#s)l`IJavg6*UBk)L3K!${R4W$#Q9NC^C=Krl56kH^aIE!P zzENf~9ljGrNv-6fM2Fe5iVCN)Q!U2(x<@C>ca4cLyFKR9S@m7$_*XyMO%ugJ{1%C0 z*ZdYx!X`A8PZ&3XKBwnxTA?c3?`6aIX9I?N zPjB4*;(0D-Y>cNTD`)@C0@e1iFO_)F*7x}I$D=-1Ze_cdx!dvE7n>0g`++YwA((38|AF6+clQ6)TpjJ+$bE|nLwjlso z3dMaQGYd$ZGb1i_kvbBnKh=oClhRv3rZQFtsj>ozlc=IEhvGe{4kzg} za6zEW;;_`Q*!ZLCei5S766BP`e%_oq3rU{;v@l(dLveArEd%!G?l1K2R%h1h3 zkEq+fj(7#$Dms20^pONYIvZn6yC26ZH$6XGpot!v*e)!uCrP28;}tiuCw4dT-~sh` zP=Xewf6w^|ErZN6qa!kj-dB|LlMFF2Z={qIbu}6Cywj4kT$B|f+n5;<^3?GH=0CBh zvW(p;4AsilQyR}>E%L5O(`qLWYt%(vwy{a(9yC)PS7c{Ae*auS$wEaamTO|5j;=aj zLp*Yd{=UperaZS5M5uyq@#>T3=;HkjQXl5^uH|ewN5?@Sh|oaSS`L1tFNxxWh+XKR z{g@*nq{mpGI50U^d+!mE;b(DsM3C6n@H3UZeZ*2i)I8bykB6HnrTZ!{^9j_VI3A$L z*z+_*Xr)Ds%-&PHlS*Bv6BDCFrl`0j!z@+x+IGm1566CurC29FSzFje)}cb6L`OJ# ztT{-+VOP9VyYEc`PB1);`pGc?&>ISMu#Gn0FKFZd6-{Fd|G(&9UuxLv2s2dwa!rmZ z9%1T$L`Z@HFKr@aM3)`JWdQoi4j;2&V9W0J4 zS@_5igX33iK1^B7@l>`D)x4CQHM(&^K2!Zk!dRQ{m2oytan!g9U(xIN>;{=n6QMci z(E8~52rJbSrrOS=xnvi~p48@RjehOe4b#MspI)j2plZuLV4pcEE&} zj8kesAa*TaF=1`WcXleM<^jyXK5k2IeXp+X;|hW~Sc0h~y&_sYDStCYsDmxT4*zHn zBp&%?dQUWd6^)4tS=WDlH(`xbO*gejh(g*!OHa)(V?CqNIecwm(9&t$us!I-CP}fE z;}=q~rv+PNs*t=D0}i1gN3ysgi>>F3Jx?7U4AxbQ8&xWo*w3za*X$&>w^^?XF!k5% zW;wroyWuzau)#H}a+hL{*c0sUdbxM~u2Tf^t?hFlGG^x?b~Xb6->7sQy~ceVoCsR z$s6~H8X1bDAQ*AZF@*fCGk}4c$sY_*zyTOBagizj?cGiKmkR!v!yt58Z<25gh)UcN z4N3bnr3yMGi?FN1a&`c&>4#2yMfg*cHqa^@) z*->GXkLL{lkaV|G0YW;iFpogX0ANl}%1efN06?=aZeo(4{4jtEe~bWt(y)Wa)uM2f zSY_?2<>-b-qZ9q(H`o4Zu>0Q; zNo{LKG8I`(6eE{884ih4@R2kJ6jf=~biz~Lu^U=Ls{A$vI00TJt?7i=_67iz z>6;=fio9`hj6|{Wg<1|e&9?kG>ZLl4y23?z|5X$j+VL{Mp5X`C-H5LHi3+t=d=&8t zpd=QCOik7_RmNSGn8C;V*=i+3G5>IKe~TjhL|>E|y{vxdElk6aYf5+ceBM`_HnDZ} zO@K;ez|#-4cSxQM>`VU5$=yVe{5|>g8sIZZ*+I6lmhCEiNb03iNZKBgarwP{!olMQ zL1uqBIq^dJyuugW$KP|?=8RhF(YNh8__v;ZTxf-S+x|xs8N-PBA>noPWkprLN)uy+ z_Ejt2_MFvB)$UtF_`7}I{J(t|aW*CXh#O-|`Z2NGxlkG_h5u`3)wUsyhzEHaoXidb z)=|w}9XH0lBs^ zlts{04SP*&rSC$=OH@*9*12lPmCC_w;0!etl~2 zVbNBmYfeey#X0C@9&eh^^JTu+_lBQIsq$b^WOq$DSz2kR<1Ay#gBfAjo~)g$xV5Ie ztk*$rZR-cbIkrho<5ihb)_0*%BqwIB8-9^)ti{}Vbp8D-U&==Jt158+H)46bueNgw zEOqNXX{3o7fkUdWD3au9Gxs~8Hg%%4_%Lt7%FjhUrbDnO(ocYWk@PX;uy5vP`O4$A z{nWLKT&IoCqh!S7L#GcWB`Y0-&vKwqI3LyTKvjk$AFBuB!Faa&I^RV(jPF_Y6a=y&26B|t7zvggTdDAL>XcI=;o5UUqi#t*Zx`&)N zR)jEC9luLs#qeG=S8vx~zv~}Q=N!PtxJm2=_WCLra?c7lW7x>Ws{0SV(sR__OOnCE z7!n;y-nkYZCcs2UROJetB2}#^*g)^WPcFfdf7>j7uYa8KcVagdw}zG^g%`@qqUq5h zb2)OjqVuDdph5%|ci2J`yVrK=g+3V5F{-0Eq}n}NUE5&YSRYekLn2jJ+~NGrgh6)# z*GyBRPL<-*w|p)UWv{LlJCk3D9osv52L6=%3!|}^3ZYR#lgHL3Qx&@pKNCZa3rtZU z6PgRjp=CPqo)CK1QQzycu;u5jlo6l(zH&YO=)*V{GR;3q?`lwkMGH&pWD|$|ljx%~ z1?v&z7EaVK1C&!;6=89QOIFtmH*JbhhR&-ri!{OpD>MI74`|$>mReMQF-o0F$G5BO zXD>L)n15fOIHO`abFZ!Hm9RUgl#VQo^QE;wOi=me9MVF=9(mlF6l;~u1j>TfqsJD; zBDxHTT3Ig;VTqkvjdSE9Na}uim~P$NOgd)YQFayB(*rl|o4L`kFS_N@`OZoIaSo^I z*suu%8}ilv<7LnO7B>92Z%&oVT<^8sVX&L-bxn`$RXBd19~Ecp$^{OwkET^4*4gK^ z1tiPfZ8PYyFt|I6DL)RrhcWft#)}sn?YF7BiAp6 zPOg7GtD3L<{8Y?uFd&bszWRjR={iJ%14&|(=I4U7okx}I_cnp>6AN$Bry4Fh?~yyU zjisynIl5w61cgwSWmvoq0@homg!|14ksd*6uB1v`Fqb>c!(xgyBm^x%My)&{IFl4z z2C=v^3?g>br^I!7R~M$q8_#gnSY`Ey@m^x61Zq6ny2m_lNLOnaqZxa@JN(m?Ss;RA zmuQ^0Pdq7os7IWV6^~%)2zI{Ls<&plP|x%=^%#KABbo|oMZn0SBAaPmR2vhZ zm)ez`MX3Vh)6bT}-B4biD=DuBEcwJ4JnX+?4Iuzh!BnxymTHsDDMkD8UI&(neE}-jbDkr3=0u z7h2=Dk(tZk@h#RhDnI$^J)LmGMBCe3rN0edTgFQjIh@&PxGh~;?n(A|(s9gq`)PQE zx0IPC>x$`ir(30GxJ~~|y2>^rq*~=m+el`t`Kx;a)xI1bxyjKr9p+C8k{3VD7=sDd zlhpj&>pgXp%xxTEAHPX68#U7Bx^LVv5RpulJ%QGw?s!sMuLm(UxD?lMSko?<99=Y? z5OR4n(fCH^)5zR*tj7$vRAbV0VrKS*-q)1V*XeX$O?8Ic!9F8z{M5Eh=US60yN4SD zlol2)Aps-0%N1`}kIm-hqd9d^+S1#gVk;QN8JmSB%um))wqFKL;uzzdj zJ!CK?Vp}en&a)WuG2raZnkZu4p=)SolQQ;ZEM+*a9{0$1bp;!~>qM&o4w~Ch^LEs{ z9W`%9&D&A)cGSEbHE&1F|G#rhIaJE^X#gb`X$@I*EM*rq{e#}__XI=(_`%V#=b(Jz zOaYVEs<%;7@#zw}{GR@aF`>O9<(W!sAkW>p8KXA3@DMh~4V(UnqBe&_b}g`sq{jHL ztG$^ha+|-&H4|-L$rz36nhSm!%b_)zZ-C8lr&sLEn4CC&X4PzPxu1|Dhyn`E5opJ} zSwsvg?r5g~EIZmxoDEXE{lcwD2Ki!Fp6@YRE~KV$#@pOX{}7p0B`7r!Hl^GwB0^(l zeV?`Zh4{>kj6LWaH#U`p!SsVkYFKGY(9|QpK004kMPOyjH_9LhbcVi;}01Z0`{?~Q@5xO1ew+#oi(SF;| zgMe@WXj@nd8U+yE-?kI}wxQ30h<@8fP@DL#_Pii6*nR-?C^Qrx^1o~b5XIk)bLY1W zdtK4L*;;>VqkUt;&eIR1c04}cHo?7x!GzNx7G{}Ph9DoWS z0bBrJY3=2%sx7AjJr1BKD^24CZ3MIb+)gKhDO@W#of|1004fyXV_i<9;|O^pqbYVGFi=Hp4@;Vx#1F!)C zfE1tt=z%)`7~lp301-d}kOh?&489cTc9J*cQ|-B3^)Qf zN;oDsPB=k0NjPOV9XL}sJ2(%x2XLWqad6M!^5M$i>fzqQ^}|iTEx~QWoj?Z-ba*0o zT6i$LAiOlZI=m75eRxm!NAOYbY4G{*mGI5*J@8}jOYl4J7YIlQ_y{xzU<45a1q59L zYXlF3M+h+pnFyr_^$1-EV+hL#2M9k9F%c;c*${;h6%h>(?GXJC!x7UEixKM(yAdZ4 z*AP#UkdTOwn2`jK6p##&9FZO(#USM%RUvgCjUcTcoggD4lOnSriz2HdTOxZSha+bo zmm|MJ9z|Y5K1V@Ep+@0GkwY;;aYcE8l7{jcr5$AyWdr3B6$h0ORTxzr)dn>HH6FDP zwHb8?bq)0r4Hu0WO%zQB%@HjG?KxT{S~uD}+BbA`bUJh)bWL;z^kDSo=+)@G=*#FA z7KO?Rr9wj~_At#X_ zu_cKjsUn#m`A$khDog4_nn>C}xy@aR3}G?FyV zG|y?eXbxy8XccIEX!B?XX)o!R>2&Fy&{feb(4)}{(%+{~q5nXCz(CEQ$`Hs<#xTo> z$|%Ta&zR2G!+6HT%%sm0!PLOC$xOnm$ozo0jCuYJ<{j}n9(VHZjI$uI2(mb_snk|fm%J<_}b>$ueDEgq;%qS=5+7q`sjYp!`3s=d!=`# zFQcENzhuB^5MTlX-MrP)0)@F`v zZfXA30?|U>;a9Xa_w@Xa`Sf^ac6T6 zcVF@l_DJi3&#rg44;Y+iFgr-5a|#(_>AXS#JJ+18Y8)0CZXUTe(mpCa`hHAi z?BlrNc>jdj#K@%X?B!hWJmP%R0?tC(BE@3tyr&YuX?Rst%a^*uBUBKZ@k*%-fZ2H+Zz6A{B>>Hefx4Jd>3~&d+*L( z?Y_kRz=6TR+M(y+&!d=cq~A)8d5=GwXq+sbx}08~MV*tLmtF{7^nBO` ztQ0^dybI)3i2aw0vR;dgPnjFQRw;o{F-fWNcpR4oZs8aJcK~GPBCda1%DzG9pkBu? z9f0?efYbBc@@-{2mOvy*0oum_D&f}3zZm74rEJ@!>m?xU9YAs}0nbbT@T`hYTvfwM zX9ND;4=BM?`hO6*&z+(7B(QNLfIG(kidefiW5hnd`N&it81(rpS`5gmhTk|nkoenD zwvrV53%ff2f@EnmJ!)$_i6VM1+$!&BxZOoMb}(YC%%wPX(_%CgjfV3C$isy2FS^8q zyjUXOt1SQo>{8KHKc@o}prI9gFx*Zmmh=e7jkzmPXo000SH}>_GC&c!{-m8Y&H;=V zA1e7#xI3t-cno*2A;G{qfxd)`G=Mm{6!2BYZ#|0Y<}zF%8sNo5lpMaHOHfjJsCqW5 zS>%HW{8{)EtYyK1UD$~aMJa;`4yJRjvm$kKBT==1`n&vZ20xjo45Ne6)X zEEql;;Zkd*x3kFjaXJ*O!ER;J4vtFHi(w>N++R*`FO1e;#S+*Qw~vl#3G>^nMu&=(TG zZrvOc|HA99-)n}-o#rZcTlUEZ(5vZ;akv4u4$$uVVE7?S=jT^y%T3wyu3K%M6!DCG zM={~&kxxyAKsCA9n}Db{QD5>%FonYaK+&d7{plpf&2;J%^)p8#d{8N9;HmmcK+G?# zFZJ^YrM@;YUDeq6Pw||17+vC)9V?mOP5Xr|r|e=Iz{{8iu-kXv3i;pl=sUdl$qD&( zO|2w`6%>qIV;&6ubIm#9MJebo`^CGylgBUiuYN8X@4yGcfp~%e8XBn?(rWOvcw!Iy z5(4fM4K*~rmm-Ku^t;|2fTZNa8=gD>2w924l|*5KEoFx(XQGEd5jqQd0LLBT9CPjr zAg2iN$-N=$oDRbN0{2uP_#VI}K>;8WC<1VJOgJ7tMk5Y@@J>;BhRCOcP)0dgVZcpx z7!ZUe_Gp@M%(5s&n4^Mpv;#gtl_(A5E6}BEIcUKET(GFsm1T^j5_fH538A$nGMOjF zdiMDS?W)YPMA_#g3KPY3*1&R%X2MgM^J_A?8l(hkHK|xsV&*l3ycC&9xu>X9a4Q2z z@gApldd^3s3+HoEJfTb3hR*Vg>?Nt(YGR585VpcT#587Mb>)Cqu63E(bT%y=MbZE^ z`g=O5M`%(ivSz|r3WQnsOs=XktvkpDM8Ky99=Hza4D{nEG;fUb`kuiv6_NS_8zHEw(^A-U;yw2L>hE#LhH_3>2E8|2e&Nm-Dm%n`*tyK=qp%uvyQ1@MHZ*{W_@z>;(0}gQJ4f5(brcl>qrTH#phb7JKm(VRfk-HpRcRk zG{fsyIR1LP@R@EVn|@iCN2LN)wdNOQ1=g_pC3VIAdd>7or8!2GzE~d&v};Tqotvw8 zIdXM{O3ZCj4XXLtI}MG&wh))da$Vs!D&OTr98PrK@G7MlDswsP%D%c=?-)NRjBn^{ zYxkVsNAfs1qk|*(b%WmCBH^5&>8_7(J>Oo>ING&Fc}`P91IN0_sGVDHs(+rg)XXev zl>;l|V2i%%9$xfI8Q0Y6<`m&NBN31*$L>~}etC@P5v{-TiB>J)(TbVloTr=Z#XJ44 zXE>M3I?gWN8tW%l<{rL%bRdV?G<6(d{{6)I72lnzv&CbJa&XwL8V1d|@bWUcLujQO zVJAsEp20m{$?CP0T7k6f#X#(59^qWC1hb!f$r3lZOGVQ$iB4)gA0)^168JdPPhm;! z;>)!z-e<_+YC{nb?8!mhd)~sZI#7|iMy1xs&q-*mf*KSV>)K>!&3)fsEvz7Y6&)q)WA2b{UuSXMlAGW)^A!$Ify4reijKDfLA%4$BF9CkS@ z`MfpQV`TkiPLtaeyd2Jx7msDO0#~Z)<@)R{;SQag^^4qdqlV_#99{Hq-}0Cx&wO98 z-*F+XYd~5yxwKYa-ma)r@NJnluJUCM>U(-Xls7>B&|z<3w{3!Hzq6%>#gYBhD}f%+ zk`i)|W7}Gd&t%V1AZIvZJNnx`xjWg--`6KR#6B?74sgRz%QUc2tKY z&-7{6Ov?9;MC-bITt_Fa-e!7fyY<&0T`vYQyM1#F8iG6)oS$iN{RY_{7M54!T0no~ z`Y5{DUtbCOZaa?$3dU-%<-g{*RT8OHwr{`ANb^Hbnd-m#4MpjRdCTJu!FiJA52NsQl0n|aM4RYSf{?!``9&!f{U>A8p4 zUi+8gEpxeNAFi-tCw~xIp1+`8_ft<^x^nBjc&GltT?-ECKh@8mr2<2t0D?v%0bdC= zz)PnwU8n=FI*QWm8jKtL&uX#$3E68bGtk2T_`*_ANj{VVh?5dnWJ^Gid6Xp0aCjgq zDM3W-7cL)Smajy7F_d|O_<`OiPM>^v6Jf$Mf*Afvt__r z>`+FXUEx@!7heRiNjcDvT=Aw<07lU^Cw74-B{VTm0PZ1?$P-C*E2%SuqtM^ASP*Luf>2dgcOuofU$x+P=QEoccNeCta&m37_Qb?ZsQ_l^ zwjRoft+f2xRB2(&)N$Q=2EI^=7FLTz{%W^YNKK<=wTivt2n9sj--qyCVfIXQ!e=4f zWyUIhQnbxzp0I11&hTVy;TJ_aEbxRaK#f4A4#BXYo7&!j1d8l^A&D^!p?l8g@YIBl zvPWym-Iwtfve$H;Pads(s)y}ilHvnVu*rbdVy$Pq-c6=4lub5P^OseW8=pRP$=l52 zh%G1z=m6yypR~!EEE_%|Ep*JRtSDO-*2{R{FnRy6ou%Jo4Qv?&;<&B1hpu7kKWY4O zKtv2i(N-|meoD>N&NU1;6?kdg68DRuW!T4@NG#rHx-S&CX(A4dbF#}Wx_Ta(>Bf8| zW)=53TELbaIgX3qQn&M~eDUyG@0sN2DJ8TPtC~{mWPH2j^mE4oWl0dMR3Ko^SBo&UDo~UUN^3z<%evATS{Q4VILGmJ{`t zK*0yAx;woNl=Q11^oh8b3-9fT8Em>T5p;JlEdT`>xICQ89YNZ_c z>pDd;+9uJ)?oZGD7}* zzTA4fu`(n3Zh?x4!DfoD%L;cOK2^Fq6H@_)3QlVw)pAMiw6{SPJF?F>>j^@%vIwnr zo(dH8yBO4$%;A)<|19^F8#El)jBioY#sT>*Yor$yCQkkNHc*VR;b%0FV% z;^H!^T9M6hzrSmXMb#_Vks^`VwPRM#2fbGgyQu;tFN zsKx;F>RO8zG)8vTGzL>_b`5WYhh~UW($yeGwJYor<2p^2GkQ?rp*bQ*?fOjW&Oau* zCKql>#wUDAPIm&_X88z9=uFdp)u&GR-Ar-OmZ%{Blm_6FU(E2RL_GyS)fn4mH&2vC z8odHzhWm<7MW211!jzBu#SCLj-gC1dM5z1C497~=Wd*|xdRM1h>^Z>}^}_OA1q{Eb z-pqtD!&{k-Cc(k~6DC+i;$LHeRZRXhCRjz`4->3%_YV`SBKYq!!7A>5m|zuu0C0s; z&c9jUfBgQ72}aoj03`K4ceriu zddmdgGQqb@@GTR3%LLyt!M9BCEfajp1pkXnFm!SAoB*{S8IYWRi9o=Q{%>7@iMTV6n}RD<=uQJmwXFmlj1)vX138x8mV6DwoGpV(cA?qX}aC&>QK zigOxwpW74B6Z8&jdK-!UgOzFZ+&0Yy>?+An8UpZ$ZGQVH@^UolPmT~ zra0=!*-Z?D*-(OompR1$lDA4LUvX>+riyqhZH`#9;W8)KWBzvT8-(b;6U)16)H z?4`Rh-gmkKr$gf3y2TiwSs(B2VB&l{Fk+#UJfirmIM?mTelJ=EM;aSftcKl>Rppnw z6`LZP!qM0oLzlZv8`Hp8%$m?(a`{e$DsDD5S-+P}^W`9Z+ixZ~|Jy(f8Gs#a%^}Yd zs4k;*BddvfO%T=3vpP96bjZg8mDPmm7PorOvZ#&J)JZA~6Xa%V?&_6;9)*@O*4!u#FYc$XRg_||{ zz(-j>hstWc@vrA{SrBNKOoqy8>bzamv3(f6+W1KSMpiRt?d^iGizyM}MApspL1EKC zVv^kq|1+54TvmaqesRLHzSOjwvTd?K^-}HGw1=+^geRdT$Ir!@a^{cp-oMysf$A2| z&54=EtP3tKLz!TRLz7{a7US!E6Kuryqb=(J@96F!qjn!=JNI|G@co!7@|cGz&h=!6 zw{_7`=yMGnQ(~||EClLpM|>#Nbq>5U!js2Nf}pHSovG{A)ZUG5aZHuVtNlyths)== ztUo_*|G=7AfwD5$H5U|LsD}c6R8?%h&W&eW^UUpO+rL7Ms0;BE$A&V&sK`BIfiPLk zA1IHfVjVxn?_OSs=Wc)hNxgsd&3vr!ar_TB34I5^B&#}jm;*_e1m}!K2lI)BYIg(e zPs9$O|EW5ttMv{G@-qCBkOQB!4{H|o_{MAQa?3~3Q!^lhD2`m5_Of8o zj_ia6d(z1%jPuYHdM74niGdkxjz<<%1mlm?)D*}17#NQ8QsmP#l!R1S26>B8@hbJm z`gGZBLu+`o+6|#hu#IhDL%P<^JLQZ3&UKJ?`WTXxiq`wP>%4s#^0`W#eEOIs_XJ{a z*KHw&GteDmaH z+r2rZoUa$T^M`*ygAYwxh^bk~`tP%fn5=)!BiQ>Rz3Cpbryg|(nUnLQicg0*!kglV zxX#V#Ce^HuSNE#M2H$m5|8az^SO@$PaC-ocnwN07c*4M6`oi3CeIlFvAJ*PG&%MBt z!gYq}3&RB5(T)X3rK_)`t_rZ{y=eX+y%|DVfSXNl`Ei6?oJIMjI07HNj+O4H_TIWO zC$9&WDolEF5I-7`n{JFZ=1A^ne(g6jh^n#~p*AKX%crh`Nhmv#;zG%(Vef)YHhkbc zC4VE}K9&$m7p~>m&BpNlco!z%uBx*%wh5NTQO6rA(7h3GpF$5Z3ei2fN3NxH0{P_# zGp8&N9~z7h=EVP!-h88+oQNPc5lVV*X0qtU5mMUB_%7Ju&FjOU!KB?jsGh78C!=Wt zY#tR?_A|o3anWkDS^6L%v^auoE@+PLq{-W28U_twy5f@Q=zpgS{vrs|UWnmmW}DMi z1vXO=RK9Zwb%aB6su_uME#c7O2)rL_ z*?Sp_-`*WiVEDDI>LDq<_n=TGM03x=imo{fgce7%Q@FB+uTYfmLB7RRflIu|B1A@x zbJz@rwkZX4Cr)CKHS12=ZlJ+OKJ~siQ0dK5)B4MKzg+CFkFP zi`@94t-N+W>dgmGec|QrvyW+E`oa!-$6_#jVM^xSa4&JFBRqO+gns&ix@z~`4+Y|_ z%bzdy27V}Htv?>dg5x6rek`%0hNv9DH{T(KUwV2cq#6z7y#agY%dBbWS{xRij+ryg zmn%Cg!j0BQ5!7X=FHi4g5`368dAn@2+`$8`bbx#Krti5 z{Bo1n1e#I0W|(~AA2)X?hjg6careY1?wnBDAcS}yc6_gq>sb42gO#AgAxCEalinsY zk&9bQQPGgSvXE{%0rG0DFF=%e03#&@FHVcFPnN}2<`r{NuCCIIiAs@8Rhrg7iZZDK zcCgL)iG^sP&~b!oRQ?bfB;y0m}#H#@M>j@u~hHcGpV(r%-)+bHce@x+VyzfL@5VnH3H zcmQar8dqYJAQoD!mW=wvhZ+E_`sD7HJFsgUSHS+U4Oo>+qozVydmiUjMGsT-q))@kAwq`=4+# zZvOuoH{+J_uW>VO!GE|JH~T-_jGOzv&&{}%{^4fas$uK=(3=JH4%58)_4_Yw1`!pi zZ8i4i4hPkKGw1*S(>?&O$_4;CH>gaV)Gar2tEG2y_1tnZ(67)fH*>3{cdMm$tEG3V WrFW~PcdMm$tEG3VrT4FD>HRO8$m#b0 literal 0 HcmV?d00001 From a61409c2ee75343c50cc9f67e4b75f902c12002d Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Thu, 24 Nov 2022 16:38:06 +0100 Subject: [PATCH 2/6] feat: store images from multi page tif file in chunks Signed-off-by: anna-charlotte --- docarray/document/mixins/image.py | 51 +++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 10 deletions(-) diff --git a/docarray/document/mixins/image.py b/docarray/document/mixins/image.py index 6d055747262..16f62f4394a 100644 --- a/docarray/document/mixins/image.py +++ b/docarray/document/mixins/image.py @@ -3,7 +3,7 @@ import math import struct import warnings -from typing import Optional, Tuple, Union, BinaryIO, TYPE_CHECKING +from typing import Optional, Tuple, Union, BinaryIO, TYPE_CHECKING, List import numpy as np @@ -186,10 +186,19 @@ def load_uri_to_image_tensor( :return: itself after processed """ + from docarray import Document, DocumentArray buffer = _uri_to_blob(self.uri, **kwargs) tensor = _to_image_tensor(io.BytesIO(buffer), width=width, height=height) - self.tensor = _move_channel_axis(tensor, original_channel_axis=channel_axis) + + if isinstance(tensor, np.ndarray): + self.tensor = _move_channel_axis(tensor, original_channel_axis=channel_axis) + elif isinstance(tensor, List): + n_imgs = len(tensor) + self.chunks = DocumentArray( + [Document(tensor=tensor[i]) for i in range(n_imgs)] + ) + return self def set_image_tensor_inv_normalization( @@ -359,26 +368,48 @@ def _to_image_tensor( source, width: Optional[int] = None, height: Optional[int] = None, -) -> 'np.ndarray': +) -> Union[np.ndarray, List[np.array]]: """ - Convert an image blob to tensor + Convert an image blob to tensor or List of image tensors, if multiple images are stored in file. :param source: binary blob or file path :param width: the width of the image tensor. :param height: the height of the tensor. - :return: image tensor + :return: image tensor or list of image tensors """ from PIL import Image raw_img = Image.open(source) + if width or height: new_width = width or raw_img.width new_height = height or raw_img.height - raw_img = raw_img.resize((new_width, new_height)) - try: - return np.array(raw_img.convert('RGB')) - except: - return np.array(raw_img) + + # support multi page tiff images + if raw_img.n_frames > 1: + + frames = [] + for i in range(raw_img.n_frames): + raw_img.seek(i) + try: + img = raw_img.convert('RGB') + except: + img = raw_img + + if width or height: + frames.append(np.array(img.resize((new_width, new_height)))) + else: + frames.append(np.array(img)) + + return frames + + else: + if width or height: + raw_img = raw_img.resize((new_width, new_height)) + try: + return np.array(raw_img.convert('RGB')) + except: + return np.array(raw_img) def _to_image_buffer(arr: 'np.ndarray', image_format: str) -> bytes: From 5a04efc6bdd60ae3ef75d973ed563a6d55f4cc5a Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Thu, 24 Nov 2022 17:04:14 +0100 Subject: [PATCH 3/6] fix: add move channel to axis Signed-off-by: anna-charlotte --- docarray/document/mixins/image.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/docarray/document/mixins/image.py b/docarray/document/mixins/image.py index 16f62f4394a..fa91b99119b 100644 --- a/docarray/document/mixins/image.py +++ b/docarray/document/mixins/image.py @@ -194,9 +194,15 @@ def load_uri_to_image_tensor( if isinstance(tensor, np.ndarray): self.tensor = _move_channel_axis(tensor, original_channel_axis=channel_axis) elif isinstance(tensor, List): - n_imgs = len(tensor) self.chunks = DocumentArray( - [Document(tensor=tensor[i]) for i in range(n_imgs)] + [ + Document( + tensor=_move_channel_axis( + tensor[i], original_channel_axis=channel_axis + ) + ) + for i in range(len(tensor)) + ] ) return self From db683ca7e6869fbc4e8b740ae3b8a9ee8ded95d7 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Fri, 25 Nov 2022 10:42:32 +0100 Subject: [PATCH 4/6] fix: get n frame in try block Signed-off-by: anna-charlotte --- docarray/document/mixins/image.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docarray/document/mixins/image.py b/docarray/document/mixins/image.py index fa91b99119b..7f4c32d22f0 100644 --- a/docarray/document/mixins/image.py +++ b/docarray/document/mixins/image.py @@ -392,7 +392,12 @@ def _to_image_tensor( new_height = height or raw_img.height # support multi page tiff images - if raw_img.n_frames > 1: + try: + n_frames = raw_img.n_frames + except AttributeError: + n_frames = 1 + + if n_frames > 1: frames = [] for i in range(raw_img.n_frames): From 2591dc2a7b58193534cb9584603cec05aa3beec5 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Mon, 28 Nov 2022 10:17:44 +0100 Subject: [PATCH 5/6] docs: update documentation Signed-off-by: anna-charlotte --- docs/datatypes/image/index.md | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/docs/datatypes/image/index.md b/docs/datatypes/image/index.md index e92067a6a5e..85b6aa37689 100644 --- a/docs/datatypes/image/index.md +++ b/docs/datatypes/image/index.md @@ -33,6 +33,26 @@ print(d.tensor, d.tensor.shape) (618, 641, 3) ``` +DocArray also supports loading multi-page tiff files. In this case, the image tensors are being stored to the `.tensor` attributes at chunk level instead of top level. + +```python +from docarray import Document + +d = Document(uri='muti_page_tiff_file.tiff') +d.load_uri_to_image_tensor() + +d.summary() +``` + +```text + + └─ chunks + ├─ + ├─ + └─ +``` + + ## Simple image processing DocArray provides some functions to help you preprocess the image data. You can resize it (i.e. downsampling/upsampling) and normalize it; you can switch the channel axis of the `.tensor` to meet certain requirements of other framework; and finally you can chain all these preprocessing steps together in one line. For example, before feeding data into a Pytorch-based ResNet Executor, the image needs to be normalized and the color axis should be at first, not at the last. You can do this via: @@ -150,7 +170,9 @@ d.chunks.plot_image_sprites('simpsons-chunks.png') Hmm, doesn't change so much. This is because we scan the whole image using sliding windows with no overlap (i.e. stride). Let's do a bit oversampling: ```python -d.convert_image_tensor_to_sliding_windows(window_shape=(64, 64), strides=(10, 10), as_chunks=True) +d.convert_image_tensor_to_sliding_windows( + window_shape=(64, 64), strides=(10, 10), as_chunks=True +) d.chunks.plot_image_sprites('simpsons-chunks-stride-10.png') ``` From a53593518e0d073cc781e6b7f7759393b77785fe Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Mon, 28 Nov 2022 13:14:00 +0100 Subject: [PATCH 6/6] docs: apply suggestions from code review Co-authored-by: Alex Cureton-Griffiths Signed-off-by: anna-charlotte --- docs/datatypes/image/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/datatypes/image/index.md b/docs/datatypes/image/index.md index 85b6aa37689..2284b51c3c5 100644 --- a/docs/datatypes/image/index.md +++ b/docs/datatypes/image/index.md @@ -33,7 +33,7 @@ print(d.tensor, d.tensor.shape) (618, 641, 3) ``` -DocArray also supports loading multi-page tiff files. In this case, the image tensors are being stored to the `.tensor` attributes at chunk level instead of top level. +DocArray also supports loading multi-page tiff files. In this case, the image tensors are stored to the `.tensor` attributes at the chunk-level instead of the top-level. ```python from docarray import Document