From 19e58d197f508428eee52bfbf2b7fca7a9e0d0f6 Mon Sep 17 00:00:00 2001 From: mikeAdamsMOJ Date: Sun, 23 Nov 2025 15:28:11 +0000 Subject: [PATCH] upgrade preview to respect cell formatting --- jupyterbook/basics/selection-formatting.ipynb | 2 +- jupyterbook/examples/students.ipynb | 362 ++++++++++++++++++ tests/fixtures/xlsx/Students.xlsx | Bin 0 -> 8514 bytes tidychef/acquire/excel_time.py | 2 + tidychef/acquire/xls/shared.py | 41 +- tidychef/acquire/xlsx/shared.py | 19 +- tidychef/models/source/cellformat.py | 49 +++ tidychef/notebook/preview/html/components.py | 38 +- tidychef/notebook/preview/html/constants.py | 17 + tidychef/notebook/preview/html/table.py | 123 ++++-- 10 files changed, 619 insertions(+), 34 deletions(-) create mode 100644 jupyterbook/examples/students.ipynb create mode 100644 tests/fixtures/xlsx/Students.xlsx diff --git a/jupyterbook/basics/selection-formatting.ipynb b/jupyterbook/basics/selection-formatting.ipynb index 5b96952b..7bb01cef 100644 --- a/jupyterbook/basics/selection-formatting.ipynb +++ b/jupyterbook/basics/selection-formatting.ipynb @@ -135,7 +135,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.0" + "version": "3.13.7" } }, "nbformat": 4, diff --git a/jupyterbook/examples/students.ipynb b/jupyterbook/examples/students.ipynb new file mode 100644 index 00000000..8fe878d5 --- /dev/null +++ b/jupyterbook/examples/students.ipynb @@ -0,0 +1,362 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "b3ed8031-f02e-4a8f-a5b6-4a1551bea391", + "metadata": {}, + "source": [ + "# Students\n", + "\n", + "This example uses some fictional student data to showcase how cell formatting is often used to represent hierarchical relationships in tabulated data sources.\n", + "\n", + "_Note - the data used here is fictional, the structure (and formatting) is not and was taken from a real UK government data source._\n", + "\n", + "First - this is how the data looks.\n" + ] + 
}, + { + "cell_type": "code", + "execution_count": 1, + "id": "0e947a89-995f-441d-bf1f-994f9f3b42a2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "

sheet1

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
ABCDE
1Student count by location
2Note - data is entirely fictional for technical example
3May-25Jun-25Jul-25
4London100200150
5Inner80130120
6Camden203040
7Greenwitch305050
8Hackney305030
9Outer207030
10Brent85015
11Bromley122015
12
13Cardiff1308891
14Inner956050
15Roathe504048
16Cathays45202
17Outer352841
18Pontcanna171819
19Llandaff181022
\n", + " \n", + "
\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from tidychef import acquire, preview\n", + "\n", + "table = acquire.xlsx.local(\"/Users/michael.adams/Code/tidychef/tests/fixtures/xlsx/Students.xlsx\")\n", + "preview(table)" + ] + }, + { + "cell_type": "markdown", + "id": "7936fddc-7bb8-4812-bf8d-637f0582a7c6", + "metadata": {}, + "source": [ + "There is an obvious hierachy here that is only denoted by the use of bold and cell indentation." + ] + }, + { + "cell_type": "markdown", + "id": "7fe02e4c-40f6-458b-9489-f878927caf50", + "metadata": {}, + "source": [ + "# Requirements\n", + "\n", + "To keep this simple we're going to go with:\n", + "\n", + "- Area (London or Cardiff)\n", + "- Sub Area (Inner or Outer)\n", + "- Place - the actual location " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d4a9c72b-0328-44b0-98c9-2bcbd1d21c41", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "

Preview

\n", + " \n", + " \n", + "
\n", + "
\n", + " Selections\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Area
Sub Area
Place
Period
Values
\n", + "
\n", + "
\n", + " Multiple Selection Warnings\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " Area + Place + Sub Area (2 cells) → AreaSub AreaPlace\n", + "
\n", + " Place + Sub Area (4 cells) → Sub AreaPlace\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
ABCDE
1Student count by location
2Note - data is entirely fictional for technical example
3May-25Jun-25Jul-25
4London100200150
5Inner80130120
6Camden203040
7Greenwitch305050
8Hackney305030
9Outer207030
10Brent85015
11Bromley122015
12
13Cardiff1308891
14Inner956050
15Roathe504048
16Cathays45202
17Outer352841
18Pontcanna171819
19Llandaff181022
\n", + " \n", + "
\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ValuesAreaSub AreaPlacePeriod
100LondonLondonLondonMay-25
80LondonInnerInnerMay-25
20LondonInnerCamdenMay-25
30LondonInnerGreenwitchMay-25
30LondonInnerHackneyMay-25
20LondonOuterOuterMay-25
8LondonOuterBrentMay-25
12LondonOuterBromleyMay-25
130CardiffCardiffCardiffMay-25
95CardiffInnerInnerMay-25
50CardiffInnerRoatheMay-25
45CardiffInnerCathaysMay-25
35CardiffOuterOuterMay-25
17CardiffOuterPontcannaMay-25
18CardiffOuterLlandaffMay-25
200LondonLondonLondonJun-25
130LondonInnerInnerJun-25
30LondonInnerCamdenJun-25
50LondonInnerGreenwitchJun-25
50LondonInnerHackneyJun-25
70LondonOuterOuterJun-25
50LondonOuterBrentJun-25
20LondonOuterBromleyJun-25
88CardiffCardiffCardiffJun-25
60CardiffInnerInnerJun-25
40CardiffInnerRoatheJun-25
20CardiffInnerCathaysJun-25
28CardiffOuterOuterJun-25
18CardiffOuterPontcannaJun-25
10CardiffOuterLlandaffJun-25
150LondonLondonLondonJul-25
120LondonInnerInnerJul-25
40LondonInnerCamdenJul-25
50LondonInnerGreenwitchJul-25
30LondonInnerHackneyJul-25
30LondonOuterOuterJul-25
15LondonOuterBrentJul-25
15LondonOuterBromleyJul-25
91CardiffCardiffCardiffJul-25
50CardiffInnerInnerJul-25
48CardiffInnerRoatheJul-25
2CardiffInnerCathaysJul-25
41CardiffOuterOuterJul-25
19CardiffOuterPontcannaJul-25
22CardiffOuterLlandaffJul-25
\n", + " \n", + "
\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from tidychef import acquire, preview\n", + "from tidychef.direction import right, up, left, down\n", + "from tidychef.output import Column, TidyData\n", + "\n", + "table = acquire.xlsx.local(\"/Users/michael.adams/Code/tidychef/tests/fixtures/xlsx/Students.xlsx\")\n", + "\n", + "# Area is any bold cell in column A that's neither indended nor underlined\n", + "area = table.excel_ref(\"A\").is_bold().is_not_indented().is_not_underline().label_as(\"Area\")\n", + "\n", + "# Sub Area is any bold cell in column A that is indented\n", + "sub_area = (table.excel_ref(\"A\").is_bold().is_indented() | area).label_as(\"Sub Area\")\n", + "\n", + "# Place is any non blank cell in column A that is indended but is NOT bold\n", + "place = (table.excel_ref(\"A\").is_not_blank().is_not_bold().is_indented() | sub_area).label_as(\"Place\")\n", + "\n", + "# Get the period with a simple string selection\n", + "period = table.cell_containing_string(\"May-25\").expand(right).is_not_blank().label_as(\"Period\")\n", + "\n", + "# Values are numbers that are beneath periods\n", + "values = period.fill(down).is_not_blank().label_as(\"Values\")\n", + "\n", + "# Create selection preview\n", + "preview(area, sub_area, place, period, values)\n", + "\n", + "# Now we define the visual relationships between our selections to create tidydata\n", + "tidy_data = TidyData(\n", + " values,\n", + " Column(area.attach_closest(down)),\n", + " Column(sub_area.attach_closest(down)),\n", + " Column(place.attach_closest(down)),\n", + " Column(period.attach_directly(down))\n", + ")\n", + "\n", + "tidy_data.to_csv(\"students.csv\")\n", + "\n", + "tidy_data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"ce267d9d-5de5-4e92-b040-a3867075dc0b", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/fixtures/xlsx/Students.xlsx b/tests/fixtures/xlsx/Students.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..1eb935749b598744986532d54e472db3cbdba534 GIT binary patch literal 8514 zcmeHM1y>yDwr#xe#tH6jA-EGXxVyWB#@#(g&;WrT!9BQJfXQy;jw#t~#f>zuJ2rQ<8;&#Rk9wfB*o19AI*oVPyaX0K~xp09XJZw5}M~-o@PB z#Zb-D!Q5G&#p9(NSq>~TT{Zw3a{s^E|L_WwB@HQbv4JG6q`!-;Gs`SgivT$fg8DES zl!ZFG6MIWcbu+E4A2VOwfu6s|vEi=599{5cKN+^Fwtrb46w%wFf)W+j->Ggu$j#c* zw@=-H_awndNBbxnn^5HG6GOuoi*!J$V?&n;hxm%9Oc{X{4z5tZTE<+T2F6lv6H19r z5pVze$7KbLMGT(7;z`ijIMx>%YkF^kvo+=yuu&A=3%G6u`AXzks`Mr*O-{EdvhB`v zjnAfxOraOq!s6P{zKtxwx3qJU$a4@>7e}6#==8?fGIsdr`FM5Ey^cCpx^3pdu`*=V z%@heF51N<(DA`8U;+C`9kNS2C9e*PGc1OK}Mby1SXx5w=8)v3~Pw|pzIlnJc zT#=WXA;_<=f1r`4Q}Mu*$j8259AO<+;`$N$5@VdY?S8^tjR%GhYX>mp_93tGb_uF{l9elALihn{(4!0 zykZv{O4yO~_wfF!sc&(h=d$ji(ku7`4@6y8qYYf*n-h z2wu7UiMKfY&L)<#6+ZoTZ(?})|DoiPJ9%ehGur1XX5D{Soc263h(+=C^#^$ z*+-@NsIxEpb!s_IhB7?6xe>cc`;P~LqDYGtArb2DBtceNmr8;V1QkLdbjZqh*s;2U zoor3OVA~&YD^E=sJjVv|%c^}ex*;aQlD7V$RA+7_y%$nn=UXOc5~1EVVpj;Oet*PI z8zrs|*yHycq27<+) zPgQD^h#2-}JGY@$5t_1Aci7MYbx}KuEN$l066?gbBUBh|zGv_*W|0ZdLZV-&hw3Sk zUc1q;?n=h|RIvk`f?os*5p=R`D?oo8R}PhqsrTBr1<316NS#auD0#&N-SQwDi4kCS zk%oR)$$axlsGniZRD#urhZcAHC4yU{3tE$2eZ%*N!`-y zd+p|*PMAK@R(>J$t28b-t;WP`<-iYzqDgFfhX;?VbcejtsQmeav7AWyrPO^*wN2#* zCsYc_j_V~Ed^C3iD%h3I{)trPgP;FFzh5F@tgIX4)|u zi#&oJZmv+S1Af~?qiDJPQMR*aF1Y=NWcIf#D5r^!#<@NduBQXS5P1F_z=1+ZDQA!? 
z%@7b10Dw>s0RLC${|w{bMIQ>1gF$HUzrB^I$jf%Kf!dHC!r9!@-Ec5wU05mh)%VbW z1GV(?H000x&X))on)S7&Wm#dsVV+0BeQsBrXsa+-7atjmA~9jTuY@W?)I_f9kxdjyB=S>q~95y3tuIx6>W5k0H z(Hp8=54a6c-7gQSZD=4%qqo?2Y!aKlgwG1{Bpp9{g7`uJ)e|tj{e_(I`lG}p@U->2 zg}mp8?Z@0(lntU%S&|Y?L7y-evhaQ>Qh)jc=Z&1VE$T-rF3;}<={x36nTF1fEY=75 zrj{UY`tOj@=kqDQfaL07*Z=@N z-psEv;NP-QrF+TMcCx4aB5I9RNRM5wRRz$|Nvr(MGda z5l|-uY!+a9>5naSkVs|GJ7(@AyvTi@jo@%@e{<4hV!7Wt1;`{5Z3&uQ*;%I#r^|k6jk-0uHcn&=mYNvxB-D(_d2VE zR|VJ1JU#mbFC`a>;4v74i#Z}xdg}k7x3M~f`dPtfdB_=5#Xt;&NEFq?&R#xh9fhDxZItES% zR`^lno$M8SAGe*xrv_^DS>*Qe$Byk@+2YvGtT0N~Fw!OOK0T(Gu0s#>Xo?8;%B7}qJ7J(!fBw9IJAWxR|PmXKXc2#SmLt_t;z ztIl4Uq?fswG)G(tj%2&Y+h$)fb*CE}i{x$OK9x^u@6FpT3W=2wsJEfU%db)+2UtQUXfuv|4V+kQ14)!#b~ z5ZKuza(xEL93d$A36(O<`PT$bQSNb)(6##Y@U@1?TxsHO2_tgsNiTDVlk5GL%-%iO zHyuEtQUrXw?u1zXqFLU+Se#4N?0PXQdrqoO2p1 z8g^v)-@l8txaia9n>>r{I26bn0e^HnQ%e=|jx=uX;ZdzMA`QOqsx)tDCz-pTZ#rLB zNp)pMv2w6F{#=QM$f}L^w)(XrUk!Ju?lg{>JPVU82M>pb1xP#P=l9OC#k69)_xtMK zIQR#QaVZwfhp?uG@S1q0Kxu48J{n0n*8>qVjk9>k)9 zBWH3GGezW?1lqJCdU%`$k;o)TrtUS?je^B`Ma$~4MBhDa8R)h{qOzS{Hi>cX2?h0` zEIqhnil}o0>TEk@Dd}^i%X5u|ZGnNH?ZOrdH{3x!Aa)i$?I?qn%MGzt(|IU0{TNeP zp(1WYP*S#%kdXGFS(>b>*4&8?0^_|Ti#1}YCaZ+BH%iGiY;!|(@V!$DL9QNV*F>g& zZFnLw#*A1E3iP*CLUN`2rS3K~+cZ3gvrDV6S5HOJl8aB9PEWx-l=cdfjRzSMYghWRGk$l))nif7lX@-4s` zRFMWOQ^s}sm2nbTK1E)*Tnl8%J+ZRrQT_(WHIaraC&n*ek5{?`r)aaf6`8FgWqEPQS%%PS+w-&ADO-5vc$Rqu$umE~;@_pxg6JJ( z7K9$J(0;TweoCc_mHA6^)}Q;ID*0LSr%JZrZ3_dNx$8}as$zIVSG%|0O9VDcj7Ezy zBT67r)!h4ri!tX>a}UFJ(^`M$@u@jRb@jPbVmo?`^ftw30|CKwDOZ4D?Oh?`5*~6|13QEMc@x5dH{W+snbakbcRzZuq?m13%BT;GOG_HjoMxBNg+C^!V2gh& zm6oa^yIO8A%I!Mw;9`&-@r_M{Ua)^A^X6QUpdkgt$CFVYRNu>E%{3blALr{NVOYrN!L=CR6-7i&$JMz5iG9JvB5uFH zrLSS8JZ<9GN8JZn!IAW{vx}LFSh6+$xh};5%QJ`7YpJ-8;l7XQvpqZx$3XULZFJ#` z48t-@G!oG#LpSp1#IVMV24A;v6O#7#CXdnJrojAC;P@oHgT34!y%ZA|0IHez#)4`r zj=VH%nOP|3ZDfzDi3xx-k7ls--6(ML*)iG1)yL$rOC(rp&d zt!(NO1S@$-!-NaNv^nRiT2c`Ck|97x7l%iIWc)d)LX4FcJ4JMFvx1~>`tI&(t9AYr zb>b{BTNtmhyNKpa=w>sw0C9$O-D6R$*e4tAl#r>gC$Fr(Wn3oRXm$xUOh;AZp$TS0 
zZgRMAyxu%gwq>T^pX+Qa$By>CV)W-iTxkZy+icL1r5$UjjqxWGwo7RtUA>Yh zVR{p{8`S@qib$fbNHm_2K_D5A(J*|;4Q8!~yq}o|R`j}Z9hZ!eG=>QK8#B8+CINlR zellDkIkd=1j0Va>)Q}ezu?3d|sn2CL!iLNCD!}8HPEbWPUyxE1i+bzC3FPa|L%9$OO zlTttbLWDbcMRUXaDrFodhCe6_>tj^-AM#>za z0kBN&^W}PZ21f8(kDc{!*B1R!1aapN-KGIcL6=gxp022?j!>H0x&?Vn0>X5n%M+F? zE9G{}6Nw%F$Y*xMscIz%pB0cHCEtJf?Cj!cXYTwX-&xSmwVh|f_9NXF2DY`dbBrVy zwQ$IzhE|}=eKf$)RS4R4yA67Jjn3+Oj0!J^Q&Pw9?T6hsVp~8o+Nd&N<>d2G1RHw~m z_4yHf>gQ-%YTK zh3)?!b}IH2nPk6h{DU|_Q`+KFU7>g4p$P@g-9WT(k7SHFw|TP0aWR+0DNA9s12+=v_ zTBL&{kFJe{FER!Gl1t!x5BjltD~ys%#+*Y zk-B+&>t014=i4VQLwNT{gvZ&#(&=34OLoms`prgsFad05J2wJbJ!%XG?l~1cm~{1_ zn^9*@Rx2zI(hWkG6y*8jtlI$ab$k@9*C$cn)qNq_GDnS3q{%H|n?Ea9*|2Lf51^zu zTO1|nFYxGl{)nf?7voRQnybknGJi9+2P5i9M$2z3a`_BzjnR9I*N5|NQF18Fv1=Vm z1v=HU2Cqjs#XaN})q>E76F(no&M&08 zzS%KNWy@m7ifXpFYouFeijzH_1Uy(^AxFtR4wr%zpOui@nj4dNg|<%qJKo0ctt-2e zi&M+nx4GIqD?xd8!OS7YaYl@T2;UzK9iEdJ;3x{WdyQj289Y z=qbtYXcq^O1G=H7#HrrW5sPlJ56?3>rIGb|jd>*0awV$N_DV!|T|{?(39Sqv~+BGIlaIQ+08&wzvGrK(qLH8JMR(Dvl-c^>%q20kRg|%4Mj? 
z$Z!Pw#95kDxoIE_D{buW$d(ZcH6#WG6=EGlzV&X?S$3fqAi31-j$ZhWw2Vd+#8 z(y}xXvD7O2)$_>sg{_EmVF#KPnwE+cd%^*68*KL&zBofK#gO8#Sjy2A_yk8^LtAbT z%E;Uzjrgrw*~{_{n0ry4scSy+6QbVbt+_3ront#tH`OU7i=qmiq6kaZ-@(*x3^{g# z?7_F#BP%49pm9cr+an^{gdBwpm=g7EAM^IbxdQ3>vP6{vn~khO3h)X9{|E-Fw;uWZ zqn=UUL@oJTq93w!(ee6Ph~_|iM{3VI$S-artShf*aH%6dtE5+<*6EC3!$ zV5Ek~-xY@E;IVa#)>6Lw(@5{z*UejY^k^$%i(Z`AEfqt})n6kXc>UfA3tZ$D4DP#A z1FG)*2oAdhDxC5~77u4)*5?eVovFEq(vTkQ)l|-3tMM#DnPD2{BlM}B>Med_wTQsU zYg9NBU>=k(P+>0!Y1O#y^W_3uD6eI8V^C8Q%a26R0`}TE9tG|oO@hDY#9ijlx8V@X z0U>b|^k>W&J2?CcISAH%+ukJ%|7f3t{Aiy*A3fcu1tJBTfT-x5tdwBfCeD&|w8gDd zmhE9%6qnwF@How@g72Q;`w>m(kzT&vewwIQr7|Q@j1C3MJeQG4%oB7xHlvIWwc zEr(?A$PsrWfU%9a@%amzH$ZIkB0cAiqYuNPX4Z*MD2h z^nrr}-!$ez!kt|$ajj(Qk+VFJ@Os)G$-tuOqgPvn*19*jt}I8bQbR@UBif zi`*`jk)H`iq&Tm}8?RZ|sW!;FcU~udyBV|7>DQ87D6~_A zIU}puU`t)72f1r?SU(t`fFA^W?OIxOlCoqxyJtUdU$NTx|Lkh~7>YqbGeaWIKToTF ze_6jD|6z9htAoGJ)qXepaZG_c@=sH?Uk!hqHT+@P3Yk9q)`k0h^6;ziU;Dg&m;wO4 zh(C@0pRVt(etvEK{o$z+k`w>-BmUM3{ME~^b@3lw=0QKb{8}ad>fqNz{0|4EkmTS; zZt#C;`LCwGCK7*`(m=YI|CCnz>fx_Z^ACFf!21aR@V6NHtNC9A@XzK)gnu&sO%9c0 V;UI4p06>C#0w7_!jO54Je*hF7kh=f? 
literal 0 HcmV?d00001 diff --git a/tidychef/acquire/excel_time.py b/tidychef/acquire/excel_time.py index 53e0ccfc..fcba37cf 100644 --- a/tidychef/acquire/excel_time.py +++ b/tidychef/acquire/excel_time.py @@ -34,6 +34,8 @@ "M/D/YY": "%m/%d/%y", # Month/Year with 4-digit year (e.g., 5/2023) "m/yyyy": "%-m/%Y", + # Month as three letter abbreviation and 2 digit year (e.g., May-23) + "mmm-yy": "%b-%y", # Year/Month with 4-digit year (e.g., 2023/5) "yyyy/m": "%Y/%-m", # Day/Month/Year with 2-digit year (e.g., 1/5/23) diff --git a/tidychef/acquire/xls/shared.py b/tidychef/acquire/xls/shared.py index 7c4b4345..f8273909 100644 --- a/tidychef/acquire/xls/shared.py +++ b/tidychef/acquire/xls/shared.py @@ -90,17 +90,50 @@ def sheets_from_workbook( is_hyperlink = True break - # Get indentation level from XF alignment + # Get alignment information from XF indent_level = 0 - if hasattr(xf, 'alignment') and hasattr(xf.alignment, 'indent_level'): - indent_level = xf.alignment.indent_level + horizontal_alignment = None + vertical_alignment = None + + if hasattr(xf, 'alignment'): + # Get indentation level + if hasattr(xf.alignment, 'indent_level'): + indent_level = xf.alignment.indent_level + + # Get horizontal alignment - XLS uses integers: + # 0 = general, 1 = left, 2 = center, 3 = right, 4 = fill, 5 = justify + if hasattr(xf.alignment, 'hor_align'): + hor_align = xf.alignment.hor_align + alignment_map = { + 0: None, # general - let Excel decide + 1: 'left', + 2: 'center', + 3: 'right', + 4: 'fill', # not common, treat as general + 5: 'justify' + } + horizontal_alignment = alignment_map.get(hor_align) + + # Get vertical alignment - XLS uses integers: + # 0 = top, 1 = center, 2 = bottom, 3 = justify + if hasattr(xf.alignment, 'vert_align'): + vert_align = xf.alignment.vert_align + vertical_map = { + 0: 'top', + 1: 'center', + 2: 'bottom', + 3: 'justify' + } + vertical_alignment = vertical_map.get(vert_align) cell_formatting = CellFormatting( bold=is_bold, italic=is_italic, 
underline=is_underline, hyperlink=is_hyperlink, - indent_level=indent_level + indent_level=indent_level, + horizontal_alignment=horizontal_alignment, + vertical_alignment=vertical_alignment ) if cell.ctype == 3: # Date Cell diff --git a/tidychef/acquire/xlsx/shared.py b/tidychef/acquire/xlsx/shared.py index d58cb12d..890a59f3 100644 --- a/tidychef/acquire/xlsx/shared.py +++ b/tidychef/acquire/xlsx/shared.py @@ -56,6 +56,8 @@ def sheets_from_workbook( is_underline = False is_hyperlink = False indent_level = 0 + horizontal_alignment = None + vertical_alignment = None if opycell.font: is_bold = opycell.font.bold if opycell.font.bold is not None else False @@ -63,8 +65,17 @@ def sheets_from_workbook( # Check for underline - openpyxl uses 'single', 'double', etc. or None is_underline = opycell.font.underline is not None and opycell.font.underline != 'none' - if opycell.alignment and opycell.alignment.indent is not None: - indent_level = int(opycell.alignment.indent) + if opycell.alignment: + if opycell.alignment.indent is not None: + indent_level = int(opycell.alignment.indent) + + # Extract horizontal alignment (None means 'general' in Excel) + if opycell.alignment.horizontal is not None: + horizontal_alignment = opycell.alignment.horizontal + + # Extract vertical alignment (None means 'bottom' in Excel) + if opycell.alignment.vertical is not None: + vertical_alignment = opycell.alignment.vertical # Check if cell is a hyperlink is_hyperlink = opycell.hyperlink is not None @@ -74,7 +85,9 @@ def sheets_from_workbook( italic=is_italic, underline=is_underline, hyperlink=is_hyperlink, - indent_level=indent_level + indent_level=indent_level, + horizontal_alignment=horizontal_alignment, + vertical_alignment=vertical_alignment ) if opycell.is_date and opycell.internal_value is not None: diff --git a/tidychef/models/source/cellformat.py b/tidychef/models/source/cellformat.py index 6c1cbdf7..4a3bd13e 100644 --- a/tidychef/models/source/cellformat.py +++ 
b/tidychef/models/source/cellformat.py @@ -11,6 +11,8 @@ class CellFormatting: underline: Optional[bool] = None hyperlink: Optional[bool] = None indent_level: Optional[int] = None + horizontal_alignment: Optional[str] = None # 'left', 'center', 'right', 'justify', 'general' + vertical_alignment: Optional[str] = None # 'top', 'center', 'bottom' def is_bold(self) -> bool: """ @@ -107,3 +109,50 @@ def is_indented(self) -> bool: "Indentation level is unknown. Cannot determine if cell is indented." ) return self.indent_level > 0 + + def get_horizontal_alignment(self) -> str: + """ + Get the horizontal alignment of the cell. + + Returns: + str: The horizontal alignment ('left', 'center', 'right', 'justify', 'general') + Returns 'general' if alignment is not specified (Excel default behavior) + """ + return self.horizontal_alignment or 'general' + + def get_vertical_alignment(self) -> str: + """ + Get the vertical alignment of the cell. + + Returns: + str: The vertical alignment ('top', 'center', 'bottom') + Returns 'bottom' if alignment is not specified (Excel default behavior) + """ + return self.vertical_alignment or 'bottom' + + def is_left_aligned(self) -> bool: + """ + Check if the cell is left-aligned. + + Returns: + bool: True if cell is explicitly left-aligned, False otherwise + """ + return self.horizontal_alignment == 'left' + + def is_center_aligned(self) -> bool: + """ + Check if the cell is center-aligned. + + Returns: + bool: True if cell is center-aligned, False otherwise + """ + return self.horizontal_alignment == 'center' + + def is_right_aligned(self) -> bool: + """ + Check if the cell is right-aligned. 
+ + Returns: + bool: True if cell is right-aligned, False otherwise + """ + return self.horizontal_alignment == 'right' diff --git a/tidychef/notebook/preview/html/components.py b/tidychef/notebook/preview/html/components.py index f390635e..4265538b 100644 --- a/tidychef/notebook/preview/html/components.py +++ b/tidychef/notebook/preview/html/components.py @@ -108,6 +108,7 @@ def as_html(self): Create the html representation of this cell with formatting. """ content = str(self.value) + cell_styles = [f"background-color:{self.colour}"] # Apply text formatting if cell formatting is available if self.cell and self.cell.cellformat: @@ -140,4 +141,39 @@ def as_html(self): except Exception: logger.error("Error checking underline formatting", exc_info=True) - return f'{content}' + # Apply alignment and indentation formatting - handle each separately + # Handle indentation first (takes precedence and implies left alignment) + try: + if (self.cell.cellformat.indent_level is not None and + self.cell.cellformat.indent_level > 0): + indent_level = self.cell.cellformat.indent_level + # Apply padding-left based on indent level - make it more pronounced than Excel's subtle 8px + # Using 20px per level to make indentation clearly visible in HTML previews + padding_left = indent_level * 20 + cell_styles.append(f"padding-left: {padding_left}px") + # Indented cells are always left-aligned in Excel + # Use !important to override Jupyter notebook CSS + cell_styles.append("text-align: left !important") + else: + # Handle horizontal alignment for non-indented cells + alignment = self.cell.cellformat.get_horizontal_alignment() + if alignment != 'general': + # Apply explicit alignment (but not for 'general') + # Use !important to override Jupyter notebook CSS + cell_styles.append(f"text-align: {alignment} !important") + else: + # For 'general' alignment, override the CSS center alignment + # Excel's general alignment: text left, numbers right + # Since we don't easily distinguish types 
here, default to left + # which is more appropriate for most data + # Use !important to override Jupyter notebook CSS + cell_styles.append("text-align: left !important") + except Exception: + logger.error("Error checking alignment/indentation formatting", exc_info=True) + # Fallback - at least override the center alignment from CSS + # Use !important to override Jupyter notebook CSS + cell_styles.append("text-align: left !important") + + # Combine all styles + style_attr = "; ".join(cell_styles) + return f'{content}' diff --git a/tidychef/notebook/preview/html/constants.py b/tidychef/notebook/preview/html/constants.py index a4ca4baf..d9d45d14 100644 --- a/tidychef/notebook/preview/html/constants.py +++ b/tidychef/notebook/preview/html/constants.py @@ -17,6 +17,23 @@ "#b380ff", ] +# Separate color palette for multiple selection combinations +# These colors are distinct from individual selection colors to avoid confusion +MULTIPLE_SELECTION_COLOURS = [ + "#ffb3b3", # Light red + "#ffd9b3", # Light orange + "#ffffb3", # Light yellow + "#d9ffb3", # Light lime + "#b3ffb3", # Light green + "#b3ffff", # Light cyan + "#c6e6ff", # Very light blue (different from #b3d9ff) + "#d9b3ff", # Light purple + "#ffb3ff", # Light magenta + "#ffb3d9", # Light pink + "#e6ccb3", # Light brown + "#cccccc", # Light gray +] + # Simple CSS to make it pretty-ish INLINE_CSS = """