diff --git a/jupyterbook/basics/selection-formatting.ipynb b/jupyterbook/basics/selection-formatting.ipynb index 5b96952..7bb01ce 100644 --- a/jupyterbook/basics/selection-formatting.ipynb +++ b/jupyterbook/basics/selection-formatting.ipynb @@ -135,7 +135,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.0" + "version": "3.13.7" } }, "nbformat": 4, diff --git a/jupyterbook/examples/students.ipynb b/jupyterbook/examples/students.ipynb new file mode 100644 index 0000000..8fe878d --- /dev/null +++ b/jupyterbook/examples/students.ipynb @@ -0,0 +1,362 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "b3ed8031-f02e-4a8f-a5b6-4a1551bea391", + "metadata": {}, + "source": [ + "# Students\n", + "\n", + "This example uses some fictional student data to showcase how cell formatting is often used to represent hierarchical relationships in tabulated data sources.\n", + "\n", + "_Note - the data used here is fictional, the structure (and formatting) is not and was taken from a real UK government data source._\n", + "\n", + "First - this is how the data looks.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "0e947a89-995f-441d-bf1f-994f9f3b42a2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "

sheet1

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
ABCDE
1Student count by location
2Note - data is entirely fictional for technical example
3May-25Jun-25Jul-25
4London100200150
5Inner80130120
6Camden203040
7Greenwitch305050
8Hackney305030
9Outer207030
10Brent85015
11Bromley122015
12
13Cardiff1308891
14Inner956050
15Roathe504048
16Cathays45202
17Outer352841
18Pontcanna171819
19Llandaff181022
\n", + " \n", + "
\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from tidychef import acquire, preview\n", + "\n", + "table = acquire.xlsx.local(\"/Users/michael.adams/Code/tidychef/tests/fixtures/xlsx/Students.xlsx\")\n", + "preview(table)" + ] + }, + { + "cell_type": "markdown", + "id": "7936fddc-7bb8-4812-bf8d-637f0582a7c6", + "metadata": {}, + "source": [ + "There is an obvious hierarchy here that is only denoted by the use of bold and cell indentation." + ] + }, + { + "cell_type": "markdown", + "id": "7fe02e4c-40f6-458b-9489-f878927caf50", + "metadata": {}, + "source": [ + "# Requirements\n", + "\n", + "To keep this simple we're going to go with:\n", + "\n", + "- Area (London or Cardiff)\n", + "- Sub Area (Inner or Outer)\n", + "- Place - the actual location " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d4a9c72b-0328-44b0-98c9-2bcbd1d21c41", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "

Preview

\n", + " \n", + " \n", + "
\n", + "
\n", + " Selections\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Area
Sub Area
Place
Period
Values
\n", + "
\n", + "
\n", + " Multiple Selection Warnings\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " Area + Place + Sub Area (2 cells) → AreaSub AreaPlace\n", + "
\n", + " Place + Sub Area (4 cells) → Sub AreaPlace\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
ABCDE
1Student count by location
2Note - data is entirely fictional for technical example
3May-25Jun-25Jul-25
4London100200150
5Inner80130120
6Camden203040
7Greenwitch305050
8Hackney305030
9Outer207030
10Brent85015
11Bromley122015
12
13Cardiff1308891
14Inner956050
15Roathe504048
16Cathays45202
17Outer352841
18Pontcanna171819
19Llandaff181022
\n", + " \n", + "
\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ValuesAreaSub AreaPlacePeriod
100LondonLondonLondonMay-25
80LondonInnerInnerMay-25
20LondonInnerCamdenMay-25
30LondonInnerGreenwitchMay-25
30LondonInnerHackneyMay-25
20LondonOuterOuterMay-25
8LondonOuterBrentMay-25
12LondonOuterBromleyMay-25
130CardiffCardiffCardiffMay-25
95CardiffInnerInnerMay-25
50CardiffInnerRoatheMay-25
45CardiffInnerCathaysMay-25
35CardiffOuterOuterMay-25
17CardiffOuterPontcannaMay-25
18CardiffOuterLlandaffMay-25
200LondonLondonLondonJun-25
130LondonInnerInnerJun-25
30LondonInnerCamdenJun-25
50LondonInnerGreenwitchJun-25
50LondonInnerHackneyJun-25
70LondonOuterOuterJun-25
50LondonOuterBrentJun-25
20LondonOuterBromleyJun-25
88CardiffCardiffCardiffJun-25
60CardiffInnerInnerJun-25
40CardiffInnerRoatheJun-25
20CardiffInnerCathaysJun-25
28CardiffOuterOuterJun-25
18CardiffOuterPontcannaJun-25
10CardiffOuterLlandaffJun-25
150LondonLondonLondonJul-25
120LondonInnerInnerJul-25
40LondonInnerCamdenJul-25
50LondonInnerGreenwitchJul-25
30LondonInnerHackneyJul-25
30LondonOuterOuterJul-25
15LondonOuterBrentJul-25
15LondonOuterBromleyJul-25
91CardiffCardiffCardiffJul-25
50CardiffInnerInnerJul-25
48CardiffInnerRoatheJul-25
2CardiffInnerCathaysJul-25
41CardiffOuterOuterJul-25
19CardiffOuterPontcannaJul-25
22CardiffOuterLlandaffJul-25
\n", + " \n", + "
\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from tidychef import acquire, preview\n", + "from tidychef.direction import right, up, left, down\n", + "from tidychef.output import Column, TidyData\n", + "\n", + "table = acquire.xlsx.local(\"/Users/michael.adams/Code/tidychef/tests/fixtures/xlsx/Students.xlsx\")\n", + "\n", + "# Area is any bold cell in column A that's neither indented nor underlined\n", + "area = table.excel_ref(\"A\").is_bold().is_not_indented().is_not_underline().label_as(\"Area\")\n", + "\n", + "# Sub Area is any bold cell in column A that is indented\n", + "sub_area = (table.excel_ref(\"A\").is_bold().is_indented() | area).label_as(\"Sub Area\")\n", + "\n", + "# Place is any non blank cell in column A that is indented but is NOT bold\n", + "place = (table.excel_ref(\"A\").is_not_blank().is_not_bold().is_indented() | sub_area).label_as(\"Place\")\n", + "\n", + "# Get the period with a simple string selection\n", + "period = table.cell_containing_string(\"May-25\").expand(right).is_not_blank().label_as(\"Period\")\n", + "\n", + "# Values are numbers that are beneath periods\n", + "values = period.fill(down).is_not_blank().label_as(\"Values\")\n", + "\n", + "# Create selection preview\n", + "preview(area, sub_area, place, period, values)\n", + "\n", + "# Now we define the visual relationships between our selections to create tidydata\n", + "tidy_data = TidyData(\n", + " values,\n", + " Column(area.attach_closest(down)),\n", + " Column(sub_area.attach_closest(down)),\n", + " Column(place.attach_closest(down)),\n", + " Column(period.attach_directly(down))\n", + ")\n", + "\n", + "tidy_data.to_csv(\"students.csv\")\n", + "\n", + "tidy_data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"ce267d9d-5de5-4e92-b040-a3867075dc0b", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/fixtures/xlsx/Students.xlsx b/tests/fixtures/xlsx/Students.xlsx new file mode 100644 index 0000000..1eb9357 Binary files /dev/null and b/tests/fixtures/xlsx/Students.xlsx differ diff --git a/tidychef/acquire/excel_time.py b/tidychef/acquire/excel_time.py index 53e0ccf..fcba37c 100644 --- a/tidychef/acquire/excel_time.py +++ b/tidychef/acquire/excel_time.py @@ -34,6 +34,8 @@ "M/D/YY": "%m/%d/%y", # Month/Year with 4-digit year (e.g., 5/2023) "m/yyyy": "%-m/%Y", + # Month as three letter abbreviation and 2 digit year (e.g., May-23) + "mmm-yy": "%b-%y", # Year/Month with 4-digit year (e.g., 2023/5) "yyyy/m": "%Y/%-m", # Day/Month/Year with 2-digit year (e.g., 1/5/23) diff --git a/tidychef/acquire/xls/shared.py b/tidychef/acquire/xls/shared.py index 7c4b434..f827390 100644 --- a/tidychef/acquire/xls/shared.py +++ b/tidychef/acquire/xls/shared.py @@ -90,17 +90,50 @@ def sheets_from_workbook( is_hyperlink = True break - # Get indentation level from XF alignment + # Get alignment information from XF indent_level = 0 - if hasattr(xf, 'alignment') and hasattr(xf.alignment, 'indent_level'): - indent_level = xf.alignment.indent_level + horizontal_alignment = None + vertical_alignment = None + + if hasattr(xf, 'alignment'): + # Get indentation level + if hasattr(xf.alignment, 'indent_level'): + indent_level = xf.alignment.indent_level + + # Get horizontal alignment - XLS uses integers: + # 0 = general, 1 = left, 2 = center, 3 = right, 4 = 
fill, 5 = justify + if hasattr(xf.alignment, 'hor_align'): + hor_align = xf.alignment.hor_align + alignment_map = { + 0: None, # general - let Excel decide + 1: 'left', + 2: 'center', + 3: 'right', + 4: 'fill', # not common, treat as general + 5: 'justify' + } + horizontal_alignment = alignment_map.get(hor_align) + + # Get vertical alignment - XLS uses integers: + # 0 = top, 1 = center, 2 = bottom, 3 = justify + if hasattr(xf.alignment, 'vert_align'): + vert_align = xf.alignment.vert_align + vertical_map = { + 0: 'top', + 1: 'center', + 2: 'bottom', + 3: 'justify' + } + vertical_alignment = vertical_map.get(vert_align) cell_formatting = CellFormatting( bold=is_bold, italic=is_italic, underline=is_underline, hyperlink=is_hyperlink, - indent_level=indent_level + indent_level=indent_level, + horizontal_alignment=horizontal_alignment, + vertical_alignment=vertical_alignment ) if cell.ctype == 3: # Date Cell diff --git a/tidychef/acquire/xlsx/shared.py b/tidychef/acquire/xlsx/shared.py index d58cb12..890a59f 100644 --- a/tidychef/acquire/xlsx/shared.py +++ b/tidychef/acquire/xlsx/shared.py @@ -56,6 +56,8 @@ def sheets_from_workbook( is_underline = False is_hyperlink = False indent_level = 0 + horizontal_alignment = None + vertical_alignment = None if opycell.font: is_bold = opycell.font.bold if opycell.font.bold is not None else False @@ -63,8 +65,17 @@ def sheets_from_workbook( # Check for underline - openpyxl uses 'single', 'double', etc. 
or None is_underline = opycell.font.underline is not None and opycell.font.underline != 'none' - if opycell.alignment and opycell.alignment.indent is not None: - indent_level = int(opycell.alignment.indent) + if opycell.alignment: + if opycell.alignment.indent is not None: + indent_level = int(opycell.alignment.indent) + + # Extract horizontal alignment (None means 'general' in Excel) + if opycell.alignment.horizontal is not None: + horizontal_alignment = opycell.alignment.horizontal + + # Extract vertical alignment (None means 'bottom' in Excel) + if opycell.alignment.vertical is not None: + vertical_alignment = opycell.alignment.vertical # Check if cell is a hyperlink is_hyperlink = opycell.hyperlink is not None @@ -74,7 +85,9 @@ def sheets_from_workbook( italic=is_italic, underline=is_underline, hyperlink=is_hyperlink, - indent_level=indent_level + indent_level=indent_level, + horizontal_alignment=horizontal_alignment, + vertical_alignment=vertical_alignment ) if opycell.is_date and opycell.internal_value is not None: diff --git a/tidychef/models/source/cellformat.py b/tidychef/models/source/cellformat.py index 6c1cbdf..4a3bd13 100644 --- a/tidychef/models/source/cellformat.py +++ b/tidychef/models/source/cellformat.py @@ -11,6 +11,8 @@ class CellFormatting: underline: Optional[bool] = None hyperlink: Optional[bool] = None indent_level: Optional[int] = None + horizontal_alignment: Optional[str] = None # 'left', 'center', 'right', 'justify', 'general' + vertical_alignment: Optional[str] = None # 'top', 'center', 'bottom' def is_bold(self) -> bool: """ @@ -107,3 +109,50 @@ def is_indented(self) -> bool: "Indentation level is unknown. Cannot determine if cell is indented." ) return self.indent_level > 0 + + def get_horizontal_alignment(self) -> str: + """ + Get the horizontal alignment of the cell. 
+ + Returns: + str: The horizontal alignment ('left', 'center', 'right', 'justify', 'general') + Returns 'general' if alignment is not specified (Excel default behavior) + """ + return self.horizontal_alignment or 'general' + + def get_vertical_alignment(self) -> str: + """ + Get the vertical alignment of the cell. + + Returns: + str: The vertical alignment ('top', 'center', 'bottom') + Returns 'bottom' if alignment is not specified (Excel default behavior) + """ + return self.vertical_alignment or 'bottom' + + def is_left_aligned(self) -> bool: + """ + Check if the cell is left-aligned. + + Returns: + bool: True if cell is explicitly left-aligned, False otherwise + """ + return self.horizontal_alignment == 'left' + + def is_center_aligned(self) -> bool: + """ + Check if the cell is center-aligned. + + Returns: + bool: True if cell is center-aligned, False otherwise + """ + return self.horizontal_alignment == 'center' + + def is_right_aligned(self) -> bool: + """ + Check if the cell is right-aligned. + + Returns: + bool: True if cell is right-aligned, False otherwise + """ + return self.horizontal_alignment == 'right' diff --git a/tidychef/notebook/preview/html/components.py b/tidychef/notebook/preview/html/components.py index f390635..4265538 100644 --- a/tidychef/notebook/preview/html/components.py +++ b/tidychef/notebook/preview/html/components.py @@ -108,6 +108,7 @@ def as_html(self): Create the html representation of this cell with formatting. 
""" content = str(self.value) + cell_styles = [f"background-color:{self.colour}"] # Apply text formatting if cell formatting is available if self.cell and self.cell.cellformat: @@ -140,4 +141,39 @@ def as_html(self): except Exception: logger.error("Error checking underline formatting", exc_info=True) - return f'{content}' + # Apply alignment and indentation formatting - handle each separately + # Handle indentation first (takes precedence and implies left alignment) + try: + if (self.cell.cellformat.indent_level is not None and + self.cell.cellformat.indent_level > 0): + indent_level = self.cell.cellformat.indent_level + # Apply padding-left based on indent level - make it more pronounced than Excel's subtle 8px + # Using 20px per level to make indentation clearly visible in HTML previews + padding_left = indent_level * 20 + cell_styles.append(f"padding-left: {padding_left}px") + # Indented cells are always left-aligned in Excel + # Use !important to override Jupyter notebook CSS + cell_styles.append("text-align: left !important") + else: + # Handle horizontal alignment for non-indented cells + alignment = self.cell.cellformat.get_horizontal_alignment() + if alignment != 'general': + # Apply explicit alignment (but not for 'general') + # Use !important to override Jupyter notebook CSS + cell_styles.append(f"text-align: {alignment} !important") + else: + # For 'general' alignment, override the CSS center alignment + # Excel's general alignment: text left, numbers right + # Since we don't easily distinguish types here, default to left + # which is more appropriate for most data + # Use !important to override Jupyter notebook CSS + cell_styles.append("text-align: left !important") + except Exception: + logger.error("Error checking alignment/indentation formatting", exc_info=True) + # Fallback - at least override the center alignment from CSS + # Use !important to override Jupyter notebook CSS + cell_styles.append("text-align: left !important") + + # Combine all styles 
+ style_attr = "; ".join(cell_styles) + return f'{content}' diff --git a/tidychef/notebook/preview/html/constants.py b/tidychef/notebook/preview/html/constants.py index a4ca4ba..d9d45d1 100644 --- a/tidychef/notebook/preview/html/constants.py +++ b/tidychef/notebook/preview/html/constants.py @@ -17,6 +17,23 @@ "#b380ff", ] +# Separate color palette for multiple selection combinations +# These colors are distinct from individual selection colors to avoid confusion +MULTIPLE_SELECTION_COLOURS = [ + "#ffb3b3", # Light red + "#ffd9b3", # Light orange + "#ffffb3", # Light yellow + "#d9ffb3", # Light lime + "#b3ffb3", # Light green + "#b3ffff", # Light cyan + "#c6e6ff", # Very light blue (different from #b3d9ff) + "#d9b3ff", # Light purple + "#ffb3ff", # Light magenta + "#ffb3d9", # Light pink + "#e6ccb3", # Light brown + "#cccccc", # Light gray +] + # Simple CSS to make it pretty-ish INLINE_CSS = """