diff --git a/jupyterbook/basics/selection-formatting.ipynb b/jupyterbook/basics/selection-formatting.ipynb
index 5b96952..7bb01ce 100644
--- a/jupyterbook/basics/selection-formatting.ipynb
+++ b/jupyterbook/basics/selection-formatting.ipynb
@@ -135,7 +135,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.13.0"
+ "version": "3.13.7"
}
},
"nbformat": 4,
diff --git a/jupyterbook/examples/students.ipynb b/jupyterbook/examples/students.ipynb
new file mode 100644
index 0000000..8fe878d
--- /dev/null
+++ b/jupyterbook/examples/students.ipynb
@@ -0,0 +1,362 @@
+{
+ "cells": [
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "id": "b3ed8031-f02e-4a8f-a5b6-4a1551bea391",
+ "metadata": {},
+ "source": [
+ "# Students\n",
+ "\n",
+ "This example uses some fictional student data to showcase how cell formatting is often used to represent hierarchical relationships in tabulated data sources.\n",
+ "\n",
+ "_Note - the data used here is fictional; the structure (and formatting) is not, and was taken from a real UK government data source._\n",
+ "\n",
+ "First - this is how the data looks.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "0e947a89-995f-441d-bf1f-994f9f3b42a2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ " sheet1
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " | A | B | C | D | E |
\n",
+ "| 1 | Student count by location | | | | |
\n",
+ "| 2 | Note - data is entirely fictional for technical example | | | | |
\n",
+ "| 3 | | May-25 | Jun-25 | Jul-25 | |
\n",
+ "| 4 | London | 100 | 200 | 150 | |
\n",
+ "| 5 | Inner | 80 | 130 | 120 | |
\n",
+ "| 6 | Camden | 20 | 30 | 40 | |
\n",
+ "| 7 | Greenwitch | 30 | 50 | 50 | |
\n",
+ "| 8 | Hackney | 30 | 50 | 30 | |
\n",
+ "| 9 | Outer | 20 | 70 | 30 | |
\n",
+ "| 10 | Brent | 8 | 50 | 15 | |
\n",
+ "| 11 | Bromley | 12 | 20 | 15 | |
\n",
+ "| 12 | | | | | |
\n",
+ "| 13 | Cardiff | 130 | 88 | 91 | |
\n",
+ "| 14 | Inner | 95 | 60 | 50 | |
\n",
+ "| 15 | Roathe | 50 | 40 | 48 | |
\n",
+ "| 16 | Cathays | 45 | 20 | 2 | |
\n",
+ "| 17 | Outer | 35 | 28 | 41 | |
\n",
+ "| 18 | Pontcanna | 17 | 18 | 19 | |
\n",
+ "| 19 | Llandaff | 18 | 10 | 22 | |
\n",
+ "\n",
+ "
\n",
+ " \n",
+ "
\n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "from tidychef import acquire, preview\n",
+ "\n",
+ "table = acquire.xlsx.local(\"/Users/michael.adams/Code/tidychef/tests/fixtures/xlsx/Students.xlsx\")\n",
+ "preview(table)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7936fddc-7bb8-4812-bf8d-637f0582a7c6",
+ "metadata": {},
+ "source": [
+ "There is an obvious hierarchy here that is only denoted by the use of bold and cell indentation."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7fe02e4c-40f6-458b-9489-f878927caf50",
+ "metadata": {},
+ "source": [
+ "# Requirements\n",
+ "\n",
+ "To keep this simple we're going to go with:\n",
+ "\n",
+ "- Area (London or Cardiff)\n",
+ "- Sub Area (Inner or Outer)\n",
+ "- Place - the actual location "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "d4a9c72b-0328-44b0-98c9-2bcbd1d21c41",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Preview
\n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
Selections\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | Area | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ " | Sub Area | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ " | Place | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ " | Period | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ " | Values | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
Multiple Selection Warnings\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Area + Place + Sub Area (2 cells) → AreaSub AreaPlace\n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Place + Sub Area (4 cells) → Sub AreaPlace\n",
+ " | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | A | B | C | D | E |
\n",
+ "| 1 | Student count by location | | | | |
\n",
+ "| 2 | Note - data is entirely fictional for technical example | | | | |
\n",
+ "| 3 | | May-25 | Jun-25 | Jul-25 | |
\n",
+ "| 4 | London | 100 | 200 | 150 | |
\n",
+ "| 5 | Inner | 80 | 130 | 120 | |
\n",
+ "| 6 | Camden | 20 | 30 | 40 | |
\n",
+ "| 7 | Greenwitch | 30 | 50 | 50 | |
\n",
+ "| 8 | Hackney | 30 | 50 | 30 | |
\n",
+ "| 9 | Outer | 20 | 70 | 30 | |
\n",
+ "| 10 | Brent | 8 | 50 | 15 | |
\n",
+ "| 11 | Bromley | 12 | 20 | 15 | |
\n",
+ "| 12 | | | | | |
\n",
+ "| 13 | Cardiff | 130 | 88 | 91 | |
\n",
+ "| 14 | Inner | 95 | 60 | 50 | |
\n",
+ "| 15 | Roathe | 50 | 40 | 48 | |
\n",
+ "| 16 | Cathays | 45 | 20 | 2 | |
\n",
+ "| 17 | Outer | 35 | 28 | 41 | |
\n",
+ "| 18 | Pontcanna | 17 | 18 | 19 | |
\n",
+ "| 19 | Llandaff | 18 | 10 | 22 | |
\n",
+ "\n",
+ "
\n",
+ " \n",
+ "
\n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " | Values | Area | Sub Area | Place | Period |
| 100 | London | London | London | May-25 |
| 80 | London | Inner | Inner | May-25 |
| 20 | London | Inner | Camden | May-25 |
| 30 | London | Inner | Greenwitch | May-25 |
| 30 | London | Inner | Hackney | May-25 |
| 20 | London | Outer | Outer | May-25 |
| 8 | London | Outer | Brent | May-25 |
| 12 | London | Outer | Bromley | May-25 |
| 130 | Cardiff | Cardiff | Cardiff | May-25 |
| 95 | Cardiff | Inner | Inner | May-25 |
| 50 | Cardiff | Inner | Roathe | May-25 |
| 45 | Cardiff | Inner | Cathays | May-25 |
| 35 | Cardiff | Outer | Outer | May-25 |
| 17 | Cardiff | Outer | Pontcanna | May-25 |
| 18 | Cardiff | Outer | Llandaff | May-25 |
| 200 | London | London | London | Jun-25 |
| 130 | London | Inner | Inner | Jun-25 |
| 30 | London | Inner | Camden | Jun-25 |
| 50 | London | Inner | Greenwitch | Jun-25 |
| 50 | London | Inner | Hackney | Jun-25 |
| 70 | London | Outer | Outer | Jun-25 |
| 50 | London | Outer | Brent | Jun-25 |
| 20 | London | Outer | Bromley | Jun-25 |
| 88 | Cardiff | Cardiff | Cardiff | Jun-25 |
| 60 | Cardiff | Inner | Inner | Jun-25 |
| 40 | Cardiff | Inner | Roathe | Jun-25 |
| 20 | Cardiff | Inner | Cathays | Jun-25 |
| 28 | Cardiff | Outer | Outer | Jun-25 |
| 18 | Cardiff | Outer | Pontcanna | Jun-25 |
| 10 | Cardiff | Outer | Llandaff | Jun-25 |
| 150 | London | London | London | Jul-25 |
| 120 | London | Inner | Inner | Jul-25 |
| 40 | London | Inner | Camden | Jul-25 |
| 50 | London | Inner | Greenwitch | Jul-25 |
| 30 | London | Inner | Hackney | Jul-25 |
| 30 | London | Outer | Outer | Jul-25 |
| 15 | London | Outer | Brent | Jul-25 |
| 15 | London | Outer | Bromley | Jul-25 |
| 91 | Cardiff | Cardiff | Cardiff | Jul-25 |
| 50 | Cardiff | Inner | Inner | Jul-25 |
| 48 | Cardiff | Inner | Roathe | Jul-25 |
| 2 | Cardiff | Inner | Cathays | Jul-25 |
| 41 | Cardiff | Outer | Outer | Jul-25 |
| 19 | Cardiff | Outer | Pontcanna | Jul-25 |
| 22 | Cardiff | Outer | Llandaff | Jul-25 |
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": []
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from tidychef import acquire, preview\n",
+ "from tidychef.direction import right, up, left, down\n",
+ "from tidychef.output import Column, TidyData\n",
+ "\n",
+ "table = acquire.xlsx.local(\"/Users/michael.adams/Code/tidychef/tests/fixtures/xlsx/Students.xlsx\")\n",
+ "\n",
+ "# Area is any bold cell in column A that's neither indented nor underlined\n",
+ "area = table.excel_ref(\"A\").is_bold().is_not_indented().is_not_underline().label_as(\"Area\")\n",
+ "\n",
+ "# Sub Area is any bold cell in column A that is indented\n",
+ "sub_area = (table.excel_ref(\"A\").is_bold().is_indented() | area).label_as(\"Sub Area\")\n",
+ "\n",
+ "# Place is any non-blank cell in column A that is indented but is NOT bold\n",
+ "place = (table.excel_ref(\"A\").is_not_blank().is_not_bold().is_indented() | sub_area).label_as(\"Place\")\n",
+ "\n",
+ "# Get the period with a simple string selection\n",
+ "period = table.cell_containing_string(\"May-25\").expand(right).is_not_blank().label_as(\"Period\")\n",
+ "\n",
+ "# Values are numbers that are beneath periods\n",
+ "values = period.fill(down).is_not_blank().label_as(\"Values\")\n",
+ "\n",
+ "# Create selection preview\n",
+ "preview(area, sub_area, place, period, values)\n",
+ "\n",
+ "# Now we define the visual relationships between our selections to create tidydata\n",
+ "tidy_data = TidyData(\n",
+ " values,\n",
+ " Column(area.attach_closest(down)),\n",
+ " Column(sub_area.attach_closest(down)),\n",
+ " Column(place.attach_closest(down)),\n",
+ " Column(period.attach_directly(down))\n",
+ ")\n",
+ "\n",
+ "tidy_data.to_csv(\"students.csv\")\n",
+ "\n",
+ "tidy_data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ce267d9d-5de5-4e92-b040-a3867075dc0b",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.13.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/tests/fixtures/xlsx/Students.xlsx b/tests/fixtures/xlsx/Students.xlsx
new file mode 100644
index 0000000..1eb9357
Binary files /dev/null and b/tests/fixtures/xlsx/Students.xlsx differ
diff --git a/tidychef/acquire/excel_time.py b/tidychef/acquire/excel_time.py
index 53e0ccf..fcba37c 100644
--- a/tidychef/acquire/excel_time.py
+++ b/tidychef/acquire/excel_time.py
@@ -34,6 +34,8 @@
"M/D/YY": "%m/%d/%y",
# Month/Year with 4-digit year (e.g., 5/2023)
"m/yyyy": "%-m/%Y",
+ # Month as three letter abbreviation and 2 digit year (e.g., May-23)
+ "mmm-yy": "%b-%y",
# Year/Month with 4-digit year (e.g., 2023/5)
"yyyy/m": "%Y/%-m",
# Day/Month/Year with 2-digit year (e.g., 1/5/23)
diff --git a/tidychef/acquire/xls/shared.py b/tidychef/acquire/xls/shared.py
index 7c4b434..f827390 100644
--- a/tidychef/acquire/xls/shared.py
+++ b/tidychef/acquire/xls/shared.py
@@ -90,17 +90,50 @@ def sheets_from_workbook(
is_hyperlink = True
break
- # Get indentation level from XF alignment
+ # Get alignment information from XF
indent_level = 0
- if hasattr(xf, 'alignment') and hasattr(xf.alignment, 'indent_level'):
- indent_level = xf.alignment.indent_level
+ horizontal_alignment = None
+ vertical_alignment = None
+
+ if hasattr(xf, 'alignment'):
+ # Get indentation level
+ if hasattr(xf.alignment, 'indent_level'):
+ indent_level = xf.alignment.indent_level
+
+ # Get horizontal alignment - XLS uses integers:
+ # 0 = general, 1 = left, 2 = center, 3 = right, 4 = fill, 5 = justify
+ if hasattr(xf.alignment, 'hor_align'):
+ hor_align = xf.alignment.hor_align
+ alignment_map = {
+ 0: None, # general - let Excel decide
+ 1: 'left',
+ 2: 'center',
+ 3: 'right',
+ 4: 'fill', # not common, treat as general
+ 5: 'justify'
+ }
+ horizontal_alignment = alignment_map.get(hor_align)
+
+ # Get vertical alignment - XLS uses integers:
+ # 0 = top, 1 = center, 2 = bottom, 3 = justify
+ if hasattr(xf.alignment, 'vert_align'):
+ vert_align = xf.alignment.vert_align
+ vertical_map = {
+ 0: 'top',
+ 1: 'center',
+ 2: 'bottom',
+ 3: 'justify'
+ }
+ vertical_alignment = vertical_map.get(vert_align)
cell_formatting = CellFormatting(
bold=is_bold,
italic=is_italic,
underline=is_underline,
hyperlink=is_hyperlink,
- indent_level=indent_level
+ indent_level=indent_level,
+ horizontal_alignment=horizontal_alignment,
+ vertical_alignment=vertical_alignment
)
if cell.ctype == 3: # Date Cell
diff --git a/tidychef/acquire/xlsx/shared.py b/tidychef/acquire/xlsx/shared.py
index d58cb12..890a59f 100644
--- a/tidychef/acquire/xlsx/shared.py
+++ b/tidychef/acquire/xlsx/shared.py
@@ -56,6 +56,8 @@ def sheets_from_workbook(
is_underline = False
is_hyperlink = False
indent_level = 0
+ horizontal_alignment = None
+ vertical_alignment = None
if opycell.font:
is_bold = opycell.font.bold if opycell.font.bold is not None else False
@@ -63,8 +65,17 @@ def sheets_from_workbook(
# Check for underline - openpyxl uses 'single', 'double', etc. or None
is_underline = opycell.font.underline is not None and opycell.font.underline != 'none'
- if opycell.alignment and opycell.alignment.indent is not None:
- indent_level = int(opycell.alignment.indent)
+ if opycell.alignment:
+ if opycell.alignment.indent is not None:
+ indent_level = int(opycell.alignment.indent)
+
+ # Extract horizontal alignment (None means 'general' in Excel)
+ if opycell.alignment.horizontal is not None:
+ horizontal_alignment = opycell.alignment.horizontal
+
+ # Extract vertical alignment (None means 'bottom' in Excel)
+ if opycell.alignment.vertical is not None:
+ vertical_alignment = opycell.alignment.vertical
# Check if cell is a hyperlink
is_hyperlink = opycell.hyperlink is not None
@@ -74,7 +85,9 @@ def sheets_from_workbook(
italic=is_italic,
underline=is_underline,
hyperlink=is_hyperlink,
- indent_level=indent_level
+ indent_level=indent_level,
+ horizontal_alignment=horizontal_alignment,
+ vertical_alignment=vertical_alignment
)
if opycell.is_date and opycell.internal_value is not None:
diff --git a/tidychef/models/source/cellformat.py b/tidychef/models/source/cellformat.py
index 6c1cbdf..4a3bd13 100644
--- a/tidychef/models/source/cellformat.py
+++ b/tidychef/models/source/cellformat.py
@@ -11,6 +11,8 @@ class CellFormatting:
underline: Optional[bool] = None
hyperlink: Optional[bool] = None
indent_level: Optional[int] = None
+ horizontal_alignment: Optional[str] = None # 'left', 'center', 'right', 'justify', 'general'
+ vertical_alignment: Optional[str] = None # 'top', 'center', 'bottom'
def is_bold(self) -> bool:
"""
@@ -107,3 +109,50 @@ def is_indented(self) -> bool:
"Indentation level is unknown. Cannot determine if cell is indented."
)
return self.indent_level > 0
+
+ def get_horizontal_alignment(self) -> str:
+ """
+ Get the horizontal alignment of the cell.
+
+ Returns:
+ str: The horizontal alignment ('left', 'center', 'right', 'justify', 'general')
+ Returns 'general' if alignment is not specified (Excel default behavior)
+ """
+ return self.horizontal_alignment or 'general'
+
+ def get_vertical_alignment(self) -> str:
+ """
+ Get the vertical alignment of the cell.
+
+ Returns:
+ str: The vertical alignment ('top', 'center', 'bottom')
+ Returns 'bottom' if alignment is not specified (Excel default behavior)
+ """
+ return self.vertical_alignment or 'bottom'
+
+ def is_left_aligned(self) -> bool:
+ """
+ Check if the cell is left-aligned.
+
+ Returns:
+ bool: True if cell is explicitly left-aligned, False otherwise
+ """
+ return self.horizontal_alignment == 'left'
+
+ def is_center_aligned(self) -> bool:
+ """
+ Check if the cell is center-aligned.
+
+ Returns:
+ bool: True if cell is center-aligned, False otherwise
+ """
+ return self.horizontal_alignment == 'center'
+
+ def is_right_aligned(self) -> bool:
+ """
+ Check if the cell is right-aligned.
+
+ Returns:
+ bool: True if cell is right-aligned, False otherwise
+ """
+ return self.horizontal_alignment == 'right'
diff --git a/tidychef/notebook/preview/html/components.py b/tidychef/notebook/preview/html/components.py
index f390635..4265538 100644
--- a/tidychef/notebook/preview/html/components.py
+++ b/tidychef/notebook/preview/html/components.py
@@ -108,6 +108,7 @@ def as_html(self):
Create the html representation of this cell with formatting.
"""
content = str(self.value)
+ cell_styles = [f"background-color:{self.colour}"]
# Apply text formatting if cell formatting is available
if self.cell and self.cell.cellformat:
@@ -140,4 +141,39 @@ def as_html(self):
except Exception:
logger.error("Error checking underline formatting", exc_info=True)
- return f'{content} | '
+ # Apply alignment and indentation formatting - handle each separately
+ # Handle indentation first (takes precedence and implies left alignment)
+ try:
+ if (self.cell.cellformat.indent_level is not None and
+ self.cell.cellformat.indent_level > 0):
+ indent_level = self.cell.cellformat.indent_level
+ # Apply padding-left based on indent level - make it more pronounced than Excel's subtle 8px
+ # Using 20px per level to make indentation clearly visible in HTML previews
+ padding_left = indent_level * 20
+ cell_styles.append(f"padding-left: {padding_left}px")
+ # Indented cells are always left-aligned in Excel
+ # Use !important to override Jupyter notebook CSS
+ cell_styles.append("text-align: left !important")
+ else:
+ # Handle horizontal alignment for non-indented cells
+ alignment = self.cell.cellformat.get_horizontal_alignment()
+ if alignment != 'general':
+ # Apply explicit alignment (but not for 'general')
+ # Use !important to override Jupyter notebook CSS
+ cell_styles.append(f"text-align: {alignment} !important")
+ else:
+ # For 'general' alignment, override the CSS center alignment
+ # Excel's general alignment: text left, numbers right
+ # Since we don't easily distinguish types here, default to left
+ # which is more appropriate for most data
+ # Use !important to override Jupyter notebook CSS
+ cell_styles.append("text-align: left !important")
+ except Exception:
+ logger.error("Error checking alignment/indentation formatting", exc_info=True)
+ # Fallback - at least override the center alignment from CSS
+ # Use !important to override Jupyter notebook CSS
+ cell_styles.append("text-align: left !important")
+
+ # Combine all styles
+ style_attr = "; ".join(cell_styles)
+ return f'{content} | '
diff --git a/tidychef/notebook/preview/html/constants.py b/tidychef/notebook/preview/html/constants.py
index a4ca4ba..d9d45d1 100644
--- a/tidychef/notebook/preview/html/constants.py
+++ b/tidychef/notebook/preview/html/constants.py
@@ -17,6 +17,23 @@
"#b380ff",
]
+# Separate color palette for multiple selection combinations
+# These colors are distinct from individual selection colors to avoid confusion
+MULTIPLE_SELECTION_COLOURS = [
+ "#ffb3b3", # Light red
+ "#ffd9b3", # Light orange
+ "#ffffb3", # Light yellow
+ "#d9ffb3", # Light lime
+ "#b3ffb3", # Light green
+ "#b3ffff", # Light cyan
+ "#c6e6ff", # Very light blue (different from #b3d9ff)
+ "#d9b3ff", # Light purple
+ "#ffb3ff", # Light magenta
+ "#ffb3d9", # Light pink
+ "#e6ccb3", # Light brown
+ "#cccccc", # Light gray
+]
+
# Simple CSS to make it pretty-ish
INLINE_CSS = """