diff --git a/.gitignore b/.gitignore
index 5471529b..f49e3a69 100755
--- a/.gitignore
+++ b/.gitignore
@@ -105,3 +105,6 @@ ENV/
 # IDE settings
 .vscode/
 .idea/
+
+# UV
+uv.lock
diff --git a/cdm_reader_mapper/mdf_reader/properties.py b/cdm_reader_mapper/mdf_reader/properties.py
index 5289d9ae..f6f491be 100755
--- a/cdm_reader_mapper/mdf_reader/properties.py
+++ b/cdm_reader_mapper/mdf_reader/properties.py
@@ -2,6 +2,8 @@
 
 from __future__ import annotations
 
+import polars as pl
+
 from ..properties import numeric_types, object_types, supported_data_models  # noqa
 
 _base = "cdm_reader_mapper.mdf_reader"
@@ -16,22 +18,23 @@
     "craid": ("drifter_measurements", "JULD"),
 }
 
-pandas_dtypes = {}
+polars_dtypes = {}
 for dtype in object_types:
-    pandas_dtypes[dtype] = "object"
-pandas_dtypes.update({x: x for x in numeric_types})
-pandas_dtypes["datetime"] = "datetime"
+    polars_dtypes[dtype] = pl.String
+polars_dtypes.update({x: x for x in numeric_types})
+polars_dtypes[pl.Datetime] = pl.Datetime
 
-pandas_int = "Int64"
+polars_int = pl.Int64
 
 # ....and how they are managed
 data_type_conversion_args = {}
 for dtype in numeric_types:
     data_type_conversion_args[dtype] = ["scale", "offset"]
-data_type_conversion_args["str"] = ["disable_white_strip"]
-data_type_conversion_args["object"] = ["disable_white_strip"]
-data_type_conversion_args["key"] = ["disable_white_strip"]
-data_type_conversion_args["datetime"] = ["datetime_format"]
+data_type_conversion_args[pl.Utf8] = ["disable_white_strip"]
+data_type_conversion_args[pl.String] = ["disable_white_strip"]
+data_type_conversion_args[pl.Categorical] = ["disable_white_strip"]
+data_type_conversion_args[pl.Object] = ["disable_white_strip"]
+data_type_conversion_args[pl.Datetime] = ["datetime_format"]
 
 # Misc ------------------------------------------------------------------------
 dummy_level = "_SECTION_"
diff --git a/cdm_reader_mapper/mdf_reader/reader.py b/cdm_reader_mapper/mdf_reader/reader.py
index 9a0ef14e..0a514b74 100755
--- a/cdm_reader_mapper/mdf_reader/reader.py
+++ b/cdm_reader_mapper/mdf_reader/reader.py
@@ -6,9 +6,11 @@
 import csv
 import logging
 import os
-from io import StringIO as StringIO
+from io import StringIO
 
 import pandas as pd
+import polars as pl
+import xarray as xr
 
 from cdm_reader_mapper.common.json_dict import open_json_file
 from cdm_reader_mapper.common.pandas_TextParser_hdlr import make_copy
@@ -45,24 +47,26 @@ def _convert_and_decode(
         converter_kwargs,
         decoder_dict,
     ):
+        """DOCUMENTATION."""
         for section in converter_dict.keys():
             if section not in df.columns:
                 continue
             if section in decoder_dict.keys():
                 decoded = decoder_dict[section](df[section])
-                decoded.index = df[section].index
-                df[section] = decoded
+                df = df.with_columns(decoded.alias(section))
 
             converted = converter_dict[section](
                 df[section], **converter_kwargs[section]
             )
-            converted.index = df[section].index
-            df[section] = converted
+            # converted.index = df[section].index
+            df = df.with_columns(converted.alias(section))
         return df
 
-    def _validate(self, df):
+    def _validate(self, df, mask) -> pl.DataFrame:
+        """DOCUMENTATION."""
         return validate(
             data=df,
+            mask=mask,
             imodel=self.imodel,
             ext_table_path=self.ext_table_path,
             schema=self.schema,
@@ -115,78 +119,31 @@ def convert_and_decode_entries(
         if decode is not True:
             decoder_dict = {}
 
-        if isinstance(data, pd.DataFrame):
-            data = self._convert_and_decode(
-                data,
-                converter_dict,
-                converter_kwargs,
-                decoder_dict,
-            )
-        else:
-            data_buffer = StringIO()
-            TextParser = make_copy(data)
-            for i, df_ in enumerate(TextParser):
-                df = self._convert_and_decode(
-                    df_,
-                    converter_dict,
-                    converter_kwargs,
-                    decoder_dict,
-                )
-                df.to_csv(
-                    data_buffer,
-                    header=False,
-                    mode="a",
-                    encoding=self.encoding,
-                    index=False,
-                    quoting=csv.QUOTE_NONE,
-                    sep=properties.internal_delimiter,
-                    quotechar="\0",
-                    escapechar="\0",
-                )
-
-            data_buffer.seek(0)
-            data = pd.read_csv(
-                data_buffer,
-                names=df.columns,
-                chunksize=self.chunksize,
-                dtype=object,
-                delimiter=properties.internal_delimiter,
-                quotechar="\0",
-                escapechar="\0",
-            )
+        data = self._convert_and_decode(
+            data,
+            converter_dict,
+            converter_kwargs,
+            decoder_dict,
+        )
         return data
 
-    def validate_entries(self, data, validate):
+    def validate_entries(self, data, mask, validate) -> pl.DataFrame:
         """Validate data entries by using a pre-defined data model.
 
         Fill attribute `valid` with boolean mask.
         """
         if validate is not True:
-            mask = pd.DataFrame()
-        elif isinstance(data, pd.DataFrame):
-            mask = self._validate(data)
+            mask = pl.DataFrame()
+        elif isinstance(data, pl.DataFrame):
+            mask = self._validate(data, mask)
         else:
-            data_buffer = StringIO()
-            TextParser_ = make_copy(data)
-            for i, df_ in enumerate(TextParser_):
-                mask_ = self._validate(df_)
-                mask_.to_csv(
-                    data_buffer,
-                    header=False,
-                    mode="a",
-                    encoding=self.encoding,
-                    index=False,
-                )
-            data_buffer.seek(0)
-            mask = pd.read_csv(
-                data_buffer,
-                names=df_.columns,
-                chunksize=self.chunksize,
-            )
+            raise TypeError("Unknown data type")
         return mask
 
     def remove_boolean_values(self, data):
         """DOCUMENTATION"""
+        if isinstance(data, pl.DataFrame):
+            return data
         if isinstance(data, pd.DataFrame):
             data = data.map(remove_boolean_values)
             dtype = adjust_dtype(self.dtypes, data)
@@ -273,6 +230,7 @@ def read(
 
         self.chunksize = chunksize
         self.skiprows = skiprows
+        self.format = properties.open_file.get(self.imodel, "text")
 
         # 2. READ AND VALIDATE DATA
         logging.info(f"EXTRACTING DATA FROM MODEL: {self.imodel}")
@@ -287,26 +245,71 @@ def read(
         # a list with a single dataframe or a pd.io.parsers.TextFileReader
         logging.info("Getting data string from source...")
         self.configurations = self.get_configurations(read_sections_list, sections)
-        data = self.open_data(
-            read_sections_list,
-            sections,
-            # INFO: Set default as "pandas" to account for custom schema
-            open_with=properties.open_file.get(self.imodel, "pandas"),
+
+        TextParser = self.open_data(
             chunksize=chunksize,
+            format=self.format,
         )
 
         # 2.3. Extract, read and validate data in same loop
-        logging.info("Extracting and reading sections")
-        data = self.convert_and_decode_entries(
-            data,
-            convert=convert,
-            decode=decode,
-        )
-        mask = self.validate_entries(data, validate)
+        if isinstance(TextParser, (pd.DataFrame, xr.Dataset)):
+            data, mask = self._read_loop(
+                TextParser, read_sections_list, sections, decode, convert, validate
+            )
+        else:
+            data_buffer = StringIO()
+            mask_buffer = StringIO()
+            for df_ in TextParser:
+                df, mask_ = self._read_loop(
+                    df_, read_sections_list, sections, decode, convert, validate
+                )
+                df.to_csv(
+                    data_buffer,
+                    header=False,
+                    mode="a",
+                    encoding="utf-8",
+                    index=False,
+                    quoting=csv.QUOTE_NONE,
+                    sep=properties.internal_delimiter,
+                    quotechar="\0",
+                    escapechar="\0",
+                )
+                mask_.to_csv(
+                    mask_buffer,
+                    header=False,
+                    mode="a",
+                    encoding="utf-8",
+                    index=False,
+                    quoting=csv.QUOTE_NONE,
+                    sep=properties.internal_delimiter,
+                    quotechar="\0",
+                    escapechar="\0",
+                )
+            data_buffer.seek(0)
+            data = pd.read_csv(
+                data_buffer,
+                names=self.columns,
+                chunksize=self.chunksize,
+                dtype=object,
+                parse_dates=self.parse_dates,
+                delimiter=properties.internal_delimiter,
+                quotechar="\0",
+                escapechar="\0",
+            )
+            mask_buffer.seek(0)
+            mask = pd.read_csv(
+                mask_buffer,
+                names=self.columns,
+                chunksize=self.chunksize,
+                dtype=object,
+                parse_dates=self.parse_dates,
+                delimiter=properties.internal_delimiter,
+                quotechar="\0",
+                escapechar="\0",
+            )
 
         # 3. Create output DataBundle object
-        logging.info("Creata output DataBundle object")
-        data = self.remove_boolean_values(data)
+        logging.info("Create output DataBundle object")
         return DataBundle(
             data=data,
             columns=self.columns,
@@ -317,6 +320,29 @@ def read(
             imodel=self.imodel,
         )
 
+    def _read_loop(
+        self, TextParser, order, valid, decode, convert, validate
+    ) -> tuple[pd.DataFrame, pd.DataFrame]:
+        logging.info("Extracting and reading sections")
+        data, mask = self._read_sections(TextParser, order, valid, format=self.format)
+
+        logging.info("Decoding and converting entries")
+        data = self.convert_and_decode_entries(
+            data,
+            convert=convert,
+            decode=decode,
+        )
+
+        logging.info("Extracting and reading sections")
+        mask = self.validate_entries(data, mask, validate)
+
+        renames = {c: tuple(c.split(":")) for c in data.columns}
+
+        return (
+            data.to_pandas().rename(columns=renames),
+            mask.to_pandas().rename(columns=renames),
+        )
+
 
 def read_mdf(
     source,
diff --git a/cdm_reader_mapper/mdf_reader/schemas/schemas.py b/cdm_reader_mapper/mdf_reader/schemas/schemas.py
index 1a0d115c..2afe668a 100755
--- a/cdm_reader_mapper/mdf_reader/schemas/schemas.py
+++ b/cdm_reader_mapper/mdf_reader/schemas/schemas.py
@@ -13,30 +13,64 @@
 import os
 from pathlib import Path
 
+import polars as pl
+
 from cdm_reader_mapper.common.json_dict import collect_json_files, combine_dicts
 
 from .. import properties
 
 
+def _get_type_map():
+    return {
+        "float": pl.Float64,
+        "float64": pl.Float64,
+        "float32": pl.Float32,
+        "int": pl.Int64,
+        "int64": pl.Int64,
+        "int32": pl.Int32,
+        "int16": pl.Int16,
+        "int8": pl.Int8,
+        "uint": pl.UInt64,
+        "uint64": pl.UInt64,
+        "uint32": pl.UInt32,
+        "uint16": pl.UInt16,
+        "uint8": pl.UInt8,
+        "str": pl.String,
+        "object": pl.String,
+        "key": pl.String,
+        "date": pl.Datetime,
+        "datetime": pl.Datetime,
+        "time": pl.Datetime,
+    }
+
+
 def convert_dtype_to_default(dtype, section, element):
     """Convert data type to defaults (int, float)."""
     if dtype is None:
-        return
-    elif dtype == "float":
-        return dtype
-    elif dtype == "int":
-        return properties.pandas_int
-    elif "float" in dtype.lower():
+        return  # pl.String
+    dtype_map = _get_type_map()
+    dtype = dtype.lower()
+
+    polars_dtype = dtype_map.get(dtype)
+    if polars_dtype is not None:
+        return polars_dtype
+
+    if "float" in dtype:
+        logging.warning(
+            f"Set column type of ({section}, {element}) from deprecated {dtype} to float 32."
+        )
+        return pl.Float32
+    elif "uint" in dtype:
         logging.warning(
-            f"Set column type of ({section}, {element}) from deprecated {dtype} to float."
+            f"Set column type of ({section}, {element}) from deprecated {dtype} to uint 32."
         )
-        return "float"
-    elif "int" in dtype.lower():
+        return pl.UInt32
+    elif "int" in dtype:
         logging.warning(
-            f"Set column type of ({section}, {element}) from deprecated {dtype} to int."
+            f"Set column type of ({section}, {element}) from deprecated {dtype} to int 32."
         )
-        return properties.pandas_int
-    return dtype
+        return pl.Int32
+    return pl.String
 
 
 def _read_schema(schema):
@@ -154,7 +188,7 @@ def read_schema(imodel=None, ext_schema_path=None, ext_schema_file=None):
     else:
         imodel = imodel.split("_")
         if imodel[0] not in properties.supported_data_models:
-            logging.error("Input data model " f"{imodel[0]}" " not supported")
+            logging.error(f"Input data model {imodel[0]} not supported")
             return
         schema_files = collect_json_files(*imodel, base=f"{properties._base}.schemas")
 
@@ -209,7 +243,7 @@ def clean_schema(columns, schema):
     def get_index(idx, lst, section):
         if len(lst) == 1:
             return idx
-        return (section, idx)
+        return ":".join([section, idx])
 
     flat_schema = dict()
     for section in schema.get("sections"):
diff --git a/cdm_reader_mapper/mdf_reader/utils/configurator.py b/cdm_reader_mapper/mdf_reader/utils/configurator.py
index 652c02bd..effe3fb3 100755
--- a/cdm_reader_mapper/mdf_reader/utils/configurator.py
+++ b/cdm_reader_mapper/mdf_reader/utils/configurator.py
@@ -3,11 +3,11 @@
 from __future__ import annotations
 
 import ast
-import csv
-import logging
 
-import numpy as np
 import pandas as pd
+import polars as pl
+import polars.selectors as cs
+import xarray as xr
 
 from .. import properties
 from . import converters, decoders
@@ -19,7 +19,7 @@ class Configurator:
 
     def __init__(
         self,
-        df=pd.DataFrame(),
+        df: pd.DataFrame | pl.DataFrame | xr.Dataset = pl.DataFrame(),
         schema=None,
         order=None,
         valid=None,
@@ -38,7 +38,7 @@ def _get_index(self, section, order):
         if len(self.orders) == 1:
             return section
         else:
-            return (order, section)
+            return ":".join([order, section])
 
     def _get_ignore(self, section_dict):
         ignore = section_dict.get("ignore")
@@ -47,7 +47,7 @@ def _get_ignore(self, section_dict):
         return ignore
 
     def _get_dtype(self):
-        return properties.pandas_dtypes.get(self.sections_dict.get("column_type"))
+        return properties.polars_dtypes.get(self.sections_dict.get("column_type"))
 
     def _get_converter(self):
         return converters.get(self.sections_dict.get("column_type"))
@@ -137,92 +137,148 @@ def get_configuration(self):
             },
         }
 
-    def open_pandas(self):
-        """Open TextParser to pd.DataSeries."""
-        return self.df.apply(lambda x: self._read_line(x[0]), axis=1)
-
-    def _read_line(self, line: str):
-        i = j = 0
-        data_dict = {}
-        for order in self.orders:
-            header = self.schema["sections"][order]["header"]
-
-            disable_read = header.get("disable_read")
-            if disable_read is True:
-                data_dict[order] = line[i : properties.MAX_FULL_REPORT_WIDTH]
-                continue
+    def open_text(self):
+        """Open TextParser to a polars.DataFrame"""
+        if isinstance(self.df, pd.DataFrame):
+            self.df = pl.from_pandas(self.df)
+        if not isinstance(self.df, pl.DataFrame):
+            raise TypeError(f"Cannot open with polars for {type(self.df) = }")
+        self.df = self.df
+        mask_df = pl.DataFrame()
+        for section in self.orders:
+            header = self.schema["sections"][section]["header"]
 
             sentinal = header.get("sentinal")
-            bad_sentinal = sentinal is not None and not self._validate_sentinal(
-                i, line, sentinal
-            )
 
             section_length = header.get("length", properties.MAX_FULL_REPORT_WIDTH)
-            sections = self.schema["sections"][order]["elements"]
 
+            # Get data associated with current section
+            if sentinal is not None:
+                self.df = self.df.with_columns(
+                    [
+                        (
+                            pl.when(pl.col("full_str").str.starts_with(sentinal))
+                            .then(pl.col("full_str").str.head(section_length))
+                            .otherwise(pl.lit(None))
+                            .alias(section)
+                        ),
+                        (
+                            pl.when(pl.col("full_str").str.starts_with(sentinal))
+                            .then(pl.col("full_str").str.tail(-section_length))
+                            .otherwise(pl.col("full_str"))
+                            .alias("full_str")
+                        ),
+                    ]
+                )
+            else:
+                # Sentinal is None, the section is always present
+                self.df = self.df.with_columns(
+                    [
+                        pl.col("full_str").str.head(section_length).alias(section),
+                        pl.col("full_str").str.tail(-section_length).alias("full_str"),
+                    ]
+                )
+
+            # Used for validation
+            section_missing = self.df.get_column(section).is_null()
+
+            # Don't read fields
+            disable_read = header.get("disable_read", False)
+            if disable_read is True:
+                continue
+
+            fields = self.schema["sections"][section]["elements"]
             field_layout = header.get("field_layout")
             delimiter = header.get("delimiter")
+
+            # Handle delimited section
             if delimiter is not None:
                 delimiter_format = header.get("format")
                 if delimiter_format == "delimited":
                     # Read as CSV
-                    field_names = sections.keys()
-                    fields = list(csv.reader([line[i:]], delimiter=delimiter))[0]
-                    for field_name, field in zip(field_names, fields):
-                        index = self._get_index(field_name, order)
-                        data_dict[index] = field.strip()
-                        i += len(field)
-                    j = i
+                    field_names = fields.keys()
+                    field_names = [self._get_index(x, section) for x in field_names]
+                    n_fields = len(field_names)
+                    self.df = self.df.with_columns(
+                        pl.col(section)
+                        .str.splitn(delimiter, n_fields)
+                        .struct.rename_fields(field_names)
+                        .struct.unnest()
+                    )
+                    for field in field_names:
+                        self.df = self.df.with_columns(
+                            pl.col(field).str.strip_chars(" ").name.keep()
+                        )
+                        mask_df = mask_df.with_columns(
+                            (
+                                section_missing
+                                | self.df.get_column(field).is_not_null()
+                            ).alias(field)
+                        )
+                    self.df = self.df.drop([section])
+
                     continue
                 elif field_layout != "fixed_width":
-                    logging.error(
-                        f"Delimiter for {order} is set to {delimiter}. Please specify either format or field_layout in your header schema {header}."
+                    raise ValueError(
+                        f"Delimiter for {section} is set to {delimiter}. "
+                        + f"Please specify either format or field_layout in your header schema {header}."
                     )
-                    return
 
-            k = i + section_length
-            for section, section_dict in sections.items():
-                missing = True
-                index = self._get_index(section, order)
-                ignore = (order not in self.valid) or self._get_ignore(section_dict)
-                na_value = section_dict.get("missing_value")
-                field_length = section_dict.get(
+            # Loop through fixed-width fields
+            for field, field_dict in fields.items():
+                index = self._get_index(field, section)
+                ignore = (section not in self.valid) or self._get_ignore(field_dict)
+                field_length = field_dict.get(
                     "field_length", properties.MAX_FULL_REPORT_WIDTH
                 )
+                na_value = field_dict.get("missing_value")
 
-                j = (i + field_length) if not bad_sentinal else i
-                if j > k:
-                    missing = False
-                    j = k
-
-                if ignore is not True:
-                    value = line[i:j]
-
-                    if not value.strip():
-                        value = True
-                    if value == na_value:
-                        value = True
-
-                    if i == j and missing is True:
-                        value = False
+                if ignore:
+                    # Move to next field
+                    self.df = self.df.with_columns(
+                        pl.col(section).str.tail(-field_length).name.keep(),
+                    )
+                    if delimiter is not None:
+                        self.df = self.df.with_columns(
+                            pl.col(section).str.strip_prefix(delimiter).name.keep()
+                        )
+                    continue
 
-                    data_dict[index] = value
+                missing_map = {"": None}
+                if na_value is not None:
+                    missing_map[na_value] = None
+
+                self.df = self.df.with_columns(
+                    [
+                        # If section not present in a row, then both these are null
+                        (
+                            pl.col(section)
+                            .str.head(field_length)
+                            .str.strip_chars(" ")
+                            .replace(missing_map)
+                            .alias(index)
+                        ),
+                        pl.col(section).str.tail(-field_length).name.keep(),
+                    ]
+                )
+                mask_df = mask_df.with_columns(
+                    (section_missing | self.df.get_column(index).is_not_null()).alias(
+                        index
+                    )
+                )
+                if delimiter is not None:
+                    self.df = self.df.with_columns(
+                        pl.col(section).str.strip_prefix(delimiter).name.keep()
+                    )
 
-                if delimiter is not None and line[j : j + len(delimiter)] == delimiter:
-                    j += len(delimiter)
-                i = j
+            self.df = self.df.drop([section])
 
-        return pd.Series(data_dict)
+        return self.df.drop("full_str"), mask_df
 
     def open_netcdf(self):
-        """Open netCDF to pd.Series."""
-
-        def replace_empty_strings(series):
-            if series.dtype == "object":
-                series = series.str.decode("utf-8")
-                series = series.str.strip()
-                series = series.map(lambda x: True if x == "" else x)
-            return series
+        """Open netCDF to polars.DataFrame."""
+        if not isinstance(self.df, xr.Dataset):
+            raise TypeError(f"Cannot open with netCDF for {type(self.df) = }")
 
         missing_values = []
         attrs = {}
@@ -249,15 +305,28 @@ def replace_empty_strings(series):
                 elif section in self.df.dims:
                     renames[section] = index
                 elif section in self.df.attrs:
-                    attrs[index] = self.df.attrs[index]
+                    # Initialise a constant column
+                    attrs[index] = pl.lit(self.df.attrs[section].replace("\n", "; "))
                 else:
                     missing_values.append(index)
 
-        df = self.df[renames.keys()].to_dataframe().reset_index()
-        attrs = {k: v.replace("\n", "; ") for k, v in attrs.items()}
-        df = df.rename(columns=renames)
-        df = df.assign(**attrs)
-        df[disables] = np.nan
-        df = df.apply(lambda x: replace_empty_strings(x))
-        df[missing_values] = False
-        return df
+        df: pl.DataFrame = pl.from_pandas(
+            self.df[renames.keys()].to_dataframe().reset_index()
+        )
+        df = df.rename(mapping=renames)
+        df = df.with_columns(**attrs)
+        df = df.with_columns(
+            [pl.lit(None).alias(missing) for missing in missing_values]
+        )
+        df = df.with_columns([pl.lit(None).alias(disable) for disable in disables])
+        # Replace empty or whitespace string with None
+        df = df.with_columns(
+            cs.binary().cast(pl.String).str.strip_chars().replace("", None)
+        )
+
+        # Create missing mask
+        mask_df = df.select(pl.all().is_not_null())
+        mask_df = mask_df.with_columns(
+            [pl.lit(True).alias(c) for c in missing_values + disables]
+        )
+        return df, mask_df
diff --git a/cdm_reader_mapper/mdf_reader/utils/converters.py b/cdm_reader_mapper/mdf_reader/utils/converters.py
index 4c14aa5e..9485bb45 100755
--- a/cdm_reader_mapper/mdf_reader/utils/converters.py
+++ b/cdm_reader_mapper/mdf_reader/utils/converters.py
@@ -2,10 +2,11 @@
 
 from __future__ import annotations
 
-import pandas as pd
+import logging
+
+import polars as pl
 
 from .. import properties
-from .utilities import convert_str_boolean
 
 
 class df_converters:
@@ -16,24 +17,59 @@ def __init__(self, dtype):
         self.numeric_scale = 1.0 if self.dtype == "float" else 1
         self.numeric_offset = 0.0 if self.dtype == "float" else 0
 
-    def to_numeric(self, data, offset, scale):
-        """Convert object type elements of a pandas series to numeric type."""
+    def _check_conversion(self, data: pl.Series, converted: pl.Series, threshold: int):
+        if (
+            bad_converts := data.filter(converted.is_null() & data.is_not_null())
+        ).len() > 0:
+            msg = f"Have {bad_converts.len()} values that failed to be converted to {self.dtype}"
+            if bad_converts.len() <= threshold:
+                msg += f": values = {', '.join(bad_converts)}"
+            logging.warning(msg)
+        return None
+
+    def _drop_whitespace_vals(self, data: pl.Series):
+        data_name = data.name
+        return (
+            data.to_frame()
+            .select(
+                pl.when(data.str.contains(r"^\s*$"))
+                .then(pl.lit(None))
+                .otherwise(data)
+                .alias(data_name)
+            )
+            .get_column(data_name)
+        )
+
+    # May not be needed?
+    def decode(self, data):
+        """Decode object type elements of a pandas series to UTF-8."""
 
-        def _to_numeric(x):
-            x = convert_str_boolean(x)
-            if isinstance(x, bool):
+        def _decode(x):
+            if not isinstance(x, str):
                 return x
-            if isinstance(x, str):
-                x = x.strip()
-                x.replace(" ", "0")
+
             try:
-                return offset + float(x) * scale
-            except ValueError:
-                return False
+                encoded = x.encode("latin1")
+                return encoded.decode("utf-8")
+            except (UnicodeDecodeError, UnicodeEncodeError):
+                return x
+
+        return data.apply(lambda x: _decode(x))
 
-        return data.apply(lambda x: _to_numeric(x))
+    def to_numeric(self, data: pl.Series):
+        """Convert object type elements of a pandas series to numeric type."""
+        data = self._drop_whitespace_vals(data)
+        # str method fails if all nan, pd.Series.replace method is not the same
+        # as pd.Series.str.replace!
+        data = data.str.strip_chars().str.replace_all(" ", "0")
+
+        converted = data.cast(self.dtype, strict=False)
+        self._check_conversion(data, converted, 20)
 
-    def object_to_numeric(self, data, scale=None, offset=None):
+        #  Convert to numeric, then scale (?!) and give it's actual int type
+        return converted
+
+    def object_to_numeric(self, data: pl.Series, scale=None, offset=None):
         """
         Convert the object type elements of a pandas series to numeric type.
 
@@ -63,36 +99,40 @@ def object_to_numeric(self, data, scale=None, offset=None):
         """
         scale = scale if scale else self.numeric_scale
         offset = offset if offset else self.numeric_offset
-        if data.dtype == "object":
-            data = self.to_numeric(data, offset, scale)
-        return data
+        if not data.dtype.is_numeric():
+            data = self.to_numeric(data)
+
+        data = offset + data * scale
+        return data.cast(self.dtype)
 
-    def object_to_object(self, data, disable_white_strip=False):
+    def object_to_object(self, data: pl.Series, disable_white_strip=None):
         """DOCUMENTATION."""
         if data.dtype != "object":
             return data
-
-        if not disable_white_strip:
-            data = data.str.strip()
-        elif disable_white_strip == "l":
-            data = data.str.rstrip()
-        elif disable_white_strip == "r":
-            data = data.str.lstrip()
-        return data.apply(
-            lambda x: None if isinstance(x, str) and (x.isspace() or not x) else x
-        )
-
-    def object_to_datetime(self, data, datetime_format="%Y%m%d"):
+        data = self.decode(data)
+        if disable_white_strip is None:
+            data = data.str.strip_chars(" ")
+        else:
+            if disable_white_strip == "l":
+                data = data.str.strip_chars_end(" ")
+            elif disable_white_strip == "r":
+                data = data.str.strip_chars_start(" ")
+        return self._drop_whitespace_vals(data)
+
+    def object_to_datetime(self, data: pl.Series, datetime_format="%Y%m%d"):
         """DOCUMENTATION."""
         if data.dtype != "object":
             return data
-        return pd.to_datetime(data, format=datetime_format, errors="coerce")
+        converted = data.str.to_datetime(format=datetime_format, strict=False)
+        self._check_conversion(data, converted, 20)
+        return converted
 
 
 converters = dict()
 for dtype in properties.numeric_types:
     converters[dtype] = df_converters(dtype).object_to_numeric
-converters["datetime"] = df_converters("datetime").object_to_datetime
-converters["str"] = df_converters("str").object_to_object
-converters["object"] = df_converters("object").object_to_object
-converters["key"] = df_converters("key").object_to_object
+converters[pl.Datetime] = df_converters(pl.Datetime).object_to_datetime
+converters[pl.String] = df_converters(pl.String).object_to_object
+converters[pl.Utf8] = df_converters(pl.Utf8).object_to_object
+converters[pl.Object] = df_converters(pl.Object).object_to_object
+converters[pl.Categorical] = df_converters(pl.Categorical).object_to_object
diff --git a/cdm_reader_mapper/mdf_reader/utils/decoders.py b/cdm_reader_mapper/mdf_reader/utils/decoders.py
index e1d75ba1..1437042e 100755
--- a/cdm_reader_mapper/mdf_reader/utils/decoders.py
+++ b/cdm_reader_mapper/mdf_reader/utils/decoders.py
@@ -2,8 +2,11 @@
 
 from __future__ import annotations
 
+import logging
+
+import polars as pl
+
 from .. import properties
-from .utilities import convert_str_boolean
 
 
 class df_decoders:
@@ -11,21 +14,37 @@ class df_decoders:
 
     def __init__(self, dtype):
         # Return as object, conversion to actual type in converters only!
-        self.dtype = "object"
+        self.dtype = pl.String
+
+    def _check_decode(
+        self, data: pl.Series, decoded: pl.Series, threshold: int, method: str
+    ):
+        if (
+            bad_decode := data.filter(decoded.is_null() & data.is_not_null())
+        ).len() > 0:
+            msg = f"Have {bad_decode.len()} values that failed to be {method} decoded"
+            if bad_decode.len() <= threshold:
+                msg += f": values = {', '.join(bad_decode)}"
+            logging.warning(msg)
+        return None
 
     def base36(self, data):
         """DOCUMENTATION."""
+        # Caution: int(str(np.nan),36) ==> 30191
+        decoded = (
+            data.replace({"NaN": None})
+            .str.strip_chars(" ")
+            .replace({"": None})
+            .str.to_integer(base=36, strict=False)
+            .cast(self.dtype)
+        )
 
-        def _base36(x):
-            x = convert_str_boolean(x)
-            if isinstance(x, bool):
-                return x
-            return str(int(str(x), 36))
-
-        return data.apply(lambda x: _base36(x))
+        self._check_decode(data, decoded, 20, "base36")
+        return decoded
 
 
-decoders = {"base36": {}}
+decoders = dict()
+decoders["base36"] = dict()
 for dtype in properties.numeric_types:
     decoders["base36"][dtype] = df_decoders(dtype).base36
-decoders["base36"]["key"] = df_decoders("key").base36
+decoders["base36"][pl.Categorical] = df_decoders(pl.Categorical).base36
diff --git a/cdm_reader_mapper/mdf_reader/utils/filereader.py b/cdm_reader_mapper/mdf_reader/utils/filereader.py
index 2ab03403..afd09275 100755
--- a/cdm_reader_mapper/mdf_reader/utils/filereader.py
+++ b/cdm_reader_mapper/mdf_reader/utils/filereader.py
@@ -2,12 +2,11 @@
 
 from __future__ import annotations
 
-import csv
 import logging
 import os
 from copy import deepcopy
-from io import StringIO
 
+import polars as pl
 import pandas as pd
 import xarray as xr
 
@@ -81,31 +80,32 @@ def _adjust_schema(self, ds, dtypes):
                     else:
                         del self.schema["sections"][section]["elements"][data_var][attr]
 
-    def _select_years(self, df):
-        def get_years_from_datetime(date):
-            try:
-                return date.year
-            except AttributeError:
-                return date
-
+    def _select_years(self, df: pl.DataFrame) -> pl.DataFrame:
         if self.year_init is None and self.year_end is None:
             return df
 
+        if self.imodel is None:
+            logging.error("Selection of years is not supported for custom schema")
+            return df
+
         data_model = self.imodel.split("_")[0]
-        dates = df[properties.year_column[data_model]]
-        years = dates.apply(lambda x: get_years_from_datetime(x))
-        years = years.astype(int)
+        dates = df.get_column(properties.year_column[data_model])
+        if dates.dtype == pl.Datetime:
+            years = dates.dt.year()
+        else:
+            years = dates.cast(pl.Int64, strict=False)
 
-        mask = pd.Series([True] * len(years))
-        if self.year_init:
-            mask[years < self.year_init] = False
-        if self.year_end:
-            mask[years > self.year_end] = False
+        mask = pl.repeat(True, df.height, eager=True)
+        if self.year_init and self.year_end:
+            mask = years.is_between(self.year_init, self.year_end, closed="both")
+        elif self.year_init:
+            mask = years.ge(self.year_init)
+        elif self.year_end:
+            mask = years.le(self.year_end)
 
-        index = mask[mask].index
-        return df.iloc[index].reset_index(drop=True)
+        return df.filter(mask)
 
-    def _read_pandas(self, **kwargs):
+    def _read_text(self, **kwargs):
         return pd.read_fwf(
             self.source,
             header=None,
@@ -113,6 +113,8 @@ def _read_pandas(self, **kwargs):
             escapechar="\0",
             dtype=object,
             skip_blank_lines=False,
+            widths=[properties.MAX_FULL_REPORT_WIDTH],
+            names=["full_str"],
             **kwargs,
         )
 
@@ -126,21 +128,26 @@ def _read_sections(
         TextParser,
         order,
         valid,
-        open_with,
+        format,
     ):
-        if open_with == "pandas":
-            df = Configurator(
+        if format == "text":
+            df, mask_df = Configurator(
                 df=TextParser, schema=self.schema, order=order, valid=valid
-            ).open_pandas()
-        elif open_with == "netcdf":
-            df = Configurator(
+            ).open_text()
+        elif format == "netcdf":
+            df, mask_df = Configurator(
                 df=TextParser, schema=self.schema, order=order, valid=valid
             ).open_netcdf()
         else:
-            raise ValueError("open_with has to be one of ['pandas', 'netcdf']")
+            raise ValueError("format has to be one of ['text', 'netcdf']")
+
+        # missing_values = df.select(["index", "missing_values"]).pipe(set_missing_values)
+        df = df.pipe(self._select_years)
 
         self.columns = df.columns
-        return self._select_years(df)
+        # Replace None with NaN - is this necessary for polars?
+        # df = df.where(df.notnull(), np.nan)
+        return df, mask_df
 
     def get_configurations(self, order, valid):
         """DOCUMENTATION."""
@@ -154,51 +161,53 @@ def get_configurations(self, order, valid):
 
     def open_data(
         self,
-        order,
-        valid,
         chunksize,
-        open_with="pandas",
+        format="text",
     ):
         """DOCUMENTATION."""
         encoding = self.schema["header"].get("encoding")
-        if open_with == "netcdf":
+        if format == "netcdf":
             TextParser = self._read_netcdf()
-        elif open_with == "pandas":
-            TextParser = self._read_pandas(
+        # NOTE: Chunking - polars does have pl.read_csv_batched, but batch_size
+        # is not respected: https://github.com/pola-rs/polars/issues/19978
+        # alternative: lazy?
+        elif format == "text":
+            TextParser = self._read_text(
                 encoding=encoding,
-                widths=[properties.MAX_FULL_REPORT_WIDTH],
                 skiprows=self.skiprows,
                 chunksize=chunksize,
             )
         else:
-            raise ValueError("open_with has to be one of ['pandas', 'netcdf']")
-
-        if isinstance(TextParser, pd.DataFrame) or isinstance(TextParser, xr.Dataset):
-            return self._read_sections(TextParser, order, valid, open_with=open_with)
-        else:
-            data_buffer = StringIO()
-            for i, df_ in enumerate(TextParser):
-                df = self._read_sections(df_, order, valid, open_with=open_with)
-                df.to_csv(
-                    data_buffer,
-                    header=False,
-                    mode="a",
-                    encoding=encoding,
-                    index=False,
-                    quoting=csv.QUOTE_NONE,
-                    sep=properties.internal_delimiter,
-                    quotechar="\0",
-                    escapechar="\0",
-                )
-            data_buffer.seek(0)
-            data = pd.read_csv(
-                data_buffer,
-                names=df.columns,
-                chunksize=self.chunksize,
-                dtype=object,
-                parse_dates=self.parse_dates,
-                delimiter=properties.internal_delimiter,
-                quotechar="\0",
-                escapechar="\0",
-            )
-            return data
+            raise ValueError("format has to be one of ['text', 'netcdf']")
+
+        return TextParser
+        # if isinstance(TextParser, (pl.DataFrame, xr.Dataset)):
+        #     df, mask_df = self._read_sections(TextParser, order, valid, open_with=open_with)
+        #     return df, mask_df
+        # else:
+        #     data_buffer = StringIO()
+        #     for i, df_ in enumerate(TextParser):
+        #         df = self._read_sections(df_, order, valid, open_with=open_with)
+        #         df.to_csv(
+        #             data_buffer,
+        #             header=False,
+        #             mode="a",
+        #             encoding="utf-8",
+        #             index=False,
+        #             quoting=csv.QUOTE_NONE,
+        #             sep=properties.internal_delimiter,
+        #             quotechar="\0",
+        #             escapechar="\0",
+        #         )
+        #     data_buffer.seek(0)
+        #     data = pd.read_csv(
+        #         data_buffer,
+        #         names=df.columns,
+        #         chunksize=self.chunksize,
+        #         dtype=object,
+        #         parse_dates=self.parse_dates,
+        #         delimiter=properties.internal_delimiter,
+        #         quotechar="\0",
+        #         escapechar="\0",
+        #     )
+        #     return data
diff --git a/cdm_reader_mapper/mdf_reader/utils/validators.py b/cdm_reader_mapper/mdf_reader/utils/validators.py
index 8787dfbb..7f2fd0d1 100755
--- a/cdm_reader_mapper/mdf_reader/utils/validators.py
+++ b/cdm_reader_mapper/mdf_reader/utils/validators.py
@@ -5,7 +5,7 @@
 import logging
 
 import numpy as np
-import pandas as pd
+import polars as pl
 
 from .. import properties
 from ..codes import codes
@@ -13,36 +13,24 @@
 from .utilities import convert_str_boolean
 
 
-def validate_datetime(elements, data):
+def validate_datetime(mask, elements, data: pl.DataFrame):
     """DOCUMENTATION."""
+    for element in elements:
+        col = data.get_column(element)
+        if not col.dtype.is_temporal():
+            mask = mask.with_columns(pl.lit(False).alias(element))
+            continue
 
-    def is_date_object(object):
-        if hasattr(object, "year"):
-            return True
+        mask = mask.with_columns((pl.col(element) & col.is_not_null()).alias(element))
 
-    mask = pd.DataFrame(index=data.index, data=False, columns=elements)
-    mask[elements] = (
-        data[elements].apply(np.vectorize(is_date_object)) | data[elements].isna()
-    )
     return mask
 
 
-def validate_numeric(elements, data, schema):
+def validate_numeric(mask: pl.DataFrame, elements, data: pl.DataFrame, schema):
     """DOCUMENTATION."""
-
-    # Find thresholds in schema. Flag if not available -> warn
-    def _to_numeric(x):
-        if x is None:
-            return np.nan
-        x = convert_str_boolean(x)
-        if isinstance(x, bool):
-            return x
-        return float(x)
-
-    data[elements] = data[elements].map(_to_numeric)
-    mask = pd.DataFrame(index=data.index, data=False, columns=elements)
-    lower = {x: schema.get(x).get("valid_min", -np.inf) for x in elements}
-    upper = {x: schema.get(x).get("valid_max", np.inf) for x in elements}
+    # Handle cases where value is explicitly None in the dictionary
+    lower = {x: schema.get(x).get("valid_min") or -np.inf for x in elements}
+    upper = {x: schema.get(x).get("valid_max") or np.inf for x in elements}
 
     set_elements = [
         x for x in lower.keys() if lower.get(x) != -np.inf and upper.get(x) != np.inf
@@ -57,26 +45,45 @@ def _to_numeric(x):
         logging.warning(
             "Corresponding upper and/or lower bounds set to +/-inf for validation"
         )
-    mask[elements] = (
-        (data[elements] >= [lower.get(x) for x in elements])
-        & (data[elements] <= [upper.get(x) for x in elements])
-    ) | data[elements].isna()
+
+    mask = mask.with_columns(
+        [
+            (
+                pl.col(element)
+                | (
+                    data[element].is_not_null()
+                    & data[element].is_not_nan()
+                    & data[element].is_between(
+                        lower.get(element), upper.get(element), closed="both"
+                    )
+                )
+            ).alias(element)
+            for element in elements
+        ]
+    )
     return mask
 
 
-def validate_str(elements, data):
+def validate_str(mask, elements, data):
     """DOCUMENTATION."""
-    return pd.DataFrame(index=data.index, data=True, columns=elements)
+    return mask
 
 
-def validate_codes(elements, data, schema, imodel, ext_table_path):
+def validate_codes(
+    mask: pl.DataFrame,
+    elements: list[str],
+    data: pl.DataFrame,
+    schema,
+    imodel,
+    ext_table_path,
+):
     """DOCUMENTATION."""
-    mask = pd.DataFrame(index=data.index, data=False, columns=elements)
     for element in elements:
         code_table_name = schema.get(element).get("codetable")
         if not code_table_name:
             logging.error(f"Code table not defined for element {element}")
             logging.warning("Element mask set to False")
+            mask = mask.with_columns(pl.lit(False).alias(element))
             continue
 
         table = codes.read_table(
@@ -87,19 +94,18 @@ def validate_codes(elements, data, schema, imodel, ext_table_path):
         if not table:
             continue
 
-        dtype = properties.pandas_dtypes.get(schema.get(element).get("column_type"))
+        dtype = properties.polars_dtypes.get(schema.get(element).get("column_type"))
 
         table_keys = list(table.keys())
-        validation_df = data[element]
-        value = validation_df.astype(dtype).astype("str")
-        valid = validation_df.notna()
-        mask_ = value.isin(table_keys)
-        mask[element] = mask_.where(valid, True)
+        col = data.get_column(element)
+        col = col.cast(dtype).cast(pl.String)
+        valid = col.is_not_null() & col.is_not_nan() & col.is_in(table_keys)
+        mask = mask.with_columns((pl.col(element) | valid).alias(element))
 
     return mask
 
 
-def _get_elements(elements, element_atts, key):
+def _get_elements(elements, element_atts, key) -> list[str]:
     def _condition(x):
         column_types = element_atts.get(x).get("column_type")
         if key == "numeric_types":
@@ -125,7 +131,8 @@ def _mask_boolean(x, boolean):
 
 
 def validate(
-    data,
+    data: pl.DataFrame,
+    mask: pl.DataFrame,
     imodel,
     ext_table_path,
     schema,
@@ -159,37 +166,25 @@ def validate(
         filename=None,
     )
     # Check input
-    if not isinstance(data, pd.DataFrame):  # or not isinstance(mask0, pd.DataFrame):
+    if not isinstance(data, pl.DataFrame) or not isinstance(mask, pl.DataFrame):
         # logging.error("Input data and mask must be a pandas data frame object")
         logging.error("input data must be a pandas DataFrame.")
         return
 
-    mask = pd.DataFrame(index=data.index, columns=data.columns, dtype=object)
-    if data.empty:
+    if data.is_empty():
         return mask
 
+    disables = disables or []
+
     # Get the data elements from the input data: might be just a subset of
     # data model and flatten the schema to get a simple and sequential list
     # of elements included in the input data
-    elements = [x for x in data if x not in disables]
+    elements = [x for x in data.columns if x not in disables]
     element_atts = schemas.df_schema(elements, schema)
 
-    # See what elements we need to validate
+    # 1. Numeric elements
     numeric_elements = _get_elements(elements, element_atts, "numeric_types")
-    datetime_elements = _get_elements(elements, element_atts, "datetime")
-    coded_elements = _get_elements(elements, element_atts, "key")
-    str_elements = _get_elements(elements, element_atts, "str")
-
-    if _element_tuples(numeric_elements, datetime_elements, coded_elements):
-        validated_columns = pd.MultiIndex.from_tuples(
-            list(set(numeric_elements + coded_elements + datetime_elements))
-        )
-    else:
-        validated_columns = list(
-            set(numeric_elements + coded_elements + datetime_elements)
-        )
-
-    mask[numeric_elements] = validate_numeric(numeric_elements, data, element_atts)
+    mask = validate_numeric(mask, numeric_elements, data, element_atts)
 
     # 2. Table coded elements
     # See following: in multiple keys code tables, the non parameter element,
@@ -201,8 +196,10 @@ def validate(
     # Get the full list of keys combinations (tuples, triplets...) and check the column combination against that: if it fails, mark the element!
     # Need to see how to grab the YEAR part of a datetime when YEAR comes from a datetime element
     # pd.DatetimeIndex(df['_datetime']).year
+    coded_elements = _get_elements(elements, element_atts, "key")
     if len(coded_elements) > 0:
-        mask[coded_elements] = validate_codes(
+        mask = validate_codes(
+            mask,
             coded_elements,
             data,
             element_atts,
@@ -211,21 +208,12 @@ def validate(
         )
 
     # 3. Datetime elements
-    mask[datetime_elements] = validate_datetime(datetime_elements, data)
+    datetime_elements = _get_elements(elements, element_atts, "datetime")
+    mask = validate_datetime(mask, datetime_elements, data)
 
     # 4. str elements
-    mask[str_elements] = validate_str(str_elements, data)
-
-    # 5. Set False values
-    mask[validated_columns] = mask[validated_columns].mask(
-        data[validated_columns].map(_mask_boolean, boolean=False),
-        False,
-    )
-
-    mask[validated_columns] = mask[validated_columns].mask(
-        data[validated_columns].map(_mask_boolean, boolean=True),
-        True,
-    )
+    str_elements = _get_elements(elements, element_atts, "str")
+    mask = validate_str(mask, str_elements, data)
 
-    mask[disables] = np.nan
+    mask = mask.with_columns([pl.lit(None).alias(disable) for disable in disables])
     return mask
diff --git a/cdm_reader_mapper/properties.py b/cdm_reader_mapper/properties.py
index 59668b9f..2ecaef89 100755
--- a/cdm_reader_mapper/properties.py
+++ b/cdm_reader_mapper/properties.py
@@ -2,8 +2,23 @@
 
 from __future__ import annotations
 
-numeric_types = ["Int64", "int", "float"]
+import polars as pl
 
-object_types = ["str", "object", "key", "datetime"]
+# numeric_types = ["Int64", "int", "float"]
+numeric_types = [
+    pl.Float64,
+    pl.Float32,
+    pl.Int64,
+    pl.Int32,
+    pl.Int16,
+    pl.Int8,
+    pl.UInt64,
+    pl.UInt32,
+    pl.UInt16,
+    pl.UInt8,
+]
+
+# object_types = ["str", "object", "key", "datetime"]
+object_types = [pl.String, pl.Utf8, pl.Datetime, pl.Object, pl.Categorical]
 
 supported_data_models = ["craid", "gcc", "icoads", "pub47"]
diff --git a/docs/example_notebooks/read_overview.ipynb b/docs/example_notebooks/read_overview.ipynb
index de546ab3..ced54b4b 100755
--- a/docs/example_notebooks/read_overview.ipynb
+++ b/docs/example_notebooks/read_overview.ipynb
@@ -16,7 +16,9 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2025-01-07 11:44:05,441 - root - INFO - init basic configure of logging success\n"
+      "2025-01-31 09:29:23,553 - root - INFO - init basic configure of logging success\n",
+      "/Users/josidd/git/github/cdm_fork/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
      ]
     }
    ],
@@ -24,6 +26,7 @@
     "from __future__ import annotations\n",
     "\n",
     "import pandas as pd\n",
+    "import polars as pl\n",
     "\n",
     "from cdm_reader_mapper import properties, read_mdf, test_data"
    ]
@@ -51,7 +54,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2025-01-07 11:44:09,196 - root - INFO - Attempting to fetch remote file: icoads/r300/d704/input/icoads_r300_d704_1878-10-01_subset.imma.md5\n"
+      "2025-01-31 09:29:27,576 - root - INFO - Attempting to fetch remote file: icoads/r300/d704/input/icoads_r300_d704_1878-10-01_subset.imma.md5\n"
      ]
     },
     {
@@ -130,7 +133,7 @@
     "A **schema** file gathers a collection of descriptors that enable the `mdf_reader` tool to access the content\n",
     "of a `data model/ schema` and extract the sections of the raw data file that contains meaningful information. These **schema files** are the `bones` of the data model, basically `.json` files outlining the structure of the incoming raw data.\n",
     "\n",
-    "The `mdf_reader` takes this information and translate the characteristics of the data to a python pandas dataframe.\n",
+    "The `mdf_reader` takes this information and translate the characteristics of the data to a python polars dataframe.\n",
     "\n",
     "The tool has several **schema** templates build in."
    ]
@@ -171,19 +174,18 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2025-01-07 11:44:10,310 - root - INFO - READING DATA MODEL SCHEMA FILE...\n",
-      "2025-01-07 11:44:10,327 - root - INFO - EXTRACTING DATA FROM MODEL: icoads_r300_d704\n",
-      "2025-01-07 11:44:10,327 - root - INFO - Getting data string from source...\n",
-      "2025-01-07 11:44:11,139 - root - WARNING - Data numeric elements with missing upper or lower threshold: ('c1', 'BSI'),('c1', 'AQZ'),('c1', 'AQA'),('c1', 'UQZ'),('c1', 'UQA'),('c1', 'VQZ'),('c1', 'VQA'),('c1', 'PQZ'),('c1', 'PQA'),('c1', 'DQZ'),('c1', 'DQA'),('c5', 'OS'),('c5', 'OP'),('c5', 'FM'),('c5', 'IMMV'),('c5', 'IX'),('c5', 'W2'),('c5', 'WMI'),('c5', 'SD2'),('c5', 'SP2'),('c5', 'IS'),('c5', 'RS'),('c5', 'IC1'),('c5', 'IC2'),('c5', 'IC3'),('c5', 'IC4'),('c5', 'IC5'),('c5', 'IR'),('c5', 'RRR'),('c5', 'TR'),('c5', 'NU'),('c5', 'QCI'),('c5', 'QI1'),('c5', 'QI2'),('c5', 'QI3'),('c5', 'QI4'),('c5', 'QI5'),('c5', 'QI6'),('c5', 'QI7'),('c5', 'QI8'),('c5', 'QI9'),('c5', 'QI10'),('c5', 'QI11'),('c5', 'QI12'),('c5', 'QI13'),('c5', 'QI14'),('c5', 'QI15'),('c5', 'QI16'),('c5', 'QI17'),('c5', 'QI18'),('c5', 'QI19'),('c5', 'QI20'),('c5', 'QI21'),('c5', 'QI22'),('c5', 'QI23'),('c5', 'QI24'),('c5', 'QI25'),('c5', 'QI26'),('c5', 'QI27'),('c5', 'QI28'),('c5', 'QI29'),('c5', 'RHI'),('c5', 'AWSI'),('c6', 'FBSRC'),('c6', 'MST'),('c7', 'OPM'),('c7', 'LOT'),('c9', 'CCe'),('c9', 'WWe'),('c9', 'Ne'),('c9', 'NHe'),('c9', 'He'),('c9', 'CLe'),('c9', 'CMe'),('c9', 'CHe'),('c9', 'SBI'),('c95', 'DPRO'),('c95', 'DPRP'),('c95', 'UFR'),('c95', 'ASIR'),('c96', 'ASII'),('c97', 'ASIE'),('c99_journal', 'vessel_length'),('c99_journal', 'vessel_beam'),('c99_journal', 'hold_depth'),('c99_journal', 'tonnage'),('c99_journal', 'baro_height'),('c99_daily', 'year'),('c99_daily', 'month'),('c99_daily', 'day'),('c99_daily', 'distance'),('c99_daily', 'lat_deg_an'),('c99_daily', 'lat_min_an'),('c99_daily', 'lon_deg_an'),('c99_daily', 'lon_min_an'),('c99_daily', 'lat_deg_on'),('c99_daily', 'lat_min_on'),('c99_daily', 'lon_deg_of'),('c99_daily', 'lon_min_of'),('c99_daily', 'current_speed'),('c99_data4', 'year'),('c99_data4', 'month'),('c99_data4', 'day'),('c99_data4', 'hour'),('c99_data4', 'ship_speed'),('c99_data4', 'compass_correction'),('c99_data4', 'attached_thermometer'),('c99_data4', 'air_temperature'),('c99_data4', 'wet_bulb_temperature'),('c99_data4', 'sea_temperature'),('c99_data4', 'sky_clear'),('c99_data5', 'year'),('c99_data5', 'month'),('c99_data5', 'day'),('c99_data5', 'hour'),('c99_data5', 'ship_speed'),('c99_data5', 'attached_thermometer'),('c99_data5', 'air_temperature'),('c99_data5', 'wet_bulb_temperature'),('c99_data5', 'sea_temperature'),('c99_data5', 'sky_clear'),('c99_data5', 'compass_correction')\n",
-      "2025-01-07 11:44:11,139 - root - WARNING - Corresponding upper and/or lower bounds set to +/-inf for validation\n",
-      "2025-01-07 11:44:12,409 - root - INFO - CREATING OUTPUT DATA ATTRIBUTES FROM DATA MODEL\n"
+      "2025-01-31 09:29:27,710 - root - INFO - READING DATA MODEL SCHEMA FILE...\n",
+      "2025-01-31 09:29:27,718 - root - INFO - EXTRACTING DATA FROM MODEL: icoads_r300_d704\n",
+      "2025-01-31 09:29:27,718 - root - INFO - Getting data string from source...\n",
+      "2025-01-31 09:29:27,789 - root - INFO - Extracting and reading sections\n",
+      "2025-01-31 09:29:27,875 - root - INFO - Create output DataBundle object\n"
      ]
     }
    ],
    "source": [
     "schema = \"icoads_r300_d704\"\n",
     "\n",
-    "data = read_mdf(data_path, imodel=schema)"
+    "data = read_mdf(data_path, imodel=schema, validate=False)"
    ]
   },
   {
@@ -215,7 +217,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Now metadata information can be extracted as a component of the padas dataframe."
+    "The full output `polars.DataFrame` can be accessed:"
    ]
   },
   {
@@ -226,196 +228,31 @@
     {
      "data": {
       "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
+       "<div><style>\n",
+       ".dataframe > thead > tr,\n",
+       ".dataframe > tbody > tr {\n",
+       "  text-align: right;\n",
+       "  white-space: pre-wrap;\n",
+       "}\n",
        "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>sentinal</th>\n",
-       "      <th>reel_no</th>\n",
-       "      <th>journal_no</th>\n",
-       "      <th>frame_no</th>\n",
-       "      <th>ship_name</th>\n",
-       "      <th>journal_ed</th>\n",
-       "      <th>rig</th>\n",
-       "      <th>ship_material</th>\n",
-       "      <th>vessel_type</th>\n",
-       "      <th>vessel_length</th>\n",
-       "      <th>...</th>\n",
-       "      <th>hold_depth</th>\n",
-       "      <th>tonnage</th>\n",
-       "      <th>baro_type</th>\n",
-       "      <th>baro_height</th>\n",
-       "      <th>baro_cdate</th>\n",
-       "      <th>baro_loc</th>\n",
-       "      <th>baro_units</th>\n",
-       "      <th>baro_cor</th>\n",
-       "      <th>thermo_mount</th>\n",
-       "      <th>SST_I</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>002</td>\n",
-       "      <td>0018</td>\n",
-       "      <td>0003</td>\n",
-       "      <td>Panay</td>\n",
-       "      <td>78</td>\n",
-       "      <td>01</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>187</td>\n",
-       "      <td>...</td>\n",
-       "      <td>23</td>\n",
-       "      <td>1190</td>\n",
-       "      <td>2</td>\n",
-       "      <td>14</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>Bulkhead of cabin</td>\n",
-       "      <td>1</td>\n",
-       "      <td>- .102</td>\n",
-       "      <td>2</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>1</td>\n",
-       "      <td>002</td>\n",
-       "      <td>0018</td>\n",
-       "      <td>0003</td>\n",
-       "      <td>Panay</td>\n",
-       "      <td>78</td>\n",
-       "      <td>01</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>187</td>\n",
-       "      <td>...</td>\n",
-       "      <td>23</td>\n",
-       "      <td>1190</td>\n",
-       "      <td>2</td>\n",
-       "      <td>14</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>Bulkhead of cabin</td>\n",
-       "      <td>1</td>\n",
-       "      <td>- .102</td>\n",
-       "      <td>2</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>1</td>\n",
-       "      <td>002</td>\n",
-       "      <td>0018</td>\n",
-       "      <td>0003</td>\n",
-       "      <td>Panay</td>\n",
-       "      <td>78</td>\n",
-       "      <td>01</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>187</td>\n",
-       "      <td>...</td>\n",
-       "      <td>23</td>\n",
-       "      <td>1190</td>\n",
-       "      <td>2</td>\n",
-       "      <td>14</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>Bulkhead of cabin</td>\n",
-       "      <td>1</td>\n",
-       "      <td>- .102</td>\n",
-       "      <td>2</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>1</td>\n",
-       "      <td>002</td>\n",
-       "      <td>0018</td>\n",
-       "      <td>0003</td>\n",
-       "      <td>Panay</td>\n",
-       "      <td>78</td>\n",
-       "      <td>01</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>187</td>\n",
-       "      <td>...</td>\n",
-       "      <td>23</td>\n",
-       "      <td>1190</td>\n",
-       "      <td>2</td>\n",
-       "      <td>14</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>Bulkhead of cabin</td>\n",
-       "      <td>1</td>\n",
-       "      <td>- .102</td>\n",
-       "      <td>2</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>1</td>\n",
-       "      <td>002</td>\n",
-       "      <td>0018</td>\n",
-       "      <td>0003</td>\n",
-       "      <td>Panay</td>\n",
-       "      <td>78</td>\n",
-       "      <td>01</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>187</td>\n",
-       "      <td>...</td>\n",
-       "      <td>23</td>\n",
-       "      <td>1190</td>\n",
-       "      <td>2</td>\n",
-       "      <td>14</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>Bulkhead of cabin</td>\n",
-       "      <td>1</td>\n",
-       "      <td>- .102</td>\n",
-       "      <td>2</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>5 rows × 24 columns</p>\n",
-       "</div>"
+       "<small>shape: (5, 418)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>index</th><th>_core_missing</th><th>core:YR</th><th>core:MO</th><th>core:DY</th><th>core:HR</th><th>core:LAT</th><th>core:LON</th><th>core:IM</th><th>core:ATTC</th><th>core:TI</th><th>core:LI</th><th>core:DS</th><th>core:VS</th><th>core:NID</th><th>core:II</th><th>core:ID</th><th>core:C1</th><th>core:DI</th><th>core:D</th><th>core:WI</th><th>core:W</th><th>core:VI</th><th>core:VV</th><th>core:WW</th><th>core:W1</th><th>core:SLP</th><th>core:A</th><th>core:PPP</th><th>core:IT</th><th>core:AT</th><th>core:WBTI</th><th>core:WBT</th><th>core:DPTI</th><th>core:DPT</th><th>core:SI</th><th>core:SST</th><th>&hellip;</th><th>c99_data4:sea_temperature</th><th>c99_data4:present_weather</th><th>c99_data4:clouds</th><th>c99_data4:sky_clear</th><th>c99_data4:sea_state</th><th>_c99_data5_missing</th><th>c99_data5:sentinal</th><th>c99_data5:reel_no</th><th>c99_data5:journal_no</th><th>c99_data5:frame_start</th><th>c99_data5:frame</th><th>c99_data5:year</th><th>c99_data5:month</th><th>c99_data5:day</th><th>c99_data5:time_ind</th><th>c99_data5:hour</th><th>c99_data5:ship_speed</th><th>c99_data5:compass_ind</th><th>c99_data5:ship_course_compass</th><th>c99_data5:blank</th><th>c99_data5:ship_course_true</th><th>c99_data5:wind_dir_mag</th><th>c99_data5:wind_dir_true</th><th>c99_data5:wind_force</th><th>c99_data5:barometer</th><th>c99_data5:temp_ind</th><th>c99_data5:attached_thermometer</th><th>c99_data5:air_temperature</th><th>c99_data5:wet_bulb_temperature</th><th>c99_data5:sea_temperature</th><th>c99_data5:present_weather</th><th>c99_data5:clouds</th><th>c99_data5:sky_clear</th><th>c99_data5:sea_state</th><th>c99_data5:compass_correction_ind</th><th>c99_data5:compass_correction</th><th>c99_data5:compass_correction_dir</th></tr><tr><td>u32</td><td>bool</td><td>i64</td><td>i64</td><td>i64</td><td>f64</td><td>f64</td><td>f64</td><td>str</td><td>i64</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>i64</td><td>str</td><td>f64</td><td>str</td><td>str</td><td>str</td><td>str</td><td>f64</td><td>str</td><td>f64</td><td>str</td><td>f64</td><td>str</td><td>f64</td><td>str</td><td>f64</td><td>str</td><td>f64</td><td>&hellip;</td><td>f64</td><td>str</td><td>str</td><td>i64</td><td>str</td><td>bool</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>i64</td><td>i64</td><td>i64</td><td>str</td><td>i64</td><td>f64</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>f64</td><td>f64</td><td>f64</td><td>f64</td><td>str</td><td>str</td><td>i64</td><td>str</td><td>str</td><td>f64</td><td>str</td></tr></thead><tbody><tr><td>0</td><td>false</td><td>1878</td><td>10</td><td>20</td><td>6.0</td><td>42.28</td><td>291.59</td><td>&quot;1&quot;</td><td>3</td><td>&quot;0&quot;</td><td>&quot;6&quot;</td><td>&quot;2&quot;</td><td>&quot;3&quot;</td><td>null</td><td>&quot;10&quot;</td><td>&quot;Panay&quot;</td><td>null</td><td>&quot;1&quot;</td><td>232</td><td>&quot;5&quot;</td><td>12.3</td><td>null</td><td>null</td><td>null</td><td>null</td><td>996.1</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&hellip;</td><td>null</td><td>&quot;BOC&quot;</td><td>&quot;CU&quot;</td><td>5</td><td>&quot;R&quot;</td><td>true</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>1</td><td>false</td><td>1878</td><td>10</td><td>20</td><td>8.0</td><td>42.31</td><td>291.97</td><td>&quot;1&quot;</td><td>3</td><td>&quot;0&quot;</td><td>&quot;6&quot;</td><td>&quot;2&quot;</td><td>&quot;3&quot;</td><td>null</td><td>&quot;10&quot;</td><td>&quot;Panay&quot;</td><td>null</td><td>&quot;1&quot;</td><td>232</td><td>&quot;5&quot;</td><td>12.3</td><td>null</td><td>null</td><td>null</td><td>null</td><td>996.3</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&hellip;</td><td>null</td><td>&quot;BOC&quot;</td><td>&quot;SC&quot;</td><td>3</td><td>&quot;R&quot;</td><td>true</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>2</td><td>false</td><td>1878</td><td>10</td><td>20</td><td>10.0</td><td>42.33</td><td>292.36</td><td>&quot;1&quot;</td><td>3</td><td>&quot;0&quot;</td><td>&quot;6&quot;</td><td>&quot;2&quot;</td><td>&quot;3&quot;</td><td>null</td><td>&quot;10&quot;</td><td>&quot;Panay&quot;</td><td>null</td><td>&quot;1&quot;</td><td>254</td><td>&quot;5&quot;</td><td>12.3</td><td>null</td><td>null</td><td>null</td><td>null</td><td>996.9</td><td>null</td><td>null</td><td>&quot;7&quot;</td><td>8.9</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;1&quot;</td><td>11.1</td><td>&hellip;</td><td>5.2</td><td>&quot;OCG&quot;</td><td>&quot;SC&quot;</td><td>0</td><td>&quot;R&quot;</td><td>true</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>3</td><td>false</td><td>1878</td><td>10</td><td>20</td><td>12.0</td><td>42.35</td><td>292.71</td><td>&quot;1&quot;</td><td>3</td><td>&quot;0&quot;</td><td>&quot;6&quot;</td><td>&quot;2&quot;</td><td>&quot;3&quot;</td><td>null</td><td>&quot;10&quot;</td><td>&quot;Panay&quot;</td><td>null</td><td>&quot;1&quot;</td><td>254</td><td>&quot;5&quot;</td><td>12.3</td><td>null</td><td>null</td><td>null</td><td>null</td><td>997.6</td><td>null</td><td>null</td><td>&quot;7&quot;</td><td>8.9</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;1&quot;</td><td>11.1</td><td>&hellip;</td><td>5.2</td><td>&quot;CG&quot;</td><td>&quot;SC&quot;</td><td>0</td><td>&quot;R&quot;</td><td>true</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>4</td><td>false</td><td>1878</td><td>10</td><td>20</td><td>14.0</td><td>42.37</td><td>293.1</td><td>&quot;1&quot;</td><td>3</td><td>&quot;0&quot;</td><td>&quot;6&quot;</td><td>&quot;2&quot;</td><td>&quot;3&quot;</td><td>null</td><td>&quot;10&quot;</td><td>&quot;Panay&quot;</td><td>null</td><td>&quot;1&quot;</td><td>254</td><td>&quot;5&quot;</td><td>12.3</td><td>null</td><td>null</td><td>null</td><td>null</td><td>999.2</td><td>null</td><td>null</td><td>&quot;7&quot;</td><td>8.9</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;1&quot;</td><td>10.0</td><td>&hellip;</td><td>5.0</td><td>&quot;BC&quot;</td><td>&quot;SC&quot;</td><td>2</td><td>&quot;L&quot;</td><td>true</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr></tbody></table></div>"
       ],
       "text/plain": [
-       "  sentinal reel_no journal_no frame_no ship_name journal_ed rig ship_material  \\\n",
-       "0        1     002       0018     0003     Panay         78  01             1   \n",
-       "1        1     002       0018     0003     Panay         78  01             1   \n",
-       "2        1     002       0018     0003     Panay         78  01             1   \n",
-       "3        1     002       0018     0003     Panay         78  01             1   \n",
-       "4        1     002       0018     0003     Panay         78  01             1   \n",
-       "\n",
-       "  vessel_type  vessel_length  ...  hold_depth tonnage baro_type baro_height  \\\n",
-       "0           1            187  ...          23    1190         2          14   \n",
-       "1           1            187  ...          23    1190         2          14   \n",
-       "2           1            187  ...          23    1190         2          14   \n",
-       "3           1            187  ...          23    1190         2          14   \n",
-       "4           1            187  ...          23    1190         2          14   \n",
-       "\n",
-       "   baro_cdate           baro_loc baro_units  baro_cor thermo_mount SST_I  \n",
-       "0         NaN  Bulkhead of cabin          1    - .102            2   NaN  \n",
-       "1         NaN  Bulkhead of cabin          1    - .102            2   NaN  \n",
-       "2         NaN  Bulkhead of cabin          1    - .102            2   NaN  \n",
-       "3         NaN  Bulkhead of cabin          1    - .102            2   NaN  \n",
-       "4         NaN  Bulkhead of cabin          1    - .102            2   NaN  \n",
-       "\n",
-       "[5 rows x 24 columns]"
+       "shape: (5, 418)\n",
+       "┌───────┬─────────────┬─────────┬─────────┬───┬─────────────┬────────────┬────────────┬────────────┐\n",
+       "│ index ┆ _core_missi ┆ core:YR ┆ core:MO ┆ … ┆ c99_data5:s ┆ c99_data5: ┆ c99_data5: ┆ c99_data5: │\n",
+       "│ ---   ┆ ng          ┆ ---     ┆ ---     ┆   ┆ ea_state    ┆ compass_co ┆ compass_co ┆ compass_co │\n",
+       "│ u32   ┆ ---         ┆ i64     ┆ i64     ┆   ┆ ---         ┆ rrection_i ┆ rrection   ┆ rrection_d │\n",
+       "│       ┆ bool        ┆         ┆         ┆   ┆ str         ┆ …          ┆ ---        ┆ …          │\n",
+       "│       ┆             ┆         ┆         ┆   ┆             ┆ ---        ┆ f64        ┆ ---        │\n",
+       "│       ┆             ┆         ┆         ┆   ┆             ┆ str        ┆            ┆ str        │\n",
+       "╞═══════╪═════════════╪═════════╪═════════╪═══╪═════════════╪════════════╪════════════╪════════════╡\n",
+       "│ 0     ┆ false       ┆ 1878    ┆ 10      ┆ … ┆ null        ┆ null       ┆ null       ┆ null       │\n",
+       "│ 1     ┆ false       ┆ 1878    ┆ 10      ┆ … ┆ null        ┆ null       ┆ null       ┆ null       │\n",
+       "│ 2     ┆ false       ┆ 1878    ┆ 10      ┆ … ┆ null        ┆ null       ┆ null       ┆ null       │\n",
+       "│ 3     ┆ false       ┆ 1878    ┆ 10      ┆ … ┆ null        ┆ null       ┆ null       ┆ null       │\n",
+       "│ 4     ┆ false       ┆ 1878    ┆ 10      ┆ … ┆ null        ┆ null       ┆ null       ┆ null       │\n",
+       "└───────┴─────────────┴─────────┴─────────┴───┴─────────────┴────────────┴────────────┴────────────┘"
       ]
      },
      "execution_count": 5,
@@ -424,29 +261,72 @@
     }
    ],
    "source": [
-    "data.data.c99_journal"
+    "data.data"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "To learn how to construct a schema or data model for a particular deck/source, visit this other [tutorial notebook](https://github.com/glamod/cdm_reader_mapper/blob/main/docs/example_notebooks/CLIWOC_datamodel.ipynb)"
+    "Now metadata information can be extracted as a component of the polars dataframe."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div><style>\n",
+       ".dataframe > thead > tr,\n",
+       ".dataframe > tbody > tr {\n",
+       "  text-align: right;\n",
+       "  white-space: pre-wrap;\n",
+       "}\n",
+       "</style>\n",
+       "<small>shape: (5, 24)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>c99_journal:sentinal</th><th>c99_journal:reel_no</th><th>c99_journal:journal_no</th><th>c99_journal:frame_no</th><th>c99_journal:ship_name</th><th>c99_journal:journal_ed</th><th>c99_journal:rig</th><th>c99_journal:ship_material</th><th>c99_journal:vessel_type</th><th>c99_journal:vessel_length</th><th>c99_journal:vessel_beam</th><th>c99_journal:commander</th><th>c99_journal:country</th><th>c99_journal:screw_paddle</th><th>c99_journal:hold_depth</th><th>c99_journal:tonnage</th><th>c99_journal:baro_type</th><th>c99_journal:baro_height</th><th>c99_journal:baro_cdate</th><th>c99_journal:baro_loc</th><th>c99_journal:baro_units</th><th>c99_journal:baro_cor</th><th>c99_journal:thermo_mount</th><th>c99_journal:SST_I</th></tr><tr><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>i64</td><td>i64</td><td>str</td><td>str</td><td>str</td><td>i64</td><td>i64</td><td>str</td><td>i64</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>&quot;1&quot;</td><td>&quot;002&quot;</td><td>&quot;0018&quot;</td><td>&quot;0003&quot;</td><td>&quot;Panay&quot;</td><td>&quot;78&quot;</td><td>&quot;01&quot;</td><td>&quot;1&quot;</td><td>&quot;1&quot;</td><td>187</td><td>37</td><td>&quot;S.P.Bray,Jr&quot;</td><td>&quot;01&quot;</td><td>&quot;3&quot;</td><td>23</td><td>1190</td><td>&quot;2&quot;</td><td>14</td><td>null</td><td>&quot;Bulkhead of cabin&quot;</td><td>&quot;1&quot;</td><td>&quot;- .102&quot;</td><td>&quot;2&quot;</td><td>null</td></tr><tr><td>&quot;1&quot;</td><td>&quot;002&quot;</td><td>&quot;0018&quot;</td><td>&quot;0003&quot;</td><td>&quot;Panay&quot;</td><td>&quot;78&quot;</td><td>&quot;01&quot;</td><td>&quot;1&quot;</td><td>&quot;1&quot;</td><td>187</td><td>37</td><td>&quot;S.P.Bray,Jr&quot;</td><td>&quot;01&quot;</td><td>&quot;3&quot;</td><td>23</td><td>1190</td><td>&quot;2&quot;</td><td>14</td><td>null</td><td>&quot;Bulkhead of cabin&quot;</td><td>&quot;1&quot;</td><td>&quot;- .102&quot;</td><td>&quot;2&quot;</td><td>null</td></tr><tr><td>&quot;1&quot;</td><td>&quot;002&quot;</td><td>&quot;0018&quot;</td><td>&quot;0003&quot;</td><td>&quot;Panay&quot;</td><td>&quot;78&quot;</td><td>&quot;01&quot;</td><td>&quot;1&quot;</td><td>&quot;1&quot;</td><td>187</td><td>37</td><td>&quot;S.P.Bray,Jr&quot;</td><td>&quot;01&quot;</td><td>&quot;3&quot;</td><td>23</td><td>1190</td><td>&quot;2&quot;</td><td>14</td><td>null</td><td>&quot;Bulkhead of cabin&quot;</td><td>&quot;1&quot;</td><td>&quot;- .102&quot;</td><td>&quot;2&quot;</td><td>null</td></tr><tr><td>&quot;1&quot;</td><td>&quot;002&quot;</td><td>&quot;0018&quot;</td><td>&quot;0003&quot;</td><td>&quot;Panay&quot;</td><td>&quot;78&quot;</td><td>&quot;01&quot;</td><td>&quot;1&quot;</td><td>&quot;1&quot;</td><td>187</td><td>37</td><td>&quot;S.P.Bray,Jr&quot;</td><td>&quot;01&quot;</td><td>&quot;3&quot;</td><td>23</td><td>1190</td><td>&quot;2&quot;</td><td>14</td><td>null</td><td>&quot;Bulkhead of cabin&quot;</td><td>&quot;1&quot;</td><td>&quot;- .102&quot;</td><td>&quot;2&quot;</td><td>null</td></tr><tr><td>&quot;1&quot;</td><td>&quot;002&quot;</td><td>&quot;0018&quot;</td><td>&quot;0003&quot;</td><td>&quot;Panay&quot;</td><td>&quot;78&quot;</td><td>&quot;01&quot;</td><td>&quot;1&quot;</td><td>&quot;1&quot;</td><td>187</td><td>37</td><td>&quot;S.P.Bray,Jr&quot;</td><td>&quot;01&quot;</td><td>&quot;3&quot;</td><td>23</td><td>1190</td><td>&quot;2&quot;</td><td>14</td><td>null</td><td>&quot;Bulkhead of cabin&quot;</td><td>&quot;1&quot;</td><td>&quot;- .102&quot;</td><td>&quot;2&quot;</td><td>null</td></tr></tbody></table></div>"
+      ],
+      "text/plain": [
+       "shape: (5, 24)\n",
+       "┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐\n",
+       "│ c99_journ ┆ c99_journ ┆ c99_journ ┆ c99_journ ┆ … ┆ c99_journ ┆ c99_journ ┆ c99_journ ┆ c99_jour │\n",
+       "│ al:sentin ┆ al:reel_n ┆ al:journa ┆ al:frame_ ┆   ┆ al:baro_u ┆ al:baro_c ┆ al:thermo ┆ nal:SST_ │\n",
+       "│ al        ┆ o         ┆ l_no      ┆ no        ┆   ┆ nits      ┆ or        ┆ _mount    ┆ I        │\n",
+       "│ ---       ┆ ---       ┆ ---       ┆ ---       ┆   ┆ ---       ┆ ---       ┆ ---       ┆ ---      │\n",
+       "│ str       ┆ str       ┆ str       ┆ str       ┆   ┆ str       ┆ str       ┆ str       ┆ str      │\n",
+       "╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡\n",
+       "│ 1         ┆ 002       ┆ 0018      ┆ 0003      ┆ … ┆ 1         ┆ - .102    ┆ 2         ┆ null     │\n",
+       "│ 1         ┆ 002       ┆ 0018      ┆ 0003      ┆ … ┆ 1         ┆ - .102    ┆ 2         ┆ null     │\n",
+       "│ 1         ┆ 002       ┆ 0018      ┆ 0003      ┆ … ┆ 1         ┆ - .102    ┆ 2         ┆ null     │\n",
+       "│ 1         ┆ 002       ┆ 0018      ┆ 0003      ┆ … ┆ 1         ┆ - .102    ┆ 2         ┆ null     │\n",
+       "│ 1         ┆ 002       ┆ 0018      ┆ 0003      ┆ … ┆ 1         ┆ - .102    ┆ 2         ┆ null     │\n",
+       "└───────────┴───────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴──────────┘"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data.data.select(pl.col(\"^c99_journal:.*$\"))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To learn how to construct a schema or data model for a particular deck/source, visit this other [tutorial notebook](https://github.com/glamod/cdm_reader_mapper/blob/main/docs/example_notebooks/CLIWOC_datamodel.ipynb)"
+   ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "cdm",
    "language": "python",
-   "name": "python3"
+   "name": "cdm"
   },
   "language_info": {
    "codemirror_mode": {
@@ -458,7 +338,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.3"
+   "version": "3.12.8"
   }
  },
  "nbformat": 4,
diff --git a/pyproject.toml b/pyproject.toml
index 35532d0e..730b639a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,6 +46,7 @@ dependencies = [
   "numba >=0.60.0",
   "pandas>=2.2.0",
   "platformdirs >4.0.0",
+  "polars >= 1.26",
   "pyarrow >=15.0.0",
   "python-slugify",
   "recordlinkage >= 0.15",
@@ -225,7 +226,9 @@ target-version = "py312"
 exclude = [
   ".git",
   "build",
-  ".eggs"
+  ".eggs",
+  ".venv",
+  "venv"
 ]
 extend-include = [
   "*.ipynb" # Include notebooks
@@ -261,6 +264,7 @@ check-typed-exception = true
 "matplotlib.pyplot" = "plt"
 numpy = "np"
 pandas = "pd"
+polars = "pl"
 scipy = "sp"
 xarray = "xr"