diff --git a/.gitignore b/.gitignore index 8826ec0..b48cd49 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,4 @@ # recursively re-ignore __pycache__ *.egg-info/ +docs/build diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index eb3b3bf..da84716 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,6 +12,7 @@ repos: - id: check-yaml - id: check-toml - id: check-added-large-files + args: [ --maxkb=1024 ] - id: requirements-txt-fixer - repo: https://github.com/Spill-Tea/addlicense-pre-commit @@ -19,8 +20,7 @@ repos: hooks: - id: addlicense language: golang - exclude: ^docs/ args: [ -f, LICENSE, ] - types_or: [ python ] + types_or: [ python, cython ] diff --git a/docs/source/_ext/lexers.py b/docs/source/_ext/lexers.py index 9421ff3..41f492c 100644 --- a/docs/source/_ext/lexers.py +++ b/docs/source/_ext/lexers.py @@ -33,13 +33,14 @@ from collections.abc import Iterator from typing import ClassVar -from pygments.lexer import bygroups, include -from pygments.lexers.python import PythonLexer +from pygments.lexer import bygroups, combined, include, words +from pygments.lexers.python import CythonLexer, PythonLexer, RegexLexer from pygments.token import ( Comment, Keyword, Name, Number, + Operator, Punctuation, String, Text, @@ -63,8 +64,65 @@ def inner(a, b) -> bool: return inner -root: list = [ - (r"\n", Whitespace), +class MixinLexer(RegexLexer): + """Regex Mixin Lexer class. + + Notes: + 1. Supports primitive rainbow bracket coloring. + 2. 
Supports primitive constant declaration (uppercase variables) + + """ + + n_brackets: int + _stack: deque[int] + + def __init__(self, **options) -> None: + self.n_brackets = int(options.pop("n_brackets", 4)) + super().__init__(**options) + self._stack = deque[int]() + + def _enter(self) -> _TokenType: + """Retrieve next token in cycle.""" + idx = len(self._stack) % self.n_brackets + self._stack.append(idx) + + return get_bracket_level(idx) + + def _exit(self) -> _TokenType: + """Remove element from stack and return token.""" + try: + idx: int = self._stack.pop() + return get_bracket_level(idx) + + # NOTE: Only additional ending brackets trigger this (e.g. `{{ }}}` ). + # NOTE: We are not attempting to detect correct matching brackets (e.g. `(]` ) + except IndexError: + return Punctuation.Error + + def get_tokens_unprocessed( + self, + text, + stack=("root",), + ) -> Iterator[tuple[int, _TokenType, str]]: + _token: _TokenType + for idx, token, value in super().get_tokens_unprocessed(text, stack): + _token = token + if token is Name and value.isupper(): + _token = Name.Constant + + elif token is Punctuation: + match value: + case "(" | "[" | "{" | "<": + _token = self._enter() + case "}" | "]" | ")" | ">": + _token = self._exit() + case _: + ... 
+ + yield idx, _token, value + + +docstrings: list = [ ( # single line docstrings (edge case) r'^(\s*)([rRuUbB]{,2})("""(?:.)*?""")', bygroups(Whitespace, String.Affix, String.Doc), @@ -72,20 +130,29 @@ def inner(a, b) -> bool: ( # Modfied triple double quote docstrings to highlight docstring titles r'^(\s*)([rRuUbB]{,2})(""")', bygroups(Whitespace, String.Affix, String.Doc), - "docstring-double", + "docstring-double-quotes", ), ( # Intentionally treat text encapsulated within single triple quotes as String r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')", bygroups(Whitespace, String.Affix, String), ), +] + +comments: list = [ (r"\A#!.+$", Comment.Hashbang), + # Format Special Common Keywords in Comments ( - # Format Special Common Keyword Comments - # NOTE: Must come before Comment.Single token in order to be matched. - r"(#\s*)(TODO|FIXME|NOTE|BUG|HACK|XXX)(:?)(.*$)", - bygroups(Comment.Single, Comment.Special, Comment.Special, Comment.Single), + r"(#\s*)(TODO|FIXME|NOTE|BUG|HACK|XXX)(.*$)", + bygroups(Comment.Single, Comment.Special, Comment.Single), ), (r"#.*$", Comment.Single), +] + + +python_root: list = [ + (r"\n", Whitespace), + *docstrings, + *comments, (r"\\\n", Text), (r"\\", Text), include("keywords"), @@ -115,8 +182,8 @@ def inner(a, b) -> bool: python_tokens: dict[str, list] = PythonLexer.tokens.copy() -python_tokens["root"] = root -python_tokens["docstring-double"] = [ +python_tokens["root"] = python_root +python_tokens["docstring-double-quotes"] = [ ( r"(?<=\n)(\s*)(Args|Attributes|Returns|Raises|" r"Examples|Yields|References|Notes|Equations)(:)(\s*)", @@ -135,6 +202,7 @@ def inner(a, b) -> bool: (r"\b([a-zA-Z_]\w*)(?=\s*\()", Name.Function), ) +# Tokenize segment of number literals declared in different base (non base 10) python_tokens["numbers"] = [ ( r"(\d(?:_?\d)*\.(?:\d(?:_?\d)*)?|(?:\d(?:_?\d)*)?\.\d(?:_?\d)*)" @@ -149,61 +217,263 @@ def inner(a, b) -> bool: ] -class CustomPythonLexer(PythonLexer): - """Enhanced regex-based python Lexer. 
+class CustomPythonLexer(MixinLexer, PythonLexer): + """Custom enhanced regex-based python lexer. Notes: 1. Implemented a simple stack based rainbow bracket colorizer. - * limitation: Only detects errors that close more brackets than opens. + * limitation: Only detects errors that close more brackets than it opens. + * limitation: No attempt is made to confirm matching closing brackets. 2. Highlight Docstring titles (assumes google docstring format) 3. Improved highlighting function calls (with limitations) 4. Modify display of number components which indicate a different base number. """ - n_brackets: int - _stack: deque[int] tokens: ClassVar[dict[str, list]] = python_tokens - def __init__(self, **options) -> None: - super().__init__(**options) - self._stack = deque[int]() - self.n_brackets = int(options.get("n_brackets", 4)) - - def _enter(self) -> _TokenType: - """Retrieve next token in cycle.""" - idx = len(self._stack) % self.n_brackets - self._stack.append(idx) - - return get_bracket_level(idx) - def _exit(self) -> _TokenType: - """Remove element from stack and return token.""" - try: - idx: int = self._stack.pop() - return get_bracket_level(idx) +cython_root = [ + (r"\n", Whitespace), + *docstrings, + (r"[^\S\n]+", Text), + *comments, + (r"[]{}:(),;[]", Punctuation), + (r"\\\n", Whitespace), + (r"\\", Text), + (r"(in|is|and|or|not)\b", Operator.Word), + (r"(<)([a-zA-Z0-9.?]+)(>)", bygroups(Punctuation, Keyword.Type, Punctuation)), + (r"!=|==|<<|>>|[-~+/*%=<>&^|.?]", Operator), + ( + r"(from)(\d+)(<=)(\s+)(<)(\d+)(:)", + bygroups( + Keyword, Number.Integer, Operator, Whitespace, Operator, Name, Punctuation + ), + ), + include("keywords"), + (r"(def)(\s+)", bygroups(Keyword.Declare, Whitespace), "funcname"), + (r"(property)(\s+)", bygroups(Keyword.Type, Whitespace), "funcname"), + (r"(cp?def)(\s+)", bygroups(Keyword.Declare, Whitespace), "cdef"), + (r"(ctypedef)(\s+)", bygroups(Keyword.Declare, Whitespace), "ctypedef"), + (r"(cdef)(:)", 
bygroups(Keyword.Declare, Punctuation)), + ( + r"(class|cppclass|struct)(\s+)", + bygroups(Keyword.Declare, Whitespace), + "classname", + ), + (r"(from)(\s+)", bygroups(Keyword.Namespace, Whitespace), "fromimport"), + (r"(c?import)(\s+)", bygroups(Keyword.Namespace, Whitespace), "import"), + include("builtins"), + include("backtick"), + ('(?:[rR]|[uU][rR]|[rR][uU])"""', String, "tdqs"), + ("(?:[rR]|[uU][rR]|[rR][uU])'''", String, "tsqs"), + ('(?:[rR]|[uU][rR]|[rR][uU])"', String, "dqs"), + ("(?:[rR]|[uU][rR]|[rR][uU])'", String, "sqs"), + ('[uU]?"""', String, combined("stringescape", "tdqs")), + ("[uU]?'''", String, combined("stringescape", "tsqs")), + ('[uU]?"', String, combined("stringescape", "dqs")), + ("[uU]?'", String, combined("stringescape", "sqs")), + include("name"), + include("numbers"), +] - except IndexError: - return Punctuation.Error +cython_tokens: dict[str, list] = CythonLexer.tokens.copy() +cython_tokens["root"] = cython_root +cython_tokens["numbers"] = python_tokens["numbers"] +cython_tokens["docstring-double-quotes"] = python_tokens["docstring-double-quotes"] +cython_tokens["name"].insert( + _find(cython_tokens["name"], Name, _get_index(1)), + (r"\b([a-zA-Z_]\w*)(?=\s*\()", Name.Function), +) +cython_tokens["cdef"] = [ + # include packed keyword + (r"(public|readonly|extern|api|inline|packed|fused)\b", Keyword), + # Specialize Name.Class vs Name.Function vs Name.Variable tokens + ( + # include cppclass keyword + r"(struct|enum|union|class|cppclass)\b(\s+)([a-zA-Z_]\w*)", + bygroups(Keyword.Declare, Whitespace, Name.Class), + "#pop", + ), + (r"([a-zA-Z_]\w*)(\s*)(?=\()", bygroups(Name.Function, Whitespace), "#pop"), + (r"([a-zA-Z_]\w*)(\s*)(?=[:,=#\n]|$)", bygroups(Name.Variable, Whitespace), "#pop"), + (r"([a-zA-Z_]\w*)(\s*)(,)", bygroups(Name.Variable, Whitespace, Punctuation)), + (r"from\b", Keyword, "#pop"), + (r"as\b", Keyword), + (r":", Punctuation, "#pop"), + (r'(?=["\'])', Text, "#pop"), + (r"[a-zA-Z_]\w*", Keyword.Type), + (r".", Text), 
+] +# Define new ctypedef context +cython_tokens["ctypedef"] = [ + (r"(public|readonly|extern|api|inline|packed|fused)\b", Keyword), + ( + r"(\s*)([a-zA-Z_]\w*)(\s*)(:)", + bygroups(Whitespace, Name.Class, Whitespace, Punctuation), + "#pop", + ), + ( + r"(struct|enum|union|class|cppclass)(\s+)([a-zA-Z_]\w*)", + bygroups(Keyword.Declare, Whitespace, Name.Class), + "#pop", + ), + ( + r"([a-zA-Z_]\w*)(\s+)([a-zA-Z_]\w*)", + bygroups(Keyword.Type, Whitespace, Name.Class), + "#pop", + ), + (r"([a-zA-Z_]\w*)", Name.Class, "#pop"), +] +# Define Keyword.Constant token +cython_tokens["keywords"].append( + (words(("True", "False", "None", "NULL"), suffix=r"\b"), Keyword.Constant) +) +cython_tokens["keywords"][_find(cython_tokens["keywords"], Keyword, _get_index(1))] = ( + words( + ( + "assert", + "async", + "await", + "break", + "by", + "continue", + # "ctypedef", + "del", + "elif", + "else", + "except", + "except?", + "exec", + "finally", + "for", + # "fused", + "gil", + "global", + "if", + "include", + "lambda", + "namespace", # added + "new", # added - relevant for c++ syntax + "noexcept", # added + "nogil", + "pass", + "print", + "raise", + "return", + "try", + "while", + "yield", + "as", + "with", + ), + suffix=r"\b", + ), + Keyword, +) +# Redefine Name.Builtin.Pseudo token (to not include Keyword.Constant values) +cython_tokens["builtins"][ + _find(cython_tokens["builtins"], Name.Builtin.Pseudo, _get_index(1)) +] = (r"(?<!\.)(self|Ellipsis|NotImplemented|cython)\b", Name.Builtin.Pseudo) - def get_tokens_unprocessed( - self, - text, - stack=("root",), - ) -> Iterator[tuple[int, _TokenType, str]]: - _token: _TokenType - for idx, token, value in super().get_tokens_unprocessed(text, stack): - _token = token - if token is Name and value.isupper(): - _token = Name.Constant - elif token is Punctuation: - match value: - case "(" | "[" | "{": - _token = self._enter() - case "}" | "]" | ")": - _token = self._exit() - case _: - ... 
+class CustomCythonLexer(MixinLexer, CythonLexer): + """Custom enhanced regex-based cython lexer.""" - yield idx, _token, value + tokens: ClassVar[dict[str, list]] = cython_tokens diff --git a/docs/source/_ext/styles.py b/docs/source/_ext/styles.py index 1b66028..1b9c7f0 100644 --- a/docs/source/_ext/styles.py +++ b/docs/source/_ext/styles.py @@ -97,12 +97,12 @@ class VSCodeDarkPlus(Style): Keyword.Type: Colors.datatype, Keyword.Declare: bold(Colors.declare), Keyword.Constant: bold(Colors.declare), - Keyword.Reserved: bold(Colors.reserved), + Keyword.Reserved: Colors.declare, Keyword.Namespace: Colors.control, # Variable Names Name: Colors.variable, Name.Type: Colors.builtin, - Name.Class: bold(Colors.datatype), + Name.Class: Colors.datatype, Name.Builtin: Colors.builtin, Name.Builtin.Pseudo: italic(Colors.variable), Name.Constant: "#4FC1FF", diff --git a/docs/source/api/index.rst b/docs/source/api/index.rst index 1af17b1..135137e 100644 --- a/docs/source/api/index.rst +++ b/docs/source/api/index.rst @@ -3,10 +3,21 @@ PyTemplate API Documentation PyTemplate API documentation. +Here are python and cython code snippets to demonstrate the use of respective customized +lexers with custom syntax highlighting style. These examples are not necessarily meant +to be fully valid code, but to demonstrate key features not available through standard +pygments syntax highlighting styles. + +Python Example Snippet +---------------------- + .. code-block:: python :caption: example.py - from typing import ClassVar as ClassV + #!/usr/bin/env python3 + """Module level docstring.""" + from typing import ClassVar + import numpy as np CONSTANT_A: int = 0xFF CONSTANT_B: float = np.pi @@ -14,11 +25,11 @@ PyTemplate API documentation. # NOTE: this is an example class class Example(object): """Example docstring. - + Args: arg1 (str): argument 1 arg2 (int): argument 2 - + Attributes: data (dict): data @@ -26,7 +37,8 @@ PyTemplate API documentation. 
arg1: str arg2: int data: dict - other: ClassV[list[int]] = [1, 5, 7] + seventeen: ClassVar[list[int]] = [17, 0x11, 0o21, 0b10001] + other: ClassVar[list[int]] = [1e-5, 1.0e+3, 2j, 2l, 2.7E4J] def __init__(self, arg1: str, arg2: int) -> None: self.arg1 = arg1 @@ -37,10 +49,10 @@ PyTemplate API documentation. } def __getattr__(self, value): - return self.data[value] + return self.method(value) def method(self, value): - return self[value] + return self.data[value] def write(self, text): print(f"{text:<5}\n") @@ -52,6 +64,88 @@ PyTemplate API documentation. return value + 0b10011 +Cython Example Snippet +---------------------- + +.. code-block:: cython + :caption: example.pyx + + """Module level docstring.""" + import cython + from libc.stdlib cimport free, malloc + + cdef extern from "<vector>" namespace "std": + cdef cppclass vector[T]: + vector() + T& operator[](int) + + ctypedef fused StringTypeObject: + str + bytes + + ctypedef struct CustomStruct: + int y + str z + + cdef packed struct Breakfast: + int[4] spam + signed char[5] eggs + + cdef enum CheeseType: + manchego = 1 + gouda = 2 + camembert = 3 + + cdef union MyUnion: + int i + float f + char c + + cdef inline unsigned char* function(bint flag) noexcept: + cdef: + Py_ssize_t j + unsigned char* k = NULL + + k = <unsigned char*> malloc(5 * sizeof(unsigned char)) + + for j in range(5): + k[j] = "A" + + return k + + # XXX: this is an example class + cdef class Example: + """The little example class that couldn't. + + Args: + arg1 (unsigned long long): ... + arg2 (double): ... 
+ + """ + cdef public unsigned long long v + cdef readonly double k + cdef char* mem + + def __cinit__(self, unsigned long long arg1, double arg2): + self.v = arg1 + self.k = arg2 + self.mem = <char*> malloc(5 * sizeof(char)) + + def __dealloc__(self): + free(self.mem) + + @cython.boundscheck(False) + cdef char index(self, size_t idx): + return self.mem[idx] + + # just an example of nested parenthesis to demonstrate rainbow coloring + cdef dict obj = { + "a": [(1, 2, (3, 4, 5)), (6, 7, (8, 9 , 10))], + "b": {"c": (7, 4, 3), "d": {"e", "f", "g"}}, + } + cdef tuple builtin_constants = (True, False, NULL, None,) + + .. toctree:: :caption: Submodules diff --git a/docs/source/conf.py b/docs/source/conf.py index e61cec9..2a071e0 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -72,11 +72,17 @@ html_theme = "sphinx_rtd_theme" html_static_path = ["_static"] html_css_files = ["custom.css"] +# html_logo = "_static/logo.png" +github_url = "https://github.com/Spill-Tea/PyTemplate" + +# Theme options (specific to sphinx_rtd_theme) +# https://github.com/readthedocs/sphinx_rtd_theme/blob/master/docs/configuring.rst#id7 +# html_theme_options = {} def setup(app: Sphinx) -> None: """Custom sphinx application startup setup.""" - from lexers import CustomPythonLexer # type: ignore + from lexers import CustomCythonLexer, CustomPythonLexer # type: ignore # NOTE: overwrite default python lexer app.add_lexer("python", CustomPythonLexer) @@ -84,3 +90,9 @@ def setup(app: Sphinx) -> None: assert _lexer_registry["python"] == CustomPythonLexer, ( "custom Lexer not found in registry." ) + + app.add_lexer("cython", CustomCythonLexer) + assert "cython" in _lexer_registry, "cython language not found in registry" + assert _lexer_registry["cython"] == CustomCythonLexer, ( + "custom Lexer not found in registry." + )