From 2cf427b6e125ab480528616b61177bbb0306b503 Mon Sep 17 00:00:00 2001
From: Nathan Goldbaum
Date: Fri, 12 Sep 2025 10:19:34 -0600
Subject: [PATCH 1/2] Add multithreaded tokenizer test

---
 .../python/tests/bindings/test_tokenizer.py   | 38 +++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/bindings/python/tests/bindings/test_tokenizer.py b/bindings/python/tests/bindings/test_tokenizer.py
index 28f6b38d4..e0b546288 100644
--- a/bindings/python/tests/bindings/test_tokenizer.py
+++ b/bindings/python/tests/bindings/test_tokenizer.py
@@ -557,6 +557,44 @@ def test_multiprocessing_with_parallelism(self):
         multiprocessing_with_parallelism(tokenizer, False)
         multiprocessing_with_parallelism(tokenizer, True)
 
+    def test_multithreaded_concurrency(self):
+        """Encode batches from several threads at once; every task must finish."""
+
+        # Thread worker functions: each call builds its own Tokenizer(BPE())
+        def encode_batch(batch):
+            tokenizer = Tokenizer(BPE())
+            return tokenizer.encode_batch(batch)
+
+        def encode_batch_fast(batch):
+            tokenizer = Tokenizer(BPE())
+            return tokenizer.encode_batch_fast(batch)
+
+        # Create some significant workload
+        batches = [
+            ["my name is john " * 50] * 20,
+            ["my name is paul " * 50] * 20,
+            ["my name is ringo " * 50] * 20,
+        ]
+
+        # Many encoding operations to run concurrently
+        tasks = [
+            (encode_batch, batches[0]),
+            (encode_batch_fast, batches[1]),
+            (encode_batch, batches[2]),
+        ] * 10
+
+        # Use the pool as a context manager so its worker threads are always
+        # joined, even when a task raises and the test fails.
+        with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
+            futures = [executor.submit(*task) for task in tasks]
+
+            # All tasks should complete successfully
+            results = [f.result() for f in futures]
+
+        # Verify results: 3 task kinds * 10 repeats, 20 encodings per batch
+        assert len(results) == 30
+        assert all(len(result) == 20 for result in results)
+
     def test_from_pretrained(self):
         tokenizer = Tokenizer.from_pretrained("bert-base-cased")
         output = tokenizer.encode("Hey there dear friend!", add_special_tokens=False)

From d7f7fb78f3b735a0f010f93f19415141be46077e Mon Sep 17 00:00:00 2001
From: Nathan Goldbaum
Date: Fri, 12 Sep 2025 10:24:37 -0600
Subject: [PATCH 2/2] Add 3.13t CI

--- .github/workflows/python.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index ef8bf4414..8a73e9cd6 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -16,7 +16,7 @@ jobs: runs-on: windows-latest strategy: matrix: - python: ["3.9", "3.10", "3.11", "3.12", "3.13"] + python: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.13t"] steps: - name: Checkout repository uses: actions/checkout@v4 @@ -52,6 +52,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest] + python: ["3.13", "3.13t"] steps: - name: Checkout repository uses: actions/checkout@v4 @@ -72,7 +73,7 @@ jobs: - name: Install Python uses: actions/setup-python@v5 with: - python-version: 3.13 + python-version: ${{ matrix.python }} architecture: "x64"