[WIP] Gsoc2025 tokenizer data #1276
     Open
      
      
JorgeV92 wants to merge 16 commits into opencv:5.x
  
    
      
        
          
  
    
from JorgeV92:gsoc2025-tokenizer
  
      
      
   
  
    
  
  
  
 
  
      
Changes from all commits (16 commits)
4b78314  Add tokenizer encoder.json for GSoC 2025 project
82acb8f  Add vocab.bpe for gpt2
2f50d85  Add tokenizer.json from hugginface for gpt2
0901395  Add gpt4 json from openai
2ccf675  Add json from hugginface for gpt4
92440bf  Add train data from wiki for tokenizer
b35321d  Add config settings for gpt2
26b769c  Small config setting for gpt4
3e87a57  change model name to gpt4
d5567bb  removed wiki directory
08aeb16  Added readme
6667e6e  new readme
4dabdef  Add test data from huggingFace and tiktoken for validation
7b83d14  relocated test data
6520aa8  relocated data
0dd1683  Add method BPE to config.json for gpt2 and gpt4
Files changed
    
@@ -0,0 +1,24 @@ (new file)
# GPT-2 tokenizer

## Contents
- `config.json`
- `tokenizer.json`
- `encoder.json`
- `vocab.bpe`

## Sources
- `tokenizer.json`:
  https://huggingface.co/openai-community/gpt2/tree/main
- `encoder.json`, `vocab.bpe`:
  https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/encoder.json
  https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/vocab.bpe

## License & attribution
- GPT-2 code/assets by OpenAI are released under a **Modified MIT License** (see upstream license).
  https://github.com/openai/gpt-2/blob/master/LICENSE
- The Hugging Face `openai-community/gpt2` model page lists **MIT** as the license.

These files are included here **verbatim** for interoperability/testing.
All copyrights remain with the original authors. If you redistribute, keep the
original license notices and links to the sources above.
  
    
    
@@ -0,0 +1,11 @@ (new file: config.json for gpt2)
{
  "model_type": "gpt2",
  "method": "BPE",
  "vocab_size": 50257,
  "tokenizer_class": "GPT2TokenizerFast",
  "eos_token": "<|endoftext|>",
  "bos_token": null,
  "pad_token": null
}
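For illustration, a minimal sketch of how a loader might consume this config. The JSON is inlined so the example is self-contained, the dispatch logic is hypothetical, and the `eos_token` value is assumed to be GPT-2's standard `<|endoftext|>` marker (the page scrape shows it blank):

```python
import json

# Config added in this PR, inlined for a self-contained example; a real
# loader would read config.json from the data directory. eos_token is
# assumed to be GPT-2's standard end-of-text marker.
config_text = """
{
  "model_type": "gpt2",
  "method": "BPE",
  "vocab_size": 50257,
  "tokenizer_class": "GPT2TokenizerFast",
  "eos_token": "<|endoftext|>",
  "bos_token": null,
  "pad_token": null
}
"""

config = json.loads(config_text)

# The new "method" field lets a loader dispatch to the right algorithm.
if config["method"] == "BPE":
    print(f'{config["model_type"]}: BPE with vocab_size={config["vocab_size"]}')
```

A gpt4-style config would differ only in `model_type` and the token/vocab fields, which is what makes the shared `method` key useful for dispatch.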
Large diffs are not rendered by default.
      
      Oops, something went wrong.
      
    
  
      
      Oops, something went wrong.
        
    
  
  Add this suggestion to a batch that can be applied as a single commit.
  This suggestion is invalid because no changes were made to the code.
  Suggestions cannot be applied while the pull request is closed.
  Suggestions cannot be applied while viewing a subset of changes.
  Only one suggestion per line can be applied in a batch.
  Add this suggestion to a batch that can be applied as a single commit.
  Applying suggestions on deleted lines is not supported.
  You must change the existing code in this line in order to create a valid suggestion.
  Outdated suggestions cannot be applied.
  This suggestion has been applied or marked resolved.
  Suggestions cannot be applied from pending reviews.
  Suggestions cannot be applied on multi-line comments.
  Suggestions cannot be applied while the pull request is queued to merge.
  Suggestion cannot be applied right now. Please check back later.
  
    
  
    
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We also need to add `gpt2_hf_tik_testdata.json` and a section describing how to generate it.
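Following up on that, here is a hedged sketch of what a generator for that test-data file could look like. The record fields (`hf_ids`, `tiktoken_ids`, `match`) and the dummy byte-level encoders are illustrative assumptions, not the PR's actual schema; a real run would plug in `transformers.GPT2TokenizerFast` and `tiktoken.get_encoding("gpt2")` as the two encode callables.

```python
import json

def build_testdata(texts, encode_hf, encode_tik):
    """Build validation records comparing two tokenizer implementations.

    encode_hf / encode_tik are injected callables so this sketch stays
    self-contained; in real use they would wrap the Hugging Face and
    tiktoken encoders (both third-party dependencies).
    """
    records = []
    for text in texts:
        hf_ids = encode_hf(text)
        tik_ids = encode_tik(text)
        records.append({
            "text": text,
            "hf_ids": hf_ids,
            "tiktoken_ids": tik_ids,
            "match": hf_ids == tik_ids,
        })
    return records

def dummy_encode(s):
    # stand-in encoder: UTF-8 byte values; a real run would call
    # tokenizer.encode(s) / enc.encode(s) instead
    return list(s.encode("utf-8"))

data = build_testdata(["hello", "tokenizer"], dummy_encode, dummy_encode)

# filename taken from the review comment above
with open("gpt2_hf_tik_testdata.json", "w") as f:
    json.dump(data, f, indent=2)

print(all(r["match"] for r in data))  # → True
```

Storing both id lists plus a `match` flag would let the C++ tokenizer tests assert exact agreement per sample and quickly spot which inputs diverge.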