From cefedfedd7cb4ea28739a47e0cf173fc48deb17d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Nyffenegger?= Date: Wed, 9 Oct 2024 22:55:27 +0200 Subject: [PATCH] Remove toString() in lookup of this.byte_encoder this.byte_encoder maps Uint8 values to strings, not strings to strings. The call to toString() is unnecessary and can be removed. --- tokenizer.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tokenizer.js b/tokenizer.js index e18da34..fc99eaf 100644 --- a/tokenizer.js +++ b/tokenizer.js @@ -96,7 +96,7 @@ class GPT2Tokenizer extends Tokenizer { const encoded_bytes = this.textEncoder.encode(token); let bytes = []; for (let i = 0; i < encoded_bytes.length; i++) { - bytes.push(this.byte_encoder[encoded_bytes[i].toString()]); + bytes.push(this.byte_encoder[encoded_bytes[i]]); } token = bytes.join("");