diff --git a/public/scripts/gpt-2-3-tokenizer/README.md b/public/scripts/gpt-2-3-tokenizer/README.md
index f09949a58..616eba077 100644
--- a/public/scripts/gpt-2-3-tokenizer/README.md
+++ b/public/scripts/gpt-2-3-tokenizer/README.md
@@ -5,20 +5,20 @@ GPT-2/3 byte pair encoder/decoder/tokenizer based on [@latitudegames/GPT-3-Encod
 
 See also: [JS byte pair encoder for OpenAI's CLIP model](https://github.com/josephrocca/clip-bpe-js).
 
 ```js
-import {encode, decode} from "https://deno.land/x/gpt_2_3_tokenizer@v0.0.1/mod.js";
+import {encode, decode} from "https://deno.land/x/gpt_2_3_tokenizer@v0.0.2/mod.js";
 let text = "hello world";
 console.log(encode(text)); // [258, 18798, 995]
 console.log(decode(encode(text))); // "hello world"
 ```
 or:
 ```js
-let mod = await import("https://deno.land/x/gpt_2_3_tokenizer@v0.0.1/mod.js");
+let mod = await import("https://deno.land/x/gpt_2_3_tokenizer@v0.0.2/mod.js");
 mod.encode("hello world"); // [258, 18798, 995]
 ```
 or to include it as a global variable in the browser:
 ```html
 <script type=module>
   window.tokenizer = await import("https://deno.land/x/gpt_2_3_tokenizer/mod.js");
 </script>
 ```
diff --git a/public/scripts/gpt-2-3-tokenizer/mod.js b/public/scripts/gpt-2-3-tokenizer/mod.js
index 622f5ab61..c325ce3a2 100644
--- a/public/scripts/gpt-2-3-tokenizer/mod.js
+++ b/public/scripts/gpt-2-3-tokenizer/mod.js
@@ -81,7 +81,7 @@ const bpe_ranks = dictZip(bpe_merges, range(0, bpe_merges.length))
 const cache = {}
 
 function bpe(token) {
-  if (token in cache) {
+  if (Object.hasOwn(cache, token)) {
     return cache[token]
   }
 
@@ -107,7 +107,7 @@ function bpe(token) {
       }
     ))]
 
-    if (!(bigram in bpe_ranks)) {
+    if (!(Object.hasOwn(bpe_ranks, bigram))) {
       break
     }
 