|
/**
 * Function names the lexer recognises.
 *
 * Lookup is prefix-based, so every name that extends another one ("sinh"
 * extends "sin") is listed before its prefix. A first-match scan over this
 * list therefore finds the hyperbolic form instead of stopping at the
 * shorter name.
 */
const fns: readonly string[] = [
  "sinh",
  "cosh",
  "tanh",
  "asin",
  "acos",
  "atan",
  "sin",
  "cos",
  "tan",
  "abs",
];

/** Single-character operator strings; the comma is listed here as well. */
const ops: readonly string[] = ["+", "-", "*", "/", "^", ","];

/** Opening bracket character. */
const lBracket = "(";
/** Closing bracket character. */
const rBracket = ")";

/** Multi-character identifier names, in addition to single letters. */
const extraIdentifiers: readonly string[] = ["theta"];
| 20 | + |
/**
 * Tells whether `token` is one of the strings listed in `ops`.
 *
 * @param token - Candidate text; compared for exact equality, not by prefix.
 * @returns `true` when `ops` contains `token`.
 */
const isOperator = (token: string): boolean => ops.includes(token);
/**
 * Finds the function name that `str` begins with.
 *
 * Every entry of `fns` is tested and the longest match wins, so "sinh(x)"
 * yields "sinh" even when "sin" appears earlier in the list. Returning the
 * first match instead would make the longer names unreachable.
 *
 * @param str - Remaining input, starting at the position being examined.
 * @returns The matched function name, or `undefined` when none matches.
 */
const hasFunction = (str: string): string | undefined => {
  let longest: string | undefined;
  for (const fn of fns) {
    if (!str.startsWith(fn)) continue;
    if (longest === undefined || fn.length > longest.length) longest = fn;
  }
  return longest;
};
/**
 * Finds the identifier that `str` begins with.
 *
 * The names in `extraIdentifiers` are tried first (in list order); otherwise
 * a single leading ASCII letter counts as an identifier.
 *
 * @param str - Remaining input, starting at the position being examined.
 * @returns The matched identifier text, or `undefined` when `str` is empty
 *   or does not start with an identifier.
 */
const hasIdentifier = (str: string): string | undefined => {
  const named = extraIdentifiers.find((id) => str.startsWith(id));
  if (named !== undefined) return named;
  // charAt yields "" for empty input, which the pattern below rejects.
  const first = str.charAt(0);
  return /^[A-Za-z]$/.test(first) ? first : undefined;
};
| 31 | + |
/** Category assigned to a token. */
type TokenType =
  | "function"
  | "operator"
  | "bracket"
  | "number"
  | "identifier"
  | "unknown";

/** One lexical unit of the input. */
type Token = {
  /** Offset in the input string at which the token starts. */
  index: number;
  /** Exact source text of the token. */
  raw: string;
  /** Category of the token. */
  type: TokenType;
  /** Optional numeric level attached to the token. */
  level?: number;
};

/** Outcome of one tokenizer step. */
type EaterResult = {
  /** Token produced by the step; absent when the consumed text is skipped. */
  token?: Token;
  /** Number of characters the step consumed. */
  increment: number;
};

/** `T` with the keys `K` made optional and every other key left as declared. */
type SomePartial<T, K extends keyof T> = Omit<T, K> & Partial<Pick<T, K>>;

/**
 * Callback a tokenizer uses to consume input.
 *
 * @param eating - Either the text to consume or the number of characters.
 * @param token - Token to emit for the consumed text; `index` may be left out.
 * @returns The step result carrying the token (if any) and the consumed length.
 */
type Eater = (
  eating: string | number,
  token?: SomePartial<Token, "index">
) => EaterResult;

/**
 * Function that performs a single lexing step.
 *
 * @param eat - Callback used to consume input and build the step result.
 * @param rest - Input from the current position to the end.
 * @param tokens - Tokens emitted so far.
 * @param index - Offset of `rest` within the whole input.
 * @returns The result of the step.
 */
type Tokenizer = (
  eat: Eater,
  rest: string,
  tokens: Token[],
  index: number
) => EaterResult;
| 63 | + |
/**
 * Drives `tokenizer` over `str` from left to right and collects the tokens
 * it emits.
 *
 * For every step the tokenizer receives an `eat` callback bound to the
 * current position. `eat` fills in the token's `index` and reports how many
 * characters were consumed.
 *
 * @param str - Text to tokenize.
 * @param tokenizer - Step function called once per position.
 * @returns The emitted tokens in input order.
 * @throws Error "Invalid eating" when `eat` is given a string that the
 *   remaining input does not start with.
 * @throws Error when a step reports an increment that is not a positive
 *   integer; such a step would otherwise never advance, or would end the
 *   scan silently.
 */
function tokenizeHelper(str: string, tokenizer: Tokenizer): Token[] {
  const tokens: Token[] = [];
  let index = 0;
  while (index < str.length) {
    // Snapshot the position so the callback keeps the offset of this step.
    const start = index;
    const rest = str.slice(start);
    const eat: Eater = (eating, token) => {
      if (typeof eating === "string" && !rest.startsWith(eating)) {
        throw new Error("Invalid eating");
      }
      return {
        token: token ? { ...token, index: start } : undefined,
        increment: typeof eating === "string" ? eating.length : eating,
      };
    };
    const { token, increment } = tokenizer(eat, rest, tokens, start);
    if (!Number.isInteger(increment) || increment <= 0) {
      throw new Error(
        `Tokenizer did not advance at index ${start} (increment: ${increment})`
      );
    }
    if (token) tokens.push(token);
    index += increment;
  }
  return tokens;
}
| 91 | + |
/**
 * Opening brackets seen during the current tokenizer run.
 *
 * `tokenIndex` is the length of the token list at the moment the bracket was
 * encountered.
 *
 * NOTE(review): entries are pushed for "(" but nothing pops them on ")", and
 * `Token.level` is never filled in. This looks like unfinished nesting
 * bookkeeping; confirm the intended behaviour before relying on it.
 */
const bracketStack: {
  raw: "(" | ")";
  tokenIndex: number;
}[] = [];

/** Unsigned decimal literal: digits with an optional fraction, or a leading-dot fraction. */
const numberPattern = /^(?:\d+(?:\.\d+)?|\.\d+)/;

/**
 * Performs one lexing step on a math expression.
 *
 * Checks are tried in this order: whitespace (skipped), function name,
 * operator, opening bracket, closing bracket, numeric literal, identifier.
 * Anything else becomes a one-character "unknown" token.
 *
 * @param eat - Callback used to consume input and build the step result.
 * @param rest - Input from the current position to the end.
 * @param tokens - Tokens emitted so far; only its length is read.
 * @param index - Offset of `rest` within the whole input.
 * @returns The step result produced through `eat`.
 */
const tokenizer: Tokenizer = (eat, rest, tokens, index) => {
  // Offset 0 marks the start of a new run: drop bracket records left over
  // from an earlier input so the module-level stack does not grow forever.
  if (index === 0) bracketStack.length = 0;

  // charAt yields "" (never undefined) if `rest` happens to be empty.
  const ch = rest.charAt(0);
  if (/\s/.test(ch)) return eat(1);

  const fn = hasFunction(rest);
  if (fn) return eat(fn, { raw: fn, type: "function" });

  if (isOperator(ch)) return eat(1, { raw: ch, type: "operator" });

  if (ch === lBracket) {
    bracketStack.push({ raw: lBracket, tokenIndex: tokens.length });
    return eat(1, { raw: lBracket, type: "bracket" });
  }
  if (ch === rBracket) return eat(1, { raw: rBracket, type: "bracket" });

  // Consume a whole numeric literal instead of emitting one "unknown" token
  // per digit.
  const literal = numberPattern.exec(rest)?.[0];
  if (literal !== undefined) return eat(literal, { raw: literal, type: "number" });

  const id = hasIdentifier(rest);
  if (id) return eat(id, { raw: id, type: "identifier" });

  return eat(1, { type: "unknown", raw: ch });
};
| 137 | + |
// Demo run executed when the module loads: prints the tokens of a sample expression.
console.log(tokenizeHelper("sin(x) + cos(y)", tokenizer));
0 commit comments