Page MenuHomec4science

tokenizer.js
No OneTemporary

File Metadata

Created
Mon, Jan 27, 19:37

tokenizer.js

import { FSM, makeTransition } from "@webassemblyjs/helper-fsm";
import { codeFrameFromSource } from "@webassemblyjs/helper-code-frame";
// eslint-disable-next-line
function getCodeFrame(source, line, column) {
  // Renders a code frame for the position (line, column) inside `source`
  // and pads it with a newline on each side so it can be spliced straight
  // into an error message. Only a start position is handed to
  // `codeFrameFromSource`; no end position is supplied.
  const frame = codeFrameFromSource(source, {
    start: { line, column }
  });

  return "\n" + frame + "\n";
}
// Single-character classifiers used by the tokenizer's main loop.

// Any whitespace character.
const WHITESPACE = /\s/;

// An opening or a closing parenthesis.
const PARENS = /\(|\)/;

// Characters allowed in bare names (keywords, instruction names, valtypes).
const LETTERS = /[a-z0-9_/]/i;

// Characters allowed in a `$identifier` after the leading `$`.
const idchar = /[a-z0-9!#$%&*+./:<=>?@\\[\]^_`|~-]/i;

// Value types that get their own token type instead of a plain name.
const valtypes = ["i32", "i64", "f32", "f64"];

// Characters that may start a number literal: a digit, `.` or `_`.
// Inside a character class `|` is a literal pipe, not an alternation, so the
// previous spelling `[0-9|.|_]` also classified "|" as the start of a number.
const NUMBERS = /[0-9._]/;

// Textual float keywords that are tokenized as numbers.
const NUMBER_KEYWORDS = /nan|inf/;
/**
 * Tells whether a string starts with a line-break character.
 *
 * @param {string} char String whose first code unit is inspected.
 * @returns {boolean} true when the first code unit is LF (10) or CR (13).
 */
function isNewLine(char) {
  const code = char.charCodeAt(0);
  return code === 0x0a || code === 0x0d;
}
/**
 * Builds a token record.
 *
 * @param {string} type Token type.
 * @param {*} value Token value.
 * @param {Object} start Start position, stored as `loc.start`.
 * @param {Object} end End position, stored as `loc.end`.
 * @param {Object} [opts={}] Extra data; attached as `opts` only when it has
 *   at least one own enumerable key.
 * @returns {Object} `{ type, value, loc: { start, end } }`, plus `opts` when
 *   non-empty.
 */
function Token(type, value, start, end, opts = {}) {
  const token = {
    type,
    value,
    loc: { start, end }
  };

  const hasOpts = Object.keys(opts).length > 0;
  if (hasOpts) {
    // $FlowIgnore
    token.opts = opts;
  }

  return token;
}
// Token type table: every entry maps a type name to the identical string,
// so the table is derived from the list of names instead of repeating each
// name as both key and value.
const tokenTypes = [
  "openParen",
  "closeParen",
  "number",
  "string",
  "name",
  "identifier",
  "valtype",
  "dot",
  "comment",
  "equal",
  "keyword"
].reduce(function (table, typeName) {
  table[typeName] = typeName;
  return table;
}, {});
// Keyword table: each keyword maps to itself. The tokenizer emits a keyword
// token for a bare word when its entry in this table is a string.
export const keywords = [
  "module",
  "func",
  "param",
  "result",
  "export",
  "loop",
  "block",
  "if",
  "then",
  "else",
  "call",
  "call_indirect",
  "import",
  "memory",
  "table",
  "global",
  "anyfunc",
  "mut",
  "data",
  "type",
  "elem",
  "start",
  "offset"
].reduce(function (table, word) {
  table[word] = word;
  return table;
}, {});
// Character that may appear between the digits of a number literal.
const NUMERIC_SEPARATOR = "_";

/**
 * Builds the transitions that START and AFTER_SIGN have in common: the
 * `nan:0x` payload prefix, the `nan`/`inf` keywords, the `0x` hex prefix, a
 * decimal digit, or a leading `.`. A fresh array is returned on every call so
 * the two states never share transition objects.
 *
 * NOTE(review): `n` is presumably the number of characters the pattern is
 * tested against and consumed - confirm against @webassemblyjs/helper-fsm.
 */
function makeUnsignedLiteralTransitions() {
  return [
    makeTransition(/nan:0x/, "NAN_HEX", { n: 6 }),
    makeTransition(/nan|inf/, "STOP", { n: 3 }),
    makeTransition(/0x/, "HEX", { n: 2 }),
    makeTransition(/[0-9]/, "DEC"),
    makeTransition(/\./, "DEC_FRAC")
  ];
}

/**
 * Build the FSM for number literals
 *
 * States: START -> optional AFTER_SIGN -> one of
 *   - DEC / DEC_FRAC / DEC_SIGNED_EXP / DEC_EXP (decimal literals),
 *   - HEX / HEX_FRAC / HEX_SIGNED_EXP / HEX_EXP (hexadecimal literals),
 *   - NAN_HEX (`nan:0x...` payload),
 *   - STOP (after `nan` / `inf`).
 * "START" is the initial state and "STOP" the terminating state.
 *
 * Character classes are written without `|`: inside `[...]` a pipe is a
 * literal character, so the former `[0-9|A-F|a-f]` and `[0-9|+|-]` accepted
 * "|" as a hex digit / exponent sign.
 */
const numberLiteralFSM = new FSM(
  {
    START: [makeTransition(/-|\+/, "AFTER_SIGN")].concat(
      makeUnsignedLiteralTransitions()
    ),
    AFTER_SIGN: makeUnsignedLiteralTransitions(),
    DEC_FRAC: [
      makeTransition(/[0-9]/, "DEC_FRAC", {
        allowedSeparator: NUMERIC_SEPARATOR
      }),
      makeTransition(/e|E/, "DEC_SIGNED_EXP")
    ],
    DEC: [
      makeTransition(/[0-9]/, "DEC", {
        allowedSeparator: NUMERIC_SEPARATOR
      }),
      makeTransition(/\./, "DEC_FRAC"),
      makeTransition(/e|E/, "DEC_SIGNED_EXP")
    ],
    DEC_SIGNED_EXP: [
      makeTransition(/\+|-/, "DEC_EXP"),
      makeTransition(/[0-9]/, "DEC_EXP")
    ],
    DEC_EXP: [
      makeTransition(/[0-9]/, "DEC_EXP", {
        allowedSeparator: NUMERIC_SEPARATOR
      })
    ],
    HEX: [
      makeTransition(/[0-9A-Fa-f]/, "HEX", {
        allowedSeparator: NUMERIC_SEPARATOR
      }),
      makeTransition(/\./, "HEX_FRAC"),
      makeTransition(/p|P/, "HEX_SIGNED_EXP")
    ],
    HEX_FRAC: [
      makeTransition(/[0-9A-Fa-f]/, "HEX_FRAC", {
        allowedSeparator: NUMERIC_SEPARATOR
      }),
      // Was /p|P|/: the trailing empty alternative matches any input, so any
      // character following a hex fraction satisfied this transition. Only
      // the exponent marker may do so, as in the HEX state.
      makeTransition(/p|P/, "HEX_SIGNED_EXP")
    ],
    HEX_SIGNED_EXP: [makeTransition(/[0-9+-]/, "HEX_EXP")],
    HEX_EXP: [
      makeTransition(/[0-9]/, "HEX_EXP", {
        allowedSeparator: NUMERIC_SEPARATOR
      })
    ],
    NAN_HEX: [
      makeTransition(/[0-9A-Fa-f]/, "NAN_HEX", {
        allowedSeparator: NUMERIC_SEPARATOR
      })
    ],
    STOP: []
  },
  "START",
  "STOP"
);
/**
 * Splits WebAssembly text-format source into a flat list of tokens.
 *
 * Recognized, in this order: `;;` line comments, `(; ;)` block comments,
 * parentheses, `=`, line breaks, other whitespace, `$identifiers`, number
 * literals, double-quoted strings, and bare words (keywords, value types,
 * names, and `name.name` member accesses).
 *
 * Position bookkeeping (kept as-is because token locations are observable):
 * `column` starts at 1 on the first line and is reset to 0 after a line
 * break seen by the main loop; a token pushed without an explicit
 * `startColumn` starts at `column - String(value).length`.
 *
 * @param {string} input Source text to tokenize.
 * @returns {Array<Object>} Tokens as produced by `Token`, in source order.
 * @throws {Error} On a character no rule accepts, on a line break inside a
 *   string, on trailing garbage directly after a number literal, and on a
 *   block comment or string that is still open at the end of the input.
 */
export function tokenize(input) {
  let current = 0;
  // `char` is `undefined` once `current` runs past the end of the input.
  let char = input[current];

  // Used by SourceLocation
  let column = 1;
  let line = 1;
  const tokens = [];

  /** Error for the current character, with a code frame at the cursor. */
  function unexpectedCharacter() {
    return new Error(
      getCodeFrame(input, line, column) +
        "Unexpected character " +
        JSON.stringify(char)
    );
  }

  /**
   * Error for a construct still open when the input ends.
   * NOTE(review): the cursor is one past the last character here; confirm
   * the code-frame helper accepts such a position.
   */
  function unterminated(what) {
    return new Error(getCodeFrame(input, line, column) + "Unterminated " + what);
  }

  /**
   * Creates a pushToken function for a given type
   *
   * The returned function takes the token value and an optional options
   * object. `startColumn` / `endColumn` are read from the options and then
   * removed from it; whatever remains is attached to the token.
   */
  function pushToken(type) {
    return function (v, opts = {}) {
      const startColumn = opts.startColumn || column - String(v).length;
      delete opts.startColumn;
      const endColumn = opts.endColumn || startColumn + String(v).length - 1;
      delete opts.endColumn;

      const start = { line: line, column: startColumn };
      const end = { line: line, column: endColumn };
      tokens.push(Token(type, v, start, end, opts));
    };
  }

  /**
   * Functions to save newly encountered tokens
   */
  const pushCloseParenToken = pushToken(tokenTypes.closeParen);
  const pushOpenParenToken = pushToken(tokenTypes.openParen);
  const pushNumberToken = pushToken(tokenTypes.number);
  const pushValtypeToken = pushToken(tokenTypes.valtype);
  const pushNameToken = pushToken(tokenTypes.name);
  const pushIdentifierToken = pushToken(tokenTypes.identifier);
  const pushKeywordToken = pushToken(tokenTypes.keyword);
  const pushDotToken = pushToken(tokenTypes.dot);
  const pushStringToken = pushToken(tokenTypes.string);
  const pushCommentToken = pushToken(tokenTypes.comment);
  const pushEqualToken = pushToken(tokenTypes.equal);

  /**
   * Can be used to look at the next character(s).
   *
   * The default behavior `lookahead()` simply returns the next character without consuming it.
   * Letters are always returned in lowercase.
   *
   * @param {number} length How many characters to query. Default = 1
   * @param {number} offset How many characters to skip forward from current one. Default = 1
   *
   */
  function lookahead(length = 1, offset = 1) {
    return input
      .substring(current + offset, current + offset + length)
      .toLowerCase();
  }

  /**
   * Advances the cursor in the input by a certain amount
   *
   * @param {number} amount How many characters to consume. Default = 1
   */
  function eatCharacter(amount = 1) {
    column += amount;
    current += amount;
    char = input[current];
  }

  while (current < input.length) {
    // ;; line comment: everything up to (not including) the line break
    if (char === ";" && lookahead() === ";") {
      const startColumn = column;
      eatCharacter(2);

      let text = "";
      // Stop at the end of the input as well: `;;` may be the very last
      // thing in the source, in which case `char` is already undefined.
      while (char !== undefined && !isNewLine(char)) {
        text += char;
        eatCharacter();
      }

      const endColumn = column;
      pushCommentToken(text, {
        type: "leading",
        startColumn: startColumn,
        endColumn: endColumn
      });
      continue;
    }

    // (; block comment ;)
    if (char === "(" && lookahead() === ";") {
      const startColumn = column;
      eatCharacter(2);

      let text = "";
      while (true) {
        if (char === undefined) {
          // Ran off the end of the input without seeing `;)`.
          throw unterminated("block comment");
        }

        // Count every line break inside the comment, including one that
        // directly follows the opening `(;`.
        if (isNewLine(char)) {
          line++;
          column = 0;
        }

        if (char === ";" && lookahead() === ")") {
          eatCharacter(2);
          break;
        }

        text += char;
        eatCharacter();
      }

      const endColumn = column;
      pushCommentToken(text, {
        type: "block",
        startColumn: startColumn,
        endColumn: endColumn
      });
      continue;
    }

    if (char === "(") {
      pushOpenParenToken(char);
      eatCharacter();
      continue;
    }

    if (char === "=") {
      pushEqualToken(char);
      eatCharacter();
      continue;
    }

    if (char === ")") {
      pushCloseParenToken(char);
      eatCharacter();
      continue;
    }

    if (isNewLine(char)) {
      line++;
      eatCharacter();
      column = 0;
      continue;
    }

    if (WHITESPACE.test(char)) {
      eatCharacter();
      continue;
    }

    // $identifier
    if (char === "$") {
      const startColumn = column;
      eatCharacter();

      let value = "";
      // Guard against the end of the input: `regex.test(undefined)` tests
      // the string "undefined", which matches `idchar` and would never end.
      while (char !== undefined && idchar.test(char)) {
        value += char;
        eatCharacter();
      }

      const endColumn = column;
      pushIdentifierToken(value, {
        startColumn: startColumn,
        endColumn: endColumn
      });
      continue;
    }

    // Number literal (including signed numbers and nan / inf)
    if (
      NUMBERS.test(char) ||
      NUMBER_KEYWORDS.test(lookahead(3, 0)) ||
      char === "-" ||
      char === "+"
    ) {
      const startColumn = column;
      const value = numberLiteralFSM.run(input.slice(current));

      if (value === "") {
        throw unexpectedCharacter();
      }

      pushNumberToken(value, {
        startColumn: startColumn
      });
      eatCharacter(value.length);

      // A number must be followed by a parenthesis, whitespace or the end
      // of the input.
      if (char && !PARENS.test(char) && !WHITESPACE.test(char)) {
        throw unexpectedCharacter();
      }
      continue;
    }

    // "string"
    if (char === '"') {
      const startColumn = column;
      let value = "";
      eatCharacter(); // opening "

      while (char !== '"') {
        if (char === undefined) {
          // Input ended before the closing quote.
          throw unterminated("string literal");
        }
        if (isNewLine(char)) {
          throw unexpectedCharacter();
        }
        value += char;
        eatCharacter(); // char
      }
      eatCharacter(); // closing "

      const endColumn = column;
      pushStringToken(value, {
        startColumn: startColumn,
        endColumn: endColumn
      });
      continue;
    }

    // Bare word: keyword, value type, name, or member access
    if (LETTERS.test(char)) {
      let value = "";
      const startColumn = column;

      while (char && LETTERS.test(char)) {
        value += char;
        eatCharacter();
      }

      /*
       * Handle MemberAccess
       */
      if (char === ".") {
        const dotStartColumn = column;

        if (valtypes.indexOf(value) !== -1) {
          pushValtypeToken(value, {
            startColumn: startColumn
          });
        } else {
          pushNameToken(value);
        }
        eatCharacter();

        value = "";
        const nameStartColumn = column;
        // Same end-of-input guard as above: without it a source ending in
        // `name.` would loop forever appending "undefined".
        while (char && LETTERS.test(char)) {
          value += char;
          eatCharacter();
        }

        pushDotToken(".", {
          startColumn: dotStartColumn
        });
        pushNameToken(value, {
          startColumn: nameStartColumn
        });
        continue;
      }

      /*
       * Handle keywords
       */
      // $FlowIgnore
      if (typeof keywords[value] === "string") {
        pushKeywordToken(value, {
          startColumn: startColumn
        });
        continue;
      }

      /*
       * Handle types
       */
      if (valtypes.indexOf(value) !== -1) {
        pushValtypeToken(value, {
          startColumn: startColumn
        });
        continue;
      }

      /*
       * Handle literals
       */
      pushNameToken(value, {
        startColumn: startColumn
      });
      continue;
    }

    throw unexpectedCharacter();
  }

  return tokens;
}
// Public alias for the token type table, exported under the name `tokens`.
export var tokens = tokenTypes;

Event Timeline