Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F100031654
tokenizer.js
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Mon, Jan 27, 19:37
Size
10 KB
Mime Type
text/x-Algol68
Expires
Wed, Jan 29, 19:37 (2 d)
Engine
blob
Format
Raw Data
Handle
23878590
Attached To
rOACCT Open Access Compliance Check Tool (OACCT)
tokenizer.js
View Options
import { FSM, makeTransition } from "@webassemblyjs/helper-fsm";
import { codeFrameFromSource } from "@webassemblyjs/helper-code-frame";
// eslint-disable-next-line
function getCodeFrame(source, line, column) {
var loc = {
start: {
line: line,
column: column
}
};
return "\n" + codeFrameFromSource(source, loc) + "\n";
}
var WHITESPACE = /\s/;
var PARENS = /\(|\)/;
var LETTERS = /[a-z0-9_/]/i;
var idchar = /[a-z0-9!#$%&*+./:<=>?@\\[\]^_`|~-]/i;
var valtypes = ["i32", "i64", "f32", "f64"];
var NUMBERS = /[0-9|.|_]/;
var NUMBER_KEYWORDS = /nan|inf/;
function isNewLine(char) {
return char.charCodeAt(0) === 10 || char.charCodeAt(0) === 13;
}
function Token(type, value, start, end) {
var opts = arguments.length > 4 && arguments[4] !== undefined ? arguments[4] : {};
var token = {
type: type,
value: value,
loc: {
start: start,
end: end
}
};
if (Object.keys(opts).length > 0) {
// $FlowIgnore
token["opts"] = opts;
}
return token;
}
var tokenTypes = {
openParen: "openParen",
closeParen: "closeParen",
number: "number",
string: "string",
name: "name",
identifier: "identifier",
valtype: "valtype",
dot: "dot",
comment: "comment",
equal: "equal",
keyword: "keyword"
};
export var keywords = {
module: "module",
func: "func",
param: "param",
result: "result",
export: "export",
loop: "loop",
block: "block",
if: "if",
then: "then",
else: "else",
call: "call",
call_indirect: "call_indirect",
import: "import",
memory: "memory",
table: "table",
global: "global",
anyfunc: "anyfunc",
mut: "mut",
data: "data",
type: "type",
elem: "elem",
start: "start",
offset: "offset"
};
var NUMERIC_SEPARATOR = "_";
/**
* Build the FSM for number literals
*/
var numberLiteralFSM = new FSM({
START: [makeTransition(/-|\+/, "AFTER_SIGN"), makeTransition(/nan:0x/, "NAN_HEX", {
n: 6
}), makeTransition(/nan|inf/, "STOP", {
n: 3
}), makeTransition(/0x/, "HEX", {
n: 2
}), makeTransition(/[0-9]/, "DEC"), makeTransition(/\./, "DEC_FRAC")],
AFTER_SIGN: [makeTransition(/nan:0x/, "NAN_HEX", {
n: 6
}), makeTransition(/nan|inf/, "STOP", {
n: 3
}), makeTransition(/0x/, "HEX", {
n: 2
}), makeTransition(/[0-9]/, "DEC"), makeTransition(/\./, "DEC_FRAC")],
DEC_FRAC: [makeTransition(/[0-9]/, "DEC_FRAC", {
allowedSeparator: NUMERIC_SEPARATOR
}), makeTransition(/e|E/, "DEC_SIGNED_EXP")],
DEC: [makeTransition(/[0-9]/, "DEC", {
allowedSeparator: NUMERIC_SEPARATOR
}), makeTransition(/\./, "DEC_FRAC"), makeTransition(/e|E/, "DEC_SIGNED_EXP")],
DEC_SIGNED_EXP: [makeTransition(/\+|-/, "DEC_EXP"), makeTransition(/[0-9]/, "DEC_EXP")],
DEC_EXP: [makeTransition(/[0-9]/, "DEC_EXP", {
allowedSeparator: NUMERIC_SEPARATOR
})],
HEX: [makeTransition(/[0-9|A-F|a-f]/, "HEX", {
allowedSeparator: NUMERIC_SEPARATOR
}), makeTransition(/\./, "HEX_FRAC"), makeTransition(/p|P/, "HEX_SIGNED_EXP")],
HEX_FRAC: [makeTransition(/[0-9|A-F|a-f]/, "HEX_FRAC", {
allowedSeparator: NUMERIC_SEPARATOR
}), makeTransition(/p|P|/, "HEX_SIGNED_EXP")],
HEX_SIGNED_EXP: [makeTransition(/[0-9|+|-]/, "HEX_EXP")],
HEX_EXP: [makeTransition(/[0-9]/, "HEX_EXP", {
allowedSeparator: NUMERIC_SEPARATOR
})],
NAN_HEX: [makeTransition(/[0-9|A-F|a-f]/, "NAN_HEX", {
allowedSeparator: NUMERIC_SEPARATOR
})],
STOP: []
}, "START", "STOP");
export function tokenize(input) {
var current = 0;
var char = input[current]; // Used by SourceLocation
var column = 1;
var line = 1;
var tokens = [];
/**
* Creates a pushToken function for a given type
*/
function pushToken(type) {
return function (v) {
var opts = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
var startColumn = opts.startColumn || column - String(v).length;
delete opts.startColumn;
var endColumn = opts.endColumn || startColumn + String(v).length - 1;
delete opts.endColumn;
var start = {
line: line,
column: startColumn
};
var end = {
line: line,
column: endColumn
};
tokens.push(Token(type, v, start, end, opts));
};
}
/**
* Functions to save newly encountered tokens
*/
var pushCloseParenToken = pushToken(tokenTypes.closeParen);
var pushOpenParenToken = pushToken(tokenTypes.openParen);
var pushNumberToken = pushToken(tokenTypes.number);
var pushValtypeToken = pushToken(tokenTypes.valtype);
var pushNameToken = pushToken(tokenTypes.name);
var pushIdentifierToken = pushToken(tokenTypes.identifier);
var pushKeywordToken = pushToken(tokenTypes.keyword);
var pushDotToken = pushToken(tokenTypes.dot);
var pushStringToken = pushToken(tokenTypes.string);
var pushCommentToken = pushToken(tokenTypes.comment);
var pushEqualToken = pushToken(tokenTypes.equal);
/**
* Can be used to look at the next character(s).
*
* The default behavior `lookahead()` simply returns the next character without consuming it.
* Letters are always returned in lowercase.
*
* @param {number} length How many characters to query. Default = 1
* @param {number} offset How many characters to skip forward from current one. Default = 1
*
*/
function lookahead() {
var length = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 1;
var offset = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 1;
return input.substring(current + offset, current + offset + length).toLowerCase();
}
/**
* Advances the cursor in the input by a certain amount
*
* @param {number} amount How many characters to consume. Default = 1
*/
function eatCharacter() {
var amount = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 1;
column += amount;
current += amount;
char = input[current];
}
while (current < input.length) {
// ;;
if (char === ";" && lookahead() === ";") {
var startColumn = column;
eatCharacter(2);
var text = "";
while (!isNewLine(char)) {
text += char;
eatCharacter();
if (char === undefined) {
break;
}
}
var endColumn = column;
pushCommentToken(text, {
type: "leading",
startColumn: startColumn,
endColumn: endColumn
});
continue;
} // (;
if (char === "(" && lookahead() === ";") {
var _startColumn = column;
eatCharacter(2);
var _text = ""; // ;)
while (true) {
char = input[current];
if (char === ";" && lookahead() === ")") {
eatCharacter(2);
break;
}
_text += char;
eatCharacter();
if (isNewLine(char)) {
line++;
column = 0;
}
}
var _endColumn = column;
pushCommentToken(_text, {
type: "block",
startColumn: _startColumn,
endColumn: _endColumn
});
continue;
}
if (char === "(") {
pushOpenParenToken(char);
eatCharacter();
continue;
}
if (char === "=") {
pushEqualToken(char);
eatCharacter();
continue;
}
if (char === ")") {
pushCloseParenToken(char);
eatCharacter();
continue;
}
if (isNewLine(char)) {
line++;
eatCharacter();
column = 0;
continue;
}
if (WHITESPACE.test(char)) {
eatCharacter();
continue;
}
if (char === "$") {
var _startColumn2 = column;
eatCharacter();
var value = "";
while (idchar.test(char)) {
value += char;
eatCharacter();
}
var _endColumn2 = column;
pushIdentifierToken(value, {
startColumn: _startColumn2,
endColumn: _endColumn2
});
continue;
}
if (NUMBERS.test(char) || NUMBER_KEYWORDS.test(lookahead(3, 0)) || char === "-" || char === "+") {
var _startColumn3 = column;
var _value = numberLiteralFSM.run(input.slice(current));
if (_value === "") {
throw new Error(getCodeFrame(input, line, column) + "Unexpected character " + JSON.stringify(char));
}
pushNumberToken(_value, {
startColumn: _startColumn3
});
eatCharacter(_value.length);
if (char && !PARENS.test(char) && !WHITESPACE.test(char)) {
throw new Error(getCodeFrame(input, line, column) + "Unexpected character " + JSON.stringify(char));
}
continue;
}
if (char === '"') {
var _startColumn4 = column;
var _value2 = "";
eatCharacter(); // "
while (char !== '"') {
if (isNewLine(char)) {
throw new Error(getCodeFrame(input, line, column) + "Unexpected character " + JSON.stringify(char));
}
_value2 += char;
eatCharacter(); // char
}
eatCharacter(); // "
var _endColumn3 = column;
pushStringToken(_value2, {
startColumn: _startColumn4,
endColumn: _endColumn3
});
continue;
}
if (LETTERS.test(char)) {
var _value3 = "";
var _startColumn5 = column;
while (char && LETTERS.test(char)) {
_value3 += char;
eatCharacter();
}
/*
* Handle MemberAccess
*/
if (char === ".") {
var dotStartColumn = column;
if (valtypes.indexOf(_value3) !== -1) {
pushValtypeToken(_value3, {
startColumn: _startColumn5
});
} else {
pushNameToken(_value3);
}
eatCharacter();
_value3 = "";
var nameStartColumn = column;
while (LETTERS.test(char)) {
_value3 += char;
eatCharacter();
}
pushDotToken(".", {
startColumn: dotStartColumn
});
pushNameToken(_value3, {
startColumn: nameStartColumn
});
continue;
}
/*
* Handle keywords
*/
// $FlowIgnore
if (typeof keywords[_value3] === "string") {
pushKeywordToken(_value3, {
startColumn: _startColumn5
});
continue;
}
/*
* Handle types
*/
if (valtypes.indexOf(_value3) !== -1) {
pushValtypeToken(_value3, {
startColumn: _startColumn5
});
continue;
}
/*
* Handle literals
*/
pushNameToken(_value3, {
startColumn: _startColumn5
});
continue;
}
throw new Error(getCodeFrame(input, line, column) + "Unexpected character " + JSON.stringify(char));
}
return tokens;
}
export var tokens = tokenTypes;
Event Timeline
Log In to Comment