Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F98447021
tokenizer.js
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Mon, Jan 13, 06:59
Size
10 KB
Mime Type
text/x-Algol68
Expires
Wed, Jan 15, 06:59 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
23583198
Attached To
rOACCT Open Access Compliance Check Tool (OACCT)
tokenizer.js
View Options
import
{
FSM
,
makeTransition
}
from
"@webassemblyjs/helper-fsm"
;
import
{
codeFrameFromSource
}
from
"@webassemblyjs/helper-code-frame"
;
/**
 * Renders a code frame for a position in `source`, padded with a newline on
 * each side so it can be concatenated directly with an error message.
 *
 * @param {string} source Full source text being tokenized.
 * @param {number} line Line of the position to point at.
 * @param {number} column Column of the position to point at.
 * @returns {string} "\n" + the frame produced by `codeFrameFromSource` + "\n".
 */
// eslint-disable-next-line
function getCodeFrame(source, line, column) {
  // Only a start position is supplied; no end position is passed along.
  var frame = codeFrameFromSource(source, { start: { line, column } });
  return "\n" + frame + "\n";
}
// Character classes consulted by the tokenizer's main loop. Every pattern is
// tested against a single character, except NUMBER_KEYWORDS which is tested
// against a three-character lookahead.

// Any whitespace character.
var WHITESPACE = /\s/;

// An opening or closing parenthesis.
var PARENS = /\(|\)/;

// Characters that may appear in a bare word (keyword, value type, name).
var LETTERS = /[a-z0-9_/]/i;

// Characters accepted after the leading `$` of an identifier.
var idchar = /[a-z0-9!#$%&*+./:<=>?@\\[\]^_`|~-]/i;

// Words recognised as value types.
var valtypes = ["i32", "i64", "f32", "f64"];

// Characters that route the tokenizer into the number-literal branch.
// Inside a character class `|` is a literal, not alternation, so it must not
// be used as a separator here: a stray `|` is not the start of a number.
var NUMBERS = /[0-9._]/;

// Textual float constants that also route into the number-literal branch.
var NUMBER_KEYWORDS = /nan|inf/;
/**
 * Tells whether a string starts with a line-break character.
 *
 * Only the first UTF-16 code unit is inspected: line feed (10) and carriage
 * return (13) count as new lines.
 *
 * @param {string} char String to inspect; must not be `undefined`.
 * @returns {boolean} `true` for "\n" or "\r" in first position.
 */
function isNewLine(char) {
  const code = char.charCodeAt(0);
  const LINE_FEED = 10;
  const CARRIAGE_RETURN = 13;

  if (code === LINE_FEED) {
    return true;
  }
  return code === CARRIAGE_RETURN;
}
/**
 * Builds a token record.
 *
 * @param {string} type Token type.
 * @param {*} value Token payload.
 * @param {Object} start Start position, stored as `loc.start`.
 * @param {Object} end End position, stored as `loc.end`.
 * @param {Object} [opts={}] Extra attributes. Attached as `token.opts` (same
 *   object, not a copy) only when it has at least one own enumerable key.
 * @returns {Object} `{ type, value, loc: { start, end } }`, plus `opts` when
 *   non-empty.
 */
function Token(type, value, start, end, opts = {}) {
  const token = { type, value, loc: { start, end } };

  const hasOptions = Object.keys(opts).length > 0;
  if (hasOptions) {
    // $FlowIgnore
    token.opts = opts;
  }

  return token;
}
/**
 * Table of token type names. Every key maps to the identical string, so the
 * table is generated from the list of names instead of spelling each pair out.
 */
var tokenTypes = [
  "openParen",
  "closeParen",
  "number",
  "string",
  "name",
  "identifier",
  "valtype",
  "dot",
  "comment",
  "equal",
  "keyword"
].reduce(function (table, typeName) {
  table[typeName] = typeName;
  return table;
}, {});
/**
 * Table of reserved words. Every key maps to the identical string; the
 * tokenizer looks a bare word up here and emits a keyword token when the
 * lookup yields a string.
 */
export var keywords = {};

[
  "module",
  "func",
  "param",
  "result",
  "export",
  "loop",
  "block",
  "if",
  "then",
  "else",
  "call",
  "call_indirect",
  "import",
  "memory",
  "table",
  "global",
  "anyfunc",
  "mut",
  "data",
  "type",
  "elem",
  "start",
  "offset"
].forEach(function (word) {
  keywords[word] = word;
});
// Digit-group separator tolerated inside number literals.
var NUMERIC_SEPARATOR = "_";

/**
 * Build the FSM for number literals.
 *
 * Each state lists its outgoing transitions as
 * `makeTransition(pattern, nextState, options)`. The machine starts in
 * "START" and "STOP" is its terminating state.
 *
 * NOTE(review): the meaning of the `n` and `allowedSeparator` options is
 * defined by @webassemblyjs/helper-fsm; `n` is presumably the number of
 * input characters the pattern is matched against - confirm there.
 *
 * The patterns are written so that they only accept the characters they
 * name: inside a character class `|` is a literal (so it is not used as a
 * separator), and an alternation must not end in an empty alternative,
 * because `/p|P|/` matches every input.
 */
var numberLiteralFSM = new FSM(
  {
    START: [
      makeTransition(/-|\+/, "AFTER_SIGN"),
      makeTransition(/nan:0x/, "NAN_HEX", { n: 6 }),
      makeTransition(/nan|inf/, "STOP", { n: 3 }),
      makeTransition(/0x/, "HEX", { n: 2 }),
      makeTransition(/[0-9]/, "DEC"),
      makeTransition(/\./, "DEC_FRAC")
    ],
    // Same as START, minus the sign itself.
    AFTER_SIGN: [
      makeTransition(/nan:0x/, "NAN_HEX", { n: 6 }),
      makeTransition(/nan|inf/, "STOP", { n: 3 }),
      makeTransition(/0x/, "HEX", { n: 2 }),
      makeTransition(/[0-9]/, "DEC"),
      makeTransition(/\./, "DEC_FRAC")
    ],
    DEC_FRAC: [
      makeTransition(/[0-9]/, "DEC_FRAC", {
        allowedSeparator: NUMERIC_SEPARATOR
      }),
      makeTransition(/e|E/, "DEC_SIGNED_EXP")
    ],
    DEC: [
      makeTransition(/[0-9]/, "DEC", {
        allowedSeparator: NUMERIC_SEPARATOR
      }),
      makeTransition(/\./, "DEC_FRAC"),
      makeTransition(/e|E/, "DEC_SIGNED_EXP")
    ],
    DEC_SIGNED_EXP: [
      makeTransition(/\+|-/, "DEC_EXP"),
      makeTransition(/[0-9]/, "DEC_EXP")
    ],
    DEC_EXP: [
      makeTransition(/[0-9]/, "DEC_EXP", {
        allowedSeparator: NUMERIC_SEPARATOR
      })
    ],
    HEX: [
      makeTransition(/[0-9A-Fa-f]/, "HEX", {
        allowedSeparator: NUMERIC_SEPARATOR
      }),
      makeTransition(/\./, "HEX_FRAC"),
      makeTransition(/p|P/, "HEX_SIGNED_EXP")
    ],
    HEX_FRAC: [
      makeTransition(/[0-9A-Fa-f]/, "HEX_FRAC", {
        allowedSeparator: NUMERIC_SEPARATOR
      }),
      // Only an exponent marker may follow the fraction digits; anything
      // else ends the literal and is left for the tokenizer to handle.
      makeTransition(/p|P/, "HEX_SIGNED_EXP")
    ],
    HEX_SIGNED_EXP: [makeTransition(/[0-9+-]/, "HEX_EXP")],
    HEX_EXP: [
      makeTransition(/[0-9]/, "HEX_EXP", {
        allowedSeparator: NUMERIC_SEPARATOR
      })
    ],
    NAN_HEX: [
      makeTransition(/[0-9A-Fa-f]/, "NAN_HEX", {
        allowedSeparator: NUMERIC_SEPARATOR
      })
    ],
    STOP: []
  },
  "START",
  "STOP"
);
/**
 * Splits WebAssembly text-format source into a flat list of tokens.
 *
 * Recognised constructs, in the order they are tried: line comments (`;;`),
 * block comments (`(; ... ;)`), parentheses, `=`, line breaks, whitespace,
 * `$identifiers`, number literals (delegated to `numberLiteralFSM`), double
 * quoted strings, and bare words (keywords, value types, names, and
 * `left.right` member accesses).
 *
 * @param {string} input Source text.
 * @returns {Array<Object>} Tokens in source order, each built by `Token`.
 * @throws {Error} On a character that starts no known token, on a character
 *   directly following a number that is neither whitespace nor a parenthesis,
 *   on a line break inside a string, and on a string or block comment that is
 *   still open when the input ends.
 */
export function tokenize(input) {
  let current = 0;
  let char = input[current];

  // Used by SourceLocation
  let column = 1;
  let line = 1;

  const tokens = [];

  /**
   * Builds the error reported for the character under the cursor.
   */
  function unexpectedCharacter() {
    return new Error(
      getCodeFrame(input, line, column) +
        "Unexpected character " +
        JSON.stringify(char)
    );
  }

  /**
   * Creates a pushToken function for a given type.
   *
   * The returned function accepts the token value and an options object.
   * `startColumn` and `endColumn` are consumed (and removed) from the options;
   * whatever remains is forwarded to `Token`. When they are missing, the
   * columns are derived from the cursor column and the value's length.
   *
   * NOTE(review): the fallbacks use `||`, so an explicit column of 0 is
   * treated the same as a missing one.
   */
  function pushToken(type) {
    return function (v, opts = {}) {
      const startColumn = opts.startColumn || column - String(v).length;
      delete opts.startColumn;

      const endColumn = opts.endColumn || startColumn + String(v).length - 1;
      delete opts.endColumn;

      // Both ends are stamped with the line the cursor is on when pushing.
      const start = { line: line, column: startColumn };
      const end = { line: line, column: endColumn };

      tokens.push(Token(type, v, start, end, opts));
    };
  }

  /**
   * Functions to save newly encountered tokens
   */
  const pushCloseParenToken = pushToken(tokenTypes.closeParen);
  const pushOpenParenToken = pushToken(tokenTypes.openParen);
  const pushNumberToken = pushToken(tokenTypes.number);
  const pushValtypeToken = pushToken(tokenTypes.valtype);
  const pushNameToken = pushToken(tokenTypes.name);
  const pushIdentifierToken = pushToken(tokenTypes.identifier);
  const pushKeywordToken = pushToken(tokenTypes.keyword);
  const pushDotToken = pushToken(tokenTypes.dot);
  const pushStringToken = pushToken(tokenTypes.string);
  const pushCommentToken = pushToken(tokenTypes.comment);
  const pushEqualToken = pushToken(tokenTypes.equal);

  /**
   * Can be used to look at the next character(s).
   *
   * The default behavior `lookahead()` simply returns the next character without consuming it.
   * Letters are always returned in lowercase.
   *
   * @param {number} length How many characters to query. Default = 1
   * @param {number} offset How many characters to skip forward from current one. Default = 1
   */
  function lookahead(length = 1, offset = 1) {
    return input
      .substring(current + offset, current + offset + length)
      .toLowerCase();
  }

  /**
   * Advances the cursor in the input by a certain amount
   *
   * @param {number} amount How many characters to consume. Default = 1
   */
  function eatCharacter(amount = 1) {
    column += amount;
    current += amount;
    // Becomes `undefined` once the cursor moves past the end of the input.
    char = input[current];
  }

  while (current < input.length) {
    // ;;
    if (char === ";" && lookahead() === ";") {
      const startColumn = column;
      eatCharacter(2);

      let text = "";
      // The comment runs to the end of the line or of the input, whichever
      // comes first; the line break itself is left for the next iteration.
      while (char !== undefined && !isNewLine(char)) {
        text += char;
        eatCharacter();
      }

      const endColumn = column;
      pushCommentToken(text, {
        type: "leading",
        startColumn: startColumn,
        endColumn: endColumn
      });
      continue;
    }

    // (;
    if (char === "(" && lookahead() === ";") {
      const startColumn = column;
      eatCharacter(2);

      let text = "";
      // ;)
      while (true) {
        if (char === undefined) {
          // Input ended before the closing `;)`.
          throw new Error(
            getCodeFrame(input, line, column) + "Unterminated block comment"
          );
        }

        if (char === ";" && lookahead() === ")") {
          eatCharacter(2);
          break;
        }

        text += char;
        eatCharacter();

        // Line bookkeeping happens when the cursor arrives on a line break.
        if (char !== undefined && isNewLine(char)) {
          line++;
          column = 0;
        }
      }

      const endColumn = column;
      pushCommentToken(text, {
        type: "block",
        startColumn: startColumn,
        endColumn: endColumn
      });
      continue;
    }

    if (char === "(") {
      pushOpenParenToken(char);
      eatCharacter();
      continue;
    }

    if (char === "=") {
      pushEqualToken(char);
      eatCharacter();
      continue;
    }

    if (char === ")") {
      pushCloseParenToken(char);
      eatCharacter();
      continue;
    }

    if (isNewLine(char)) {
      line++;
      eatCharacter();
      column = 0;
      continue;
    }

    if (WHITESPACE.test(char)) {
      eatCharacter();
      continue;
    }

    if (char === "$") {
      const startColumn = column;
      eatCharacter();

      let value = "";
      // Stop at the end of the input: testing `undefined` against a regex
      // coerces it to the string "undefined", which would match forever.
      while (char !== undefined && idchar.test(char)) {
        value += char;
        eatCharacter();
      }

      const endColumn = column;
      pushIdentifierToken(value, {
        startColumn: startColumn,
        endColumn: endColumn
      });
      continue;
    }

    if (
      NUMBERS.test(char) ||
      NUMBER_KEYWORDS.test(lookahead(3, 0)) ||
      char === "-" ||
      char === "+"
    ) {
      const startColumn = column;
      const value = numberLiteralFSM.run(input.slice(current));

      // The state machine consumed nothing: this is not a number after all.
      if (value === "") {
        throw unexpectedCharacter();
      }

      pushNumberToken(value, { startColumn: startColumn });
      eatCharacter(value.length);

      // A number must be followed by whitespace, a parenthesis or the end
      // of the input.
      if (char && !PARENS.test(char) && !WHITESPACE.test(char)) {
        throw unexpectedCharacter();
      }
      continue;
    }

    if (char === '"') {
      const startColumn = column;
      let value = "";

      eatCharacter(); // "

      while (char !== '"') {
        if (char === undefined) {
          // Input ended before the closing quote.
          throw new Error(
            getCodeFrame(input, line, column) + "Unterminated string constant"
          );
        }

        if (isNewLine(char)) {
          throw unexpectedCharacter();
        }

        value += char;
        eatCharacter(); // char
      }

      eatCharacter(); // "

      const endColumn = column;
      pushStringToken(value, {
        startColumn: startColumn,
        endColumn: endColumn
      });
      continue;
    }

    if (LETTERS.test(char)) {
      let value = "";
      const startColumn = column;

      while (char && LETTERS.test(char)) {
        value += char;
        eatCharacter();
      }

      /*
       * Handle MemberAccess
       */
      if (char === ".") {
        const dotStartColumn = column;

        if (valtypes.indexOf(value) !== -1) {
          pushValtypeToken(value, { startColumn: startColumn });
        } else {
          pushNameToken(value);
        }

        eatCharacter();
        value = "";
        const nameStartColumn = column;

        // Same end-of-input guard as the loop above.
        while (char && LETTERS.test(char)) {
          value += char;
          eatCharacter();
        }

        pushDotToken(".", { startColumn: dotStartColumn });
        pushNameToken(value, { startColumn: nameStartColumn });
        continue;
      }

      /*
       * Handle keywords
       */
      // $FlowIgnore
      if (typeof keywords[value] === "string") {
        pushKeywordToken(value, { startColumn: startColumn });
        continue;
      }

      /*
       * Handle types
       */
      if (valtypes.indexOf(value) !== -1) {
        pushValtypeToken(value, { startColumn: startColumn });
        continue;
      }

      /*
       * Handle literals
       */
      pushNameToken(value, { startColumn: startColumn });
      continue;
    }

    throw unexpectedCharacter();
  }

  return tokens;
}
// Public name under which the token type table is exported.
export var tokens = tokenTypes;
Event Timeline
Log In to Comment