create-tokenizer.js
'use strict'
var assign = require('../constant/assign.js')
var markdownLineEnding = require('../character/markdown-line-ending.js')
var chunkedPush = require('./chunked-push.js')
var chunkedSplice = require('./chunked-splice.js')
var miniflat = require('./miniflat.js')
var resolveAll = require('./resolve-all.js')
var serializeChunks = require('./serialize-chunks.js')
var shallow = require('./shallow.js')
var sliceChunks = require('./slice-chunks.js')
// Create a tokenizer.
// Tokenizers deal with one type of data (e.g., containers, flow, text).
// The parser is the object dealing with it all.
// `initialize` works like other constructs, except that only its `tokenize`
// function is used, in which case it doesn’t receive an `ok` or `nok`.
// `from` can be given to set the point before the first character, although
// when further lines are indented, they must be set with `defineSkip`.
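// A minimal sketch of how this factory is driven (hypothetical wiring; in
// micromark@2 it lives in `lib/parse.js`, where each of `document`, `flow`,
// `text`, etc. is created from its initial construct):
//
//   var createTokenizer = require('./util/create-tokenizer.js')
//   var initializeDocument = require('../initialize/document.js')
//
//   var tokenizer = createTokenizer(parser, initializeDocument)
//   var events = tokenizer.write(['*hi*', null]) // `null` ends the stream.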
function createTokenizer(parser, initialize, from) {
  var point = from
    ? shallow(from)
    : {
        line: 1,
        column: 1,
        offset: 0
      }
  var columnStart = {}
  var resolveAllConstructs = []
  var chunks = []
  var stack = []
  var effects = {
    consume: consume,
    enter: enter,
    exit: exit,
    attempt: constructFactory(onsuccessfulconstruct),
    check: constructFactory(onsuccessfulcheck),
    interrupt: constructFactory(onsuccessfulcheck, {
      interrupt: true
    }),
    lazy: constructFactory(onsuccessfulcheck, {
      lazy: true
    })
  }

  // State and tools for resolving and serializing.
  var context = {
    previous: null,
    events: [],
    parser: parser,
    sliceStream: sliceStream,
    sliceSerialize: sliceSerialize,
    now: now,
    defineSkip: skip,
    write: write
  }

  // The state function.
  var state = initialize.tokenize.call(context, effects)

  if (initialize.resolveAll) {
    resolveAllConstructs.push(initialize)
  }

  // Store where we are in the input stream.
  point._index = 0
  point._bufferIndex = -1

  return context
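  // The returned `context` is what a construct’s `tokenize` sees as `this`;
  // for instance (sketch, with a hypothetical token type), a construct can
  // read back the source text of a token it produced:
  //
  //   var token = effects.exit('chunkString')
  //   this.sliceSerialize(token) // -> the text the token spans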
  function write(slice) {
    chunks = chunkedPush(chunks, slice)

    main()

    // Exit if we’re not done, resolve might change stuff.
    if (chunks[chunks.length - 1] !== null) {
      return []
    }

    addResult(initialize, 0)

    // Otherwise, resolve, and exit.
    context.events = resolveAll(resolveAllConstructs, context.events, context)

    return context.events
  }
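  // Sketch of the streaming contract above: `write` buffers until it sees
  // the `null` end-of-stream chunk, and only then resolves and returns
  // events (input values hypothetical):
  //
  //   tokenizer.write(['# hi']) // -> [] (not done yet)
  //   tokenizer.write([null])   // -> all events, fully resolved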
  //
  // Tools.
  //
  function sliceSerialize(token) {
    return serializeChunks(sliceStream(token))
  }

  function sliceStream(token) {
    return sliceChunks(chunks, token)
  }

  function now() {
    return shallow(point)
  }

  function skip(value) {
    columnStart[value.line] = value.column
    accountForPotentialSkip()
  }
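  // E.g. (sketch): a container construct whose prefix eats the first four
  // columns of a line can register that, so positions on continuation lines
  // account for the skipped indentation instead of restarting at column 1:
  //
  //   context.defineSkip({line: 2, column: 5})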
  //
  // State management.
  //
  // Main loop (note that `_index` and `_bufferIndex` in `point` are modified
  // by `consume`).
  // Here is where we walk through the chunks, which either include strings of
  // several characters, or numerical character codes.
  // The reason to do this in a loop instead of a call is so the stack can
  // drain.
  function main() {
    var chunkIndex
    var chunk

    while (point._index < chunks.length) {
      chunk = chunks[point._index]

      // If we’re in a buffer chunk, loop through it.
      if (typeof chunk === 'string') {
        chunkIndex = point._index

        if (point._bufferIndex < 0) {
          point._bufferIndex = 0
        }

        while (
          point._index === chunkIndex &&
          point._bufferIndex < chunk.length
        ) {
          go(chunk.charCodeAt(point._bufferIndex))
        }
      } else {
        go(chunk)
      }
    }
  }
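  // Sketch of what `chunks` can hold after preprocessing: string chunks are
  // walked character by character via `_bufferIndex`, while special
  // characters (tabs, line endings, EOF) arrive as single numerical chunks,
  // e.g.:
  //
  //   ['hello', -4, 'world', null] // -4 a line feed, `null` the EOF code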
  // Deal with one code.
  function go(code) {
    state = state(code)
  }
  // Move a character forward.
  function consume(code) {
    if (markdownLineEnding(code)) {
      point.line++
      point.column = 1
      point.offset += code === -3 ? 2 : 1
      accountForPotentialSkip()
    } else if (code !== -1) {
      point.column++
      point.offset++
    }

    // Not in a string chunk.
    if (point._bufferIndex < 0) {
      point._index++
    } else {
      point._bufferIndex++

      // At end of string chunk.
      if (point._bufferIndex === chunks[point._index].length) {
        point._bufferIndex = -1
        point._index++
      }
    }

    // Expose the previous character.
    context.previous = code
  }
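  // Note on the magic numbers above (micromark’s character codes): `-3` is a
  // carriage return + line feed, which is why it advances `offset` by 2, and
  // `-1` is a virtual space (from an expanded tab), which occupies no offset.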
  // Start a token.
  function enter(type, fields) {
    var token = fields || {}
    token.type = type
    token.start = now()

    context.events.push(['enter', token, context])

    stack.push(token)

    return token
  }

  // Stop a token.
  function exit(type) {
    var token = stack.pop()
    token.end = now()

    context.events.push(['exit', token, context])

    return token
  }
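  // E.g. (sketch, with a hypothetical token type): a construct pairs these
  // up while tokenizing,
  //
  //   effects.enter('thematicBreak') // events: [['enter', token, context]]
  //   effects.exit('thematicBreak')  // events: [..., ['exit', token, context]]
  //
  // with `token.start` and `token.end` filled in from `now()`.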
  // Use results.
  function onsuccessfulconstruct(construct, info) {
    addResult(construct, info.from)
  }

  // Discard results.
  function onsuccessfulcheck(construct, info) {
    info.restore()
  }
  // Factory to attempt/check/interrupt.
  function constructFactory(onreturn, fields) {
    return hook

    // Handle either an object mapping codes to constructs, a list of
    // constructs, or a single construct.
    function hook(constructs, returnState, bogusState) {
      var listOfConstructs
      var constructIndex
      var currentConstruct
      var info

      return constructs.tokenize || 'length' in constructs
        ? handleListOfConstructs(miniflat(constructs))
        : handleMapOfConstructs

      function handleMapOfConstructs(code) {
        if (code in constructs || null in constructs) {
          return handleListOfConstructs(
            constructs.null
              ? /* c8 ignore next */
                miniflat(constructs[code]).concat(miniflat(constructs.null))
              : constructs[code]
          )(code)
        }

        return bogusState(code)
      }

      function handleListOfConstructs(list) {
        listOfConstructs = list
        constructIndex = 0
        return handleConstruct(list[constructIndex])
      }

      function handleConstruct(construct) {
        return start

        function start(code) {
          // To do: no need to store if there is no bogus state, probably?
          // Currently doesn’t work because `inspect` in document does a check
          // w/o a bogus, which doesn’t make sense. But it does seem to help
          // perf by not storing.
          info = store()
          currentConstruct = construct

          if (!construct.partial) {
            context.currentConstruct = construct
          }

          if (
            construct.name &&
            context.parser.constructs.disable.null.indexOf(construct.name) > -1
          ) {
            return nok()
          }

          return construct.tokenize.call(
            fields ? assign({}, context, fields) : context,
            effects,
            ok,
            nok
          )(code)
        }
      }

      function ok(code) {
        onreturn(currentConstruct, info)
        return returnState
      }

      function nok(code) {
        info.restore()

        if (++constructIndex < listOfConstructs.length) {
          return handleConstruct(listOfConstructs[constructIndex])
        }

        return bogusState
      }
    }
  }
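  // E.g. (sketch, hypothetical constructs and states): a construct can try
  // several alternatives in order, falling through to a bogus state:
  //
  //   return effects.attempt(
  //     [codeFenced, codeIndented], // tried in order
  //     afterCode, // `returnState`, entered on `ok`
  //     notCode // `bogusState`, entered when every construct `nok`s
  //   )(code)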
  function addResult(construct, from) {
    if (construct.resolveAll && resolveAllConstructs.indexOf(construct) < 0) {
      resolveAllConstructs.push(construct)
    }

    if (construct.resolve) {
      chunkedSplice(
        context.events,
        from,
        context.events.length - from,
        construct.resolve(context.events.slice(from), context)
      )
    }

    if (construct.resolveTo) {
      context.events = construct.resolveTo(context.events, context)
    }
  }
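  // Shape of a construct with resolvers (sketch, hypothetical tokenizer):
  // `resolve` rewrites only the events the construct itself produced,
  // `resolveTo` receives and may rewrite all events so far, and `resolveAll`
  // runs once at the very end of `write`:
  //
  //   var construct = {
  //     tokenize: tokenizeExample,
  //     resolve: function (events, context) { return events },
  //     resolveTo: function (events, context) { return events },
  //     resolveAll: function (events, context) { return events }
  //   }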
  function store() {
    var startPoint = now()
    var startPrevious = context.previous
    var startCurrentConstruct = context.currentConstruct
    var startEventsIndex = context.events.length
    var startStack = Array.from(stack)

    return {
      restore: restore,
      from: startEventsIndex
    }

    function restore() {
      point = startPoint
      context.previous = startPrevious
      context.currentConstruct = startCurrentConstruct
      context.events.length = startEventsIndex
      stack = startStack
      accountForPotentialSkip()
    }
  }
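  // Note: `restore` rewinds the position, previous character, events, and
  // the token stack, so a failed `attempt`/`check` leaves no trace;
  // `accountForPotentialSkip` then reapplies any indent skip registered for
  // the line rewound to.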
  function accountForPotentialSkip() {
    if (point.line in columnStart && point.column < 2) {
      point.column = columnStart[point.line]
      point.offset += columnStart[point.line] - 1
    }
  }
}
module.exports = createTokenizer
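// A rough end-to-end sketch (micromark@2 internals; the public `micromark()`
// buffer API in `lib/index.js` performs essentially these steps before
// compiling events to HTML):
//
//   var parse = require('micromark/lib/parse.js')
//   var preprocess = require('micromark/lib/preprocess.js')
//   var postprocess = require('micromark/lib/postprocess.js')
//
//   var tokenizer = parse().document() // built on `createTokenizer`
//   var chunks = preprocess()('# hi', undefined, true) // ends with `null`
//   var events = postprocess(tokenizer.write(chunks))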