Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F95331012
decode.js
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sat, Dec 14, 19:13
Size
6 KB
Mime Type
text/x-java
Expires
Mon, Dec 16, 19:13 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
22947298
Attached To
rOACCT Open Access Compliance Check Tool (OACCT)
decode.js
View Options
import
htmlDecodeTree
from
"./generated/decode-data-html.js"
;
import
xmlDecodeTree
from
"./generated/decode-data-xml.js"
;
import
decodeCodePoint
from
"./decode_codepoint.js"
;
// Re-export for use by eg. htmlparser2
export
{
htmlDecodeTree
,
xmlDecodeTree
,
decodeCodePoint
};
export
{
replaceCodePoint
,
fromCodePoint
}
from
"./decode_codepoint.js"
;
/**
 * Character codes the decoder compares input against.
 *
 * Built as a two-way lookup: `CharCodes.NAME` gives the numeric code and
 * `CharCodes[code]` gives the name back.
 */
var CharCodes = CharCodes || {};
for (const [label, code] of [
    ["NUM", 35], // "#"
    ["SEMI", 59], // ";"
    ["ZERO", 48], // "0"
    ["NINE", 57], // "9"
    ["LOWER_A", 97], // "a"
    ["LOWER_F", 102], // "f"
    ["LOWER_X", 120], // "x"
    // Bit that needs to be set to convert an upper case ASCII character to lower case
    ["To_LOWER_BIT", 32],
]) {
    CharCodes[label] = code;
    CharCodes[code] = label;
}
/**
 * Bit masks applied to a decode-tree node.
 *
 * Built as a two-way lookup: `BinTrieFlags.NAME` gives the mask and
 * `BinTrieFlags[mask]` gives the name back.
 */
export var BinTrieFlags = BinTrieFlags || {};
for (const [label, mask] of [
    ["VALUE_LENGTH", 0b1100000000000000], // bits 14-15 (49152)
    ["BRANCH_LENGTH", 0b0011111110000000], // bits 7-13 (16256)
    ["JUMP_TABLE", 0b0000000001111111], // bits 0-6 (127)
]) {
    BinTrieFlags[label] = mask;
    BinTrieFlags[mask] = label;
}
/**
 * Creates a decoder bound to a single entity trie.
 *
 * @param decodeTree Integer-indexed trie that is walked with
 *   `determineBranch`; its nodes carry the `BinTrieFlags` bit fields.
 * @returns `decodeHTMLBinary(str, strict)`, which returns `str` with every
 *   recognised entity replaced. When `strict` is truthy, an entity is only
 *   replaced if it is terminated by ";".
 */
function getDecoder(decodeTree) {
    return function decodeHTMLBinary(str, strict) {
        // Decoded output accumulated so far.
        let ret = "";
        // Index of the first input character that has not been copied to `ret`.
        let lastIdx = 0;
        // Current read position in `str`.
        let strIdx = 0;

        while ((strIdx = str.indexOf("&", strIdx)) >= 0) {
            // Copy the literal text that precedes this "&".
            ret += str.slice(lastIdx, strIdx);
            lastIdx = strIdx;
            // Skip the "&"
            strIdx += 1;

            // If we have a numeric entity, handle this separately.
            if (str.charCodeAt(strIdx) === CharCodes.NUM) {
                // Skip the leading "&#". For hex entities, also skip the leading "x".
                let start = strIdx + 1;
                let base = 10;
                let cp = str.charCodeAt(start);
                if ((cp | CharCodes.To_LOWER_BIT) === CharCodes.LOWER_X) {
                    base = 16;
                    strIdx += 1;
                    start += 1;
                }

                // Advance `strIdx` to the first character that is not a digit of `base`.
                do {
                    cp = str.charCodeAt(++strIdx);
                } while (
                    (cp >= CharCodes.ZERO && cp <= CharCodes.NINE) ||
                    (base === 16 &&
                        (cp | CharCodes.To_LOWER_BIT) >= CharCodes.LOWER_A &&
                        (cp | CharCodes.To_LOWER_BIT) <= CharCodes.LOWER_F)
                );

                // `start === strIdx` means no digits followed the prefix, so the
                // text is left untouched.
                if (start !== strIdx) {
                    const entity = str.substring(start, strIdx);
                    const parsed = parseInt(entity, base);
                    if (str.charCodeAt(strIdx) === CharCodes.SEMI) {
                        strIdx += 1;
                    } else if (strict) {
                        // Strict mode keeps numeric entities without ";" as plain text.
                        continue;
                    }
                    ret += decodeCodePoint(parsed);
                    lastIdx = strIdx;
                }
                continue;
            }

            // Tree index of the most recently accepted value node; 0 means none.
            let resultIdx = 0;
            // Characters consumed since that value node was accepted.
            let excess = 1;
            let treeIdx = 0;
            let current = decodeTree[treeIdx];

            for (; strIdx < str.length; strIdx++, excess++) {
                treeIdx = determineBranch(
                    decodeTree,
                    current,
                    treeIdx + 1,
                    str.charCodeAt(strIdx)
                );
                if (treeIdx < 0) break;

                current = decodeTree[treeIdx];
                const masked = current & BinTrieFlags.VALUE_LENGTH;
                // If the branch is a value, store it and continue
                if (masked) {
                    // If we have a legacy entity while parsing strictly, just skip the number of bytes
                    if (!strict || str.charCodeAt(strIdx) === CharCodes.SEMI) {
                        resultIdx = treeIdx;
                        excess = 0;
                    }
                    // The mask is the number of bytes of the value, including the current byte.
                    const valueLength = (masked >> 14) - 1;
                    if (valueLength === 0) break;
                    treeIdx += valueLength;
                }
            }

            if (resultIdx !== 0) {
                const valueLength =
                    (decodeTree[resultIdx] & BinTrieFlags.VALUE_LENGTH) >> 14;
                if (valueLength === 1) {
                    // The value is stored in the node itself, below the length bits.
                    ret += String.fromCharCode(
                        decodeTree[resultIdx] & ~BinTrieFlags.VALUE_LENGTH
                    );
                } else if (valueLength === 2) {
                    // The value is the single element following the node.
                    ret += String.fromCharCode(decodeTree[resultIdx + 1]);
                } else {
                    // The value is the two elements following the node.
                    ret += String.fromCharCode(
                        decodeTree[resultIdx + 1],
                        decodeTree[resultIdx + 2]
                    );
                }
                // Resume copying right after the last character of the accepted entity.
                lastIdx = strIdx - excess + 1;
            }
        }

        // Append whatever follows the last replaced entity.
        return ret + str.slice(lastIdx);
    };
}
/**
 * Finds the tree index reached from a node by following one character.
 *
 * The node header `current` is split with the `BinTrieFlags` masks into a
 * branch count (`BRANCH_LENGTH`, shifted right by 7) and a jump offset
 * (`JUMP_TABLE`); together they select one of three lookup strategies.
 *
 * @param decodeTree The trie being walked.
 * @param current Header value of the node being left.
 * @param nodeIdx Index of the first element after that node's header/value.
 * @param char Character code to follow.
 * @returns The index of the next node, or -1 when `char` has no branch.
 */
export function determineBranch(decodeTree, current, nodeIdx, char) {
    const branchCount = (current & BinTrieFlags.BRANCH_LENGTH) >> 7;
    const jumpOffset = current & BinTrieFlags.JUMP_TABLE;

    // Case 1: Single branch encoded in jump offset
    if (branchCount === 0) {
        return jumpOffset !== 0 && char === jumpOffset ? nodeIdx : -1;
    }

    // Case 2: Multiple branches encoded in jump table
    if (jumpOffset) {
        const value = char - jumpOffset;
        // Table entries are read back minus one, so a stored 0 yields -1.
        return value < 0 || value >= branchCount
            ? -1
            : decodeTree[nodeIdx + value] - 1;
    }

    // Case 3: Multiple branches encoded in dictionary
    // Binary search for the character. The `branchCount` keys start at
    // `nodeIdx`; each key's destination sits `branchCount` slots after it.
    let lo = nodeIdx;
    let hi = lo + branchCount - 1;
    while (lo <= hi) {
        const mid = (lo + hi) >>> 1;
        const midVal = decodeTree[mid];
        if (midVal < char) {
            lo = mid + 1;
        } else if (midVal > char) {
            hi = mid - 1;
        } else {
            return decodeTree[mid + branchCount];
        }
    }
    return -1;
}
// One decoder per entity trie, built once when the module is evaluated.
const [htmlDecoder, xmlDecoder] = [htmlDecodeTree, xmlDecodeTree].map(
    (tree) => getDecoder(tree)
);
/**
 * Decodes an HTML string, allowing for entities not terminated by a semi-colon.
 *
 * Delegates to `htmlDecoder` with its strict flag set to `false`.
 *
 * @param str The string to decode.
 * @returns The decoded string.
 */
export function decodeHTML(str) {
    return htmlDecoder(str, false);
}
/**
 * Decodes an HTML string, requiring all entities to be terminated by a semi-colon.
 *
 * Delegates to `htmlDecoder` with its strict flag set to `true`.
 *
 * @param str The string to decode.
 * @returns The decoded string.
 */
export function decodeHTMLStrict(str) {
    return htmlDecoder(str, true);
}
/**
 * Decodes an XML string, requiring all entities to be terminated by a semi-colon.
 *
 * Delegates to `xmlDecoder` with its strict flag set to `true`.
 *
 * @param str The string to decode.
 * @returns The decoded string.
 */
export function decodeXML(str) {
    return xmlDecoder(str, true);
}
//# sourceMappingURL=decode.js.map
Event Timeline
Log In to Comment