Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F101352679
PhutilUTF8TestCase.php
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sat, Feb 8, 12:18
Size
20 KB
Mime Type
text/x-php
Expires
Mon, Feb 10, 12:18 (1 d, 20 h)
Engine
blob
Format
Raw Data
Handle
24142150
Attached To
rPHU libphutil
PhutilUTF8TestCase.php
View Options
<?php
/**
* Test cases for functions in utf8.php.
*/
final
class
PhutilUTF8TestCase
extends
PhutilTestCase
{
/**
 * ASCII input, including the control bytes 0x01 and 0x7F, must come back
 * from phutil_utf8ize() unchanged.
 */
public function testUTF8izeASCIIIgnored() {
  $ascii = "this\x01 is a \x7f test string";
  $converted = phutil_utf8ize($ascii);

  $this->assertEqual($ascii, $converted);
}
/**
 * Input made of two-byte and three-byte sequences followed by "!" must
 * come back from phutil_utf8ize() unchanged.
 */
public function testUTF8izeUTF8Ignored() {
  $multibyte = "\xc3\x9c\xc3\xbc\xe6\x9d\xb1!";
  $converted = phutil_utf8ize($multibyte);

  $this->assertEqual($multibyte, $converted);
}
/**
 * Smoke test: feed a 1 MiB string through phutil_utf8ize(). The test
 * passes simply by getting past the call.
 */
public function testUTF8izeLongStringNosegfault() {
  // Long inputs were observed to segfault inside preg_match() on at least
  // one machine. Guard against that in the common case.
  $one_mebibyte = 1024 * 1024;
  $long_input = str_repeat('x', $one_mebibyte);

  phutil_utf8ize($long_input);

  // Reaching this line is the whole assertion.
  $this->assertTrue(true);
}
/**
 * Each invalid byte in the input is expected to be replaced by the three
 * bytes EF BF BD (U+FFFD), leaving the surrounding ASCII text untouched.
 */
public function testUTF8izeInvalidUTF8Fixed() {
  $bad = "\xEF\xBF\xBD";

  $broken = "\xc3 this has \xe6\x9d some invalid utf8 \xe6";
  $wanted = "{$bad} this has {$bad}{$bad} some invalid utf8 {$bad}";

  $fixed = phutil_utf8ize($broken);

  $this->assertEqual($wanted, $fixed);
}
/**
 * Three consecutive invalid bytes in the middle of a string are each
 * expected to become EF BF BD (U+FFFD).
 */
public function testUTF8izeOwlIsCuteAndFerocious() {
  // Back when "?" was the replacement character this rendered as a
  // ferocious owl. With U+FFFD he is neither as cute nor as ferocious.
  $bad = "\xEF\xBF\xBD";

  $owl = "M(o\xEE\xFF\xFFo)M";
  $wanted = "M(o{$bad}{$bad}{$bad}o)M";

  $fixed = phutil_utf8ize($owl);

  $this->assertEqual($wanted, $fixed);
}
/**
 * Overlong encodings must be replaced byte-for-byte with EF BF BD (U+FFFD),
 * while valid input passes through phutil_utf8ize() untouched.
 */
public function testOverlongFormFiltering() {
  $bad = "\xEF\xBF\xBD";

  // Maps raw input to the expected output of phutil_utf8ize().
  $cases = array(
    'quack' => 'quack',

    // U+1000, a valid character.
    "\xE1\x80\x80" => "\xE1\x80\x80",

    // A 2-byte encoding of U+0000.
    "\xC0\x80" => "{$bad}{$bad}",

    // A 3-byte encoding of U+0020.
    "\xE0\x80\xA0" => "{$bad}{$bad}{$bad}",

    "A \xE0\x83\x83" => "A {$bad}{$bad}{$bad}",
  );

  foreach ($cases as $raw => $wanted) {
    $cleaned = phutil_utf8ize($raw);
    $label = pht('Overlong form canonicalization of: %s', $raw);

    $this->assertEqual($wanted, $cleaned, $label);
  }
}
/**
 * The three-byte sequence ED A9 98 (which decodes into the surrogate
 * range) is expected to be replaced byte-for-byte with EF BF BD (U+FFFD).
 */
public function testSurrogateFiltering() {
  $bad = "\xEF\xBF\xBD";

  // Maps raw input to the expected output of phutil_utf8ize().
  $cases = array(
    "A \xED\xA9\x98" => "A {$bad}{$bad}{$bad}",
  );

  foreach ($cases as $raw => $wanted) {
    $cleaned = phutil_utf8ize($raw);
    $label = pht('Surrogate filtering: %s', $raw);

    $this->assertEqual($wanted, $cleaned, $label);
  }
}
/**
 * phutil_utf8_encode_codepoint() must produce the expected byte sequence
 * for codepoints that need one, two, three and four bytes.
 */
public function testUTF8CodepointEncoding() {
  // Maps an integer codepoint to its expected encoded bytes.
  $encodings = array(
    0x20 => ' ',
    0x7E => '~',
    0xE9 => "\xC3\xA9",
    0x2603 => "\xE2\x98\x83",
    0x1F417 => "\xF0\x9F\x90\x97",
  );

  foreach ($encodings as $codepoint => $wanted) {
    $encoded = phutil_utf8_encode_codepoint($codepoint);
    $label = pht('UTF8 codepoint encoding of "%s".', $codepoint);

    $this->assertEqual($wanted, $encoded, $label);
  }
}
/**
 * phutil_utf8_strlen() must count a multi-byte sequence as one character.
 */
public function testUTF8len() {
  // Maps a string to the length phutil_utf8_strlen() should report.
  $lengths = array(
    '' => 0,
    'x' => 1,
    "\xEF\xBF\xBD" => 1,
    "x\xe6\x9d\xb1y" => 3,
    'xyz' => 3,
    'quack' => 5,
  );

  foreach ($lengths as $text => $wanted) {
    $measured = phutil_utf8_strlen($text);
    $label = 'Length of '.$text;

    $this->assertEqual($wanted, $measured, $label);
  }
}
/**
 * phutil_utf8v() must split a string into a list with one entry per
 * character, keeping multi-byte sequences intact.
 */
public function testUTF8v() {
  // Maps a string to the character list phutil_utf8v() should return.
  $vectors = array(
    '' => array(),
    'x' => array('x'),
    'quack' => array('q', 'u', 'a', 'c', 'k'),
    "x\xe6\x9d\xb1y" => array('x', "\xe6\x9d\xb1", 'y'),

    // A combining character gets its own entry here.
    "x\xCD\xA0y" => array('x', "\xCD\xA0", 'y'),
  );

  foreach ($vectors as $text => $wanted) {
    $split = phutil_utf8v($text);
    $label = 'Vector of '.$text;

    $this->assertEqual($wanted, $split, $label);
  }
}
/**
 * phutil_utf8v_codepoints() must decode a string into a list of integer
 * codepoints, covering one- to four-byte sequences.
 */
public function testUTF8vCodepoints() {
  // Maps a string to the codepoint list that should be returned for it.
  $vectors = array(
    '' => array(),
    'x' => array(0x78),
    'quack' => array(0x71, 0x75, 0x61, 0x63, 0x6B),
    "x\xe6\x9d\xb1y" => array(0x78, 0x6771, 0x79),

    "\xC2\xBB" => array(0x00BB),
    "\xE2\x98\x83" => array(0x2603),
    "\xEF\xBF\xBF" => array(0xFFFF),
    "\xF0\x9F\x92\xA9" => array(0x1F4A9),

    // A combining character gets its own codepoint here.
    "x\xCD\xA0y" => array(0x78, 0x0360, 0x79),
  );

  foreach ($vectors as $text => $wanted) {
    $decoded = phutil_utf8v_codepoints($text);
    $label = pht('Codepoint Vector of %s', $text);

    $this->assertEqual($wanted, $decoded, $label);
  }
}
/**
 * phutil_utf8_console_strlen() must report the expected console width for
 * NUL, double-width, combining and escape-sequence inputs.
 */
public function testUTF8ConsoleStrlen() {
  // Maps a string to the console width that should be reported for it.
  $widths = array(
    '' => 0,
    "\0" => 0,
    'x' => 1,

    // Double-width Chinese character.
    "\xe6\x9d\xb1" => 2,

    // Combining character.
    "x\xCD\xA0y" => 2,

    // Combining plus double-width.
    "\xe6\x9d\xb1\xCD\xA0y" => 3,

    // Colors and formatting.
    "\x1B[1mx\x1B[m" => 1,
    "\x1B[1m\x1B[31mx\x1B[m" => 1,
  );

  foreach ($widths as $text => $wanted) {
    $measured = phutil_utf8_console_strlen($text);
    $label = pht('Console Length of %s', $text);

    $this->assertEqual($wanted, $measured, $label);
  }
}
/**
 * Glyph-limited truncation through PhutilUTF8StringTruncator.
 *
 * Each case is a tuple of: input string, maximum glyphs, terminator, and
 * the expected truncated result.
 */
public function testUTF8shorten() {
  $cases = array(
    array('1erp derp derp', 9, '', '1erp derp'),
    array('2erp derp derp', 12, '...', '2erp derp...'),
    array('derpxderpxderp', 12, '...', 'derpxderp...'),
    array(
      "derp\xE2\x99\x83derpderp",
      12,
      '...',
      "derp\xE2\x99\x83derp...",
    ),
    array('', 12, '...', ''),
    array('derp', 12, '...', 'derp'),
    array('11111', 5, '2222', '11111'),
    array('111111', 5, '2222', '12222'),

    array('D1rp. Derp derp.', 7, '...', 'D1rp.'),

    // "D2rp." would be a nicer result for this one, but producing it is
    // dramatically more complicated with the newer byte/glyph/character
    // shortening code.
    array('D2rp. Derp derp.', 5, '...', 'D2...'),
    array('D3rp. Derp derp.', 4, '...', 'D...'),
    array('D4rp. Derp derp.', 14, '...', 'D4rp. Derp...'),
    array('D5rpderp, derp derp', 16, '...', 'D5rpderp...'),
    array('D6rpderp, derp derp', 17, '...', 'D6rpderp, derp...'),

    // Strings with combining characters.
    array("Gr\xCD\xA0mpyCatSmiles", 8, '...', "Gr\xCD\xA0mpy..."),
    array(
      "X\xCD\xA0\xCD\xA0\xCD\xA0Y",
      1,
      '',
      "X\xCD\xA0\xCD\xA0\xCD\xA0",
    ),

    array(
      'Derp, supercalafragalisticexpialadoshus',
      30,
      '...',
      'Derp, supercalafragalistice...',
    ),

    // A string made only of word-break characters should simply be cut,
    // rather than collapsing to just the terminator.
    array('((((((((((', 8, '...', '(((((...'),

    // Terminator is longer than the requested length.
    array('derp', 3, 'quack', 'quack'),

    array(
      'O123: com/oracle/java/path/to/application/source/ThingFactory.java',
      32,
      '...',
      'O123: com/oracle/java/path/to...',
    ),
  );

  foreach ($cases as $case) {
    list($text, $max_glyphs, $terminator, $wanted) = $case;

    $truncator = id(new PhutilUTF8StringTruncator())
      ->setMaximumGlyphs($max_glyphs)
      ->setTerminator($terminator);
    $shortened = $truncator->truncateString($text);

    $this->assertEqual($wanted, $shortened, pht('Shortening of %s', $text));
  }
}
/**
 * Truncate the same input by bytes, by codepoints and by glyphs, always
 * with "!" as the terminator.
 *
 * Each case is a tuple of: input, byte limit, expected byte-limited
 * output, codepoint limit, expected codepoint-limited output, glyph limit,
 * expected glyph-limited output.
 */
public function testUTF8StringTruncator() {
  $cases = array(
    array(
      "o\xCD\xA0o\xCD\xA0o\xCD\xA0o\xCD\xA0o\xCD\xA0",
      6,
      "o\xCD\xA0!",
      6,
      "o\xCD\xA0o\xCD\xA0!",
      6,
      "o\xCD\xA0o\xCD\xA0o\xCD\xA0o\xCD\xA0o\xCD\xA0",
    ),
    array(
      "X\xCD\xA0\xCD\xA0\xCD\xA0Y",
      6,
      '!',
      6,
      "X\xCD\xA0\xCD\xA0\xCD\xA0Y",
      6,
      "X\xCD\xA0\xCD\xA0\xCD\xA0Y",
    ),
    array(
      "X\xCD\xA0\xCD\xA0\xCD\xA0YZ",
      6,
      '!',
      5,
      "X\xCD\xA0\xCD\xA0\xCD\xA0!",
      2,
      "X\xCD\xA0\xCD\xA0\xCD\xA0!",
    ),
    array(
      "\xE2\x98\x83\xE2\x98\x83\xE2\x98\x83\xE2\x98\x83",
      4,
      "\xE2\x98\x83!",
      3,
      "\xE2\x98\x83\xE2\x98\x83!",
      3,
      "\xE2\x98\x83\xE2\x98\x83!",
    ),
  );

  foreach ($cases as $case) {
    list(
      $text,
      $byte_limit,
      $byte_wanted,
      $point_limit,
      $point_wanted,
      $glyph_limit,
      $glyph_wanted) = $case;

    // Limit by bytes.
    $by_bytes = id(new PhutilUTF8StringTruncator())
      ->setMaximumBytes($byte_limit)
      ->setTerminator('!');
    $shortened = $by_bytes->truncateString($text);
    $this->assertEqual(
      $byte_wanted,
      $shortened,
      pht('byte-short of %s', $text));

    // Limit by codepoints.
    $by_points = id(new PhutilUTF8StringTruncator())
      ->setMaximumCodepoints($point_limit)
      ->setTerminator('!');
    $shortened = $by_points->truncateString($text);
    $this->assertEqual(
      $point_wanted,
      $shortened,
      pht('codepoint-short of %s', $text));

    // Limit by glyphs.
    $by_glyphs = id(new PhutilUTF8StringTruncator())
      ->setMaximumGlyphs($glyph_limit)
      ->setTerminator('!');
    $shortened = $by_glyphs->truncateString($text);
    $this->assertEqual(
      $glyph_wanted,
      $shortened,
      pht('glyph-short of %s', $text));
  }
}
/**
 * Truncate a very large input (1024 * 1024 repetitions of a four-byte
 * character) down to 16 bytes and check the result.
 */
public function testUTF8LargeTruncation() {
  // This checks that performance stays reasonable when a large input is
  // truncated into a small output: runtime should be on the order of the
  // output size, not the input size.
  $whale = "\xF0\x9F\x90\xB3";
  $repetitions = 1024 * 1024;
  $pod = str_repeat($whale, $repetitions);

  $truncator = id(new PhutilUTF8StringTruncator())
    ->setMaximumBytes(16)
    ->setTerminator('!');
  $shortened = $truncator->truncateString($pod);

  $wanted = str_repeat($whale, 3).'!';

  $this->assertEqual($wanted, $shortened, pht('Large truncation.'));
}
/**
 * phutil_utf8_hard_wrap_html() must split a string into lines of at most
 * the given width.
 *
 * Each case is a tuple of: input string, width, expected list of lines.
 */
public function testUTF8Wrap() {
  $cases = array(
    array('aaaaaaa', 3, array('aaa', 'aaa', 'a')),

    // The "<b>" tag stays attached to the first line.
    array('aa<b>aaaaa', 3, array('aa<b>a', 'aaa', 'a')),

    // NOTE(review): this literal is a bare ampersand here; it may have been
    // an HTML entity in the repository copy. Confirm before relying on it.
    array('aa&aaaa', 3, array('aa&', 'aaa', 'a')),

    array("aa\xe6\x9d\xb1aaaa", 3, array("aa\xe6\x9d\xb1", 'aaa', 'a')),
    array('', 80, array()),
    array('a', 80, array('a')),
  );

  foreach ($cases as $case) {
    list($text, $columns, $wanted) = $case;

    $wrapped = phutil_utf8_hard_wrap_html($text, $columns);
    $label = pht("Wrapping of '%s'.", $text);

    $this->assertEqual($wanted, $wrapped, $label);
  }
}
/**
 * phutil_utf8_hard_wrap() must split a string into lines of at most the
 * given width.
 *
 * Each case is a tuple of: input string, width, expected list of lines.
 */
public function testUTF8NonHTMLWrap() {
  $cases = array(
    array('aaaaaaa', 3, array('aaa', 'aaa', 'a')),
    array('abracadabra!', 4, array('abra', 'cada', 'bra!')),
    array('', 10, array()),
    array('a', 20, array('a')),
    array("aa\xe6\x9d\xb1aaaa", 3, array("aa\xe6\x9d\xb1", 'aaa', 'a')),

    // Input that already contains newlines.
    array("mmm\nmmm\nmmmm", 3, array('mmm', 'mmm', 'mmm', 'm')),
  );

  foreach ($cases as $case) {
    list($text, $columns, $wanted) = $case;

    $wrapped = phutil_utf8_hard_wrap($text, $columns);
    $label = pht("Wrapping of '%s'", $text);

    $this->assertEqual($wanted, $wrapped, $label);
  }
}
/**
 * phutil_utf8_convert() must throw when either encoding argument is the
 * empty string.
 */
public function testUTF8ConvertParams() {
  // Empty third argument.
  $rejected = false;
  try {
    phutil_utf8_convert('', 'utf8', '');
  } catch (Exception $ex) {
    $rejected = true;
  }
  $this->assertTrue($rejected, pht('Requires source encoding.'));

  // Empty second argument.
  $rejected = false;
  try {
    phutil_utf8_convert('', '', 'utf8');
  } catch (Exception $ex) {
    $rejected = true;
  }
  $this->assertTrue($rejected, pht('Requires target encoding.'));
}
/**
 * Convert an ISO-8859-1 string to UTF-8 with phutil_utf8_convert(), then
 * check that a bogus encoding name makes it throw.
 *
 * Marked as skipped when mb_convert_encoding() is not available.
 */
public function testUTF8Convert() {
  $has_mbstring = function_exists('mb_convert_encoding');
  if (!$has_mbstring) {
    $this->assertSkipped(pht('Requires %s extension.', 'mbstring'));
  }

  // "[ae]gis se[n]or [(c)] 1970 [+/-] 1 [degree]"
  $latin1 = "\xE6gis SE\xD1OR \xA9 1970 \xB11\xB0";
  $wanted = "\xC3\xA6gis SE\xC3\x91OR \xC2\xA9 1970 \xC2\xB11\xC2\xB0";

  $converted = phutil_utf8_convert($latin1, 'UTF-8', 'ISO-8859-1');

  $this->assertEqual($wanted, $converted, pht('Conversion from ISO-8859-1.'));

  // An encoding name that does not exist must be rejected.
  $rejected = false;
  try {
    phutil_utf8_convert('xyz', 'moon language', 'UTF-8');
  } catch (Exception $ex) {
    $rejected = true;
  }

  $this->assertTrue($rejected, pht('Conversion with bogus encoding.'));
}
/**
 * phutil_utf8_ucwords() must uppercase the first letter of each word and
 * leave every other character as it was.
 */
public function testUTF8ucwords() {
  // Maps input to the expected output of phutil_utf8_ucwords().
  $cases = array(
    '' => '',
    'x' => 'X',
    'X' => 'X',
    'five short graybles' => 'Five Short Graybles',
    'xXxSNiPeRKiLLeRxXx' => 'XXxSNiPeRKiLLeRxXx',
  );

  foreach ($cases as $text => $wanted) {
    $titled = phutil_utf8_ucwords($text);
    $label = 'phutil_utf8_ucwords("'.$text.'")';

    $this->assertEqual($wanted, $titled, $label);
  }
}
/**
 * phutil_utf8_strtolower() must lowercase ASCII letters and leave
 * punctuation, digits and the snowman character unchanged.
 */
public function testUTF8strtolower() {
  // Maps input to the expected output of phutil_utf8_strtolower().
  $cases = array(
    '' => '',
    'a' => 'a',
    'A' => 'a',
    '!' => '!',
    'OMG!~ LOLolol ROFLwaffle11~' => 'omg!~ lololol roflwaffle11~',
    "\xE2\x98\x83" => "\xE2\x98\x83",
  );

  foreach ($cases as $text => $wanted) {
    $lowered = phutil_utf8_strtolower($text);
    $label = 'phutil_utf8_strtolower("'.$text.'")';

    $this->assertEqual($wanted, $lowered, $label);
  }
}
/**
 * phutil_utf8_strtoupper() must uppercase ASCII letters and leave
 * punctuation, digits and the snowman character unchanged.
 */
public function testUTF8strtoupper() {
  // Maps input to the expected output of phutil_utf8_strtoupper().
  $cases = array(
    '' => '',
    'a' => 'A',
    'A' => 'A',
    '!' => '!',
    'Cats have 9 lives.' => 'CATS HAVE 9 LIVES.',
    "\xE2\x98\x83" => "\xE2\x98\x83",
  );

  foreach ($cases as $text => $wanted) {
    $raised = phutil_utf8_strtoupper($text);
    $label = 'phutil_utf8_strtoupper("'.$text.'")';

    $this->assertEqual($wanted, $raised, $label);
  }
}
/**
 * phutil_utf8_is_combining_character() must return true for the two-byte
 * sequence CD A0 and false for a plain "a".
 */
public function testUTF8IsCombiningCharacter() {
  $combining = "\xCD\xA0";
  $is_combining = phutil_utf8_is_combining_character($combining);
  $this->assertEqual(true, $is_combining);

  $plain = 'a';
  $is_combining = phutil_utf8_is_combining_character($plain);
  $this->assertEqual(false, $is_combining);
}
/**
 * phutil_utf8v_combined() must split a string into a list in which each
 * combining character is attached to the entry before it.
 *
 * Each case is a pair of: input string, expected list. Cases are checked
 * in the order listed.
 */
public function testUTF8vCombined() {
  $cases = array(
    // Empty string.
    array('', array()),

    // Single character.
    array('x', array('x')),

    // No combining characters.
    array('cat', array('c', 'a', 't')),

    // A combining character in the middle of the string.
    array("ca\xCD\xA0t", array('c', "a\xCD\xA0", 't')),

    // String starting with a combined character.
    array("c\xCD\xA0at", array("c\xCD\xA0", 'a', 't')),

    // Trailing combining character.
    array("cat\xCD\xA0", array('c', 'a', "t\xCD\xA0")),

    // Multiple combined characters.
    array(
      "c\xCD\xA0a\xCD\xA0t\xCD\xA0",
      array("c\xCD\xA0", "a\xCD\xA0", "t\xCD\xA0"),
    ),

    // Multiple combining characters on one base character.
    array(
      "ca\xCD\xA0\xCD\xA0t",
      array('c', "a\xCD\xA0\xCD\xA0", 't'),
    ),

    // String beginning with combining characters.
    array(
      "\xCD\xA0\xCD\xA0c",
      array("\xCD\xA0\xCD\xA0", 'c'),
    ),
  );

  foreach ($cases as $case) {
    list($text, $wanted) = $case;

    $this->assertEqual($wanted, phutil_utf8v_combined($text));
  }
}
/**
 * Smoke test: run phutil_is_utf8_with_only_bmp_characters() over
 * 1024 * 32 repetitions of the three-byte sequence EF BF BF.
 */
public function testUTF8BMPSegfaults() {
  // This test fails by segfaulting and passes by not segfaulting. See the
  // function implementation for details.
  $repetitions = 1024 * 32;
  $long_input = str_repeat("\xEF\xBF\xBF", $repetitions);

  phutil_is_utf8_with_only_bmp_characters($long_input);

  // Reaching this line is the whole assertion.
  $this->assertTrue(true);
}
/**
 * phutil_utf8_is_cjk() must return true only for the one CJK character in
 * the table below.
 */
public function testCJK() {
  // Maps input to the expected result of phutil_utf8_is_cjk().
  $cases = array(
    '' => false,
    'a' => false,
    '.' => false,
    "\xE2\x98\x83" => false,
    "\xE5\xA0\xB1" => true,
  );

  foreach ($cases as $text => $wanted) {
    $is_cjk = phutil_utf8_is_cjk($text);
    $label = pht('CJK: "%s"', $text);

    $this->assertEqual($wanted, $is_cjk, $label);
  }
}
/**
 * Check UTF-8 validity and BMP-only detection against a table of inputs.
 *
 * Each table entry maps an input string to a tuple of: whether the input
 * is expected to be valid UTF-8, whether it is expected to be valid UTF-8
 * containing only BMP characters, and a human-readable label used in
 * assertion messages.
 *
 * Every input is checked with phutil_is_utf8(), phutil_is_utf8_slowly()
 * and phutil_is_utf8_with_only_bmp_characters().
 */
public function testUTF8BMP() {
  $tests = array(
    '' => array(
      true,
      true,
      pht('empty string'),
    ),
    'a' => array(
      true,
      true,
      'a',
    ),
    "a\xCD\xA0\xCD\xA0" => array(
      true,
      true,
      pht('%s with combining', 'a'),
    ),
    "\xE2\x98\x83" => array(
      true,
      true,
      pht('snowman'),
    ),

    // This is the last character in BMP, U+FFFF.
    "\xEF\xBF\xBF" => array(
      true,
      true,
      'U+FFFF',
    ),

    // This isn't valid.
    "\xEF\xBF\xC0" => array(
      false,
      false,
      pht('Invalid, byte range.'),
    ),

    // This is an invalid nonminimal representation.
    "\xF0\x81\x80\x80" => array(
      false,
      false,
      pht('Nonminimal 4-byte character.'),
    ),

    // This is the first character above BMP, U+10000.
    "\xF0\x90\x80\x80" => array(
      true,
      false,
      'U+10000',
    ),
    "\xF0\x9D\x84\x9E" => array(
      true,
      false,
      'gclef',
    ),

    "musical \xF0\x9D\x84\x9E g-clef" => array(
      true,
      false,
      pht('gclef text'),
    ),
    "\xF0\x9D\x84" => array(
      false,
      false,
      pht('Invalid, truncated.'),
    ),

    "\xE0\x80\x80" => array(
      false,
      false,
      pht('Nonminimal 3-byte character.'),
    ),

    // Partial BMP characters.
    "\xCD" => array(
      false,
      false,
      pht('Partial 2-byte character.'),
    ),
    "\xE0\xA0" => array(
      false,
      false,
      pht('Partial BMP 0xE0 character.'),
    ),
    "\xE2\x98" => array(
      false,
      false,
      // Label spelling corrected (was "cahracter").
      pht('Partial BMP character.'),
    ),
  );

  foreach ($tests as $input => $test) {
    list($expect_utf8, $expect_bmp, $test_name) = $test;

    // Depending on what's installed on the system, this may use an
    // extension.
    $this->assertEqual(
      $expect_utf8,
      phutil_is_utf8($input),
      pht('is_utf(%s)', $test_name));

    // Also test this against the pure PHP implementation, explicitly.
    $this->assertEqual(
      $expect_utf8,
      phutil_is_utf8_slowly($input),
      pht('is_utf_slowly(%s)', $test_name));

    $this->assertEqual(
      $expect_bmp,
      phutil_is_utf8_with_only_bmp_characters($input),
      pht('is_utf_bmp(%s)', $test_name));
  }
}
/**
 * Exercise the system locale helpers: reading the current locale, probing
 * availability, rejecting an imaginary locale, and switching between two
 * real locales.
 *
 * The switching part is skipped unless both "en_US.UTF-8" and
 * "en_GB.UTF-8" are available. Once the locale has been changed, the
 * original locale is restored even if a later step throws, so a failure
 * here does not leave a changed locale behind.
 */
public function testSystemLocaleManagement() {
  $original_locale = phutil_get_system_locale();
  $this->assertTrue(
    (strlen($original_locale) > 0),
    pht('System has some identifiable locale.'));

  $this->assertFalse(
    phutil_is_system_locale_available('duck.quack'),
    pht('Imaginary locale should be unavailable.'));

  $this->assertEqual(
    $original_locale,
    phutil_get_system_locale(),
    pht('Testing locale availability should not change the locale.'));

  $this->assertTrue(
    phutil_is_system_locale_available($original_locale),
    pht('The current locale should be available.'));

  $caught = null;
  try {
    phutil_set_system_locale('duck.quack');
  } catch (Exception $ex) {
    $caught = $ex;
  }

  $this->assertTrue(
    ($caught instanceof Exception),
    pht('Setting an imaginary locale should raise an exception.'));

  // We need two locales for the next part because one of them might be the
  // current locale, and we want to make sure we can actually change the
  // locale value.

  // If the current locale was "zz_ZZ", and then we do this:
  //
  //   set_locale("zz_ZZ");
  //   assert("zz_ZZ" == get_locale());
  //
  // ...the test could pass even if "set_locale(...)" does nothing.

  $has_us = phutil_is_system_locale_available('en_US.UTF-8');
  $has_gb = phutil_is_system_locale_available('en_GB.UTF-8');
  if (!$has_us || !$has_gb) {
    $this->assertSkipped(
      pht(
        'System does not have en_US + en_GB to do locale adjustment '.
        'tests.'));
  }

  // From here on the locale is being modified. Hold on to any exception
  // (a failed assertion or a failed locale change) so the original locale
  // can be restored before the exception is rethrown.
  $failure = null;
  try {
    phutil_set_system_locale('en_US.UTF-8');
    $this->assertEqual(
      'en_US.UTF-8',
      phutil_get_system_locale(),
      pht('Set locale to en_US.'));

    phutil_set_system_locale('en_GB.UTF-8');
    $this->assertEqual(
      'en_GB.UTF-8',
      phutil_get_system_locale(),
      pht('Set locale to en_GB.'));
  } catch (Exception $ex) {
    $failure = $ex;
  }

  // Put things back the way they were.
  phutil_set_system_locale($original_locale);

  if ($failure) {
    throw $failure;
  }
}
}
Event Timeline
Log In to Comment