Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F62643517
PhutilUTF8TestCase.php
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Tue, May 14, 13:39
Size
5 KB
Mime Type
text/x-php
Expires
Thu, May 16, 13:39 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
17671883
Attached To
rPHU libphutil
PhutilUTF8TestCase.php
View Options
<?php
/*
* Copyright 2012 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Test cases for functions in utf8.php.
 *
 * Exercises phutil_utf8ize(), phutil_utf8_strlen(), phutil_utf8v(),
 * phutil_utf8_shorten() and phutil_utf8_hard_wrap_html().
 *
 * Byte-level fixtures are spelled with "\x.." escapes inside double-quoted
 * literals. Every escape must stay intact on a single line: PHP only treats
 * "\x" as an escape when hex digits follow it immediately, so a literal that
 * is broken between "\x" and its digits silently turns into different bytes.
 *
 * @group testcase
 */
final class PhutilUTF8TestCase extends ArcanistPhutilTestCase {

  /**
   * A string made only of single-byte characters (including the control
   * bytes 0x01 and 0x7f) must come back from phutil_utf8ize() unchanged.
   */
  public function testUTF8ize_ASCII_ignored() {
    $input = "this\x01 is a \x7f test string";
    $this->assertEqual($input, phutil_utf8ize($input));
  }

  /**
   * A string of two- and three-byte sequences (U+00DC, U+00FC, U+6771)
   * followed by "!" must come back from phutil_utf8ize() unchanged.
   */
  public function testUTF8ize_UTF8_ignored() {
    $input = "\xc3\x9c\xc3\xbc\xe6\x9d\xb1!";
    $this->assertEqual($input, phutil_utf8ize($input));
  }

  /**
   * Runs phutil_utf8ize() over a 1 MiB string of 'x' characters. The result
   * is not inspected; the point is only that the call returns at all.
   */
  public function testUTF8ize_LongString_nosegfault() {
    // For some reason my laptop is segfaulting on long inputs inside
    // preg_match(). Forestall this craziness in the common case, at least.
    phutil_utf8ize(str_repeat('x', 1024 * 1024));

    // Reaching this line means the call above returned. The assertion is
    // trivially true -- presumably it only exists so the test records a pass.
    $this->assertEqual(true, true);
  }

  /**
   * Each byte that does not belong to a complete sequence in the input is
   * expected to be replaced by U+FFFD ("\xEF\xBF\xBD"); the surrounding
   * ASCII text is expected to be preserved.
   */
  public function testUTF8ize_invalidUTF8_fixed() {
    $input =
      "\xc3 this has \xe6\x9d some invalid utf8 \xe6";
    $expect =
      "\xEF\xBF\xBD this has \xEF\xBF\xBD\xEF\xBF\xBD some invalid utf8 ".
      "\xEF\xBF\xBD";

    $result = phutil_utf8ize($input);
    $this->assertEqual($expect, $result);
  }

  /**
   * Three consecutive stray bytes (0xEE 0xFF 0xFF) in the middle of an ASCII
   * string are expected to become three U+FFFD replacement characters.
   */
  public function testUTF8ize_owl_isCuteAndFerocious() {
    // This was once a ferocious owl when we used to use "?" as the replacement
    // character instead of U+FFFD, but now he is sort of not as cute or
    // ferocious.
    $input = "M(o\xEE\xFF\xFFo)M";
    $expect = "M(o\xEF\xBF\xBD\xEF\xBF\xBD\xEF\xBF\xBDo)M";

    $result = phutil_utf8ize($input);
    $this->assertEqual($expect, $result);
  }

  /**
   * Checks phutil_utf8_strlen() against a map of input string => expected
   * length, where a multi-byte sequence counts as one.
   */
  public function testUTF8len() {
    $strings = array(
      ''                => 0,
      'x'               => 1,
      "\xEF\xBF\xBD"    => 1,
      "x\xe6\x9d\xb1y"  => 3,
      "xyz"             => 3,
      'quack'           => 5,
    );

    foreach ($strings as $str => $expect) {
      $this->assertEqual(
        $expect,
        phutil_utf8_strlen($str),
        'Length of '.$str);
    }
  }

  /**
   * Checks phutil_utf8v() against a map of input string => expected list of
   * characters, where a multi-byte sequence is a single list element.
   */
  public function testUTF8v() {
    $strings = array(
      ''                => array(),
      'x'               => array('x'),
      'quack'           => array('q', 'u', 'a', 'c', 'k'),
      "x\xe6\x9d\xb1y"  => array('x', "\xe6\x9d\xb1", 'y'),

      // TODO: This test does not pass. phutil_utf8v() should merge combining
      // characters.
      // "x\xCD\xA0y" => array("x\xCD\xA0", 'y'),
    );

    foreach ($strings as $str => $expect) {
      $this->assertEqual(
        $expect,
        phutil_utf8v($str),
        'Vector of '.$str);
    }
  }

  /**
   * Checks phutil_utf8_shorten() against a table of fixtures, then checks
   * that one specific call throws.
   *
   * Each fixture row is: input string, length, terminal, expected result.
   */
  public function testUTF8shorten() {
    $inputs = array(
      array("1erp derp derp", 9, "", "1erp derp"),
      array("2erp derp derp", 12, "...", "2erp derp..."),
      array("derpxderpxderp", 12, "...", "derpxderp..."),
      array("derp\xE2\x99\x83derpderp", 12, "...", "derp\xE2\x99\x83derp..."),
      array("", 12, "...", ""),
      array("derp", 12, "...", "derp"),
      array("11111", 5, "2222", "11111"),
      array("111111", 5, "2222", "12222"),

      array("D1rp. Derp derp.", 7, "...", "D1rp."),
      array("D2rp. Derp derp.", 5, "...", "D2rp."),
      array("D3rp. Derp derp.", 4, "...", "D..."),
      array("D4rp. Derp derp.", 14, "...", "D4rp. Derp..."),
      array("D5rpderp, derp derp", 16, "...", "D5rpderp..."),
      array("D6rpderp, derp derp", 17, "...", "D6rpderp, derp..."),

      // This behavior is maybe a little bad, but it seems mostly reasonable,
      // at least for latin languages.
      array("Derp, supercalafragalisticexpialadoshus", 30, "...", "Derp..."),

      // If a string has only word-break characters in it, we should just cut
      // it, not produce only the terminal.
      array("((((((((((", 8, '...', '(((((...'),
    );

    foreach ($inputs as $input) {
      list($string, $length, $terminal, $expect) = $input;
      $result = phutil_utf8_shorten($string, $length, $terminal);
      $this->assertEqual($expect, $result, 'Shortening of '.$string);
    }

    // This call is expected to throw -- presumably because the terminal
    // ('quack') is longer than the requested length (3).
    $caught = false;
    try {
      phutil_utf8_shorten('derp', 3, 'quack');
    } catch (Exception $ex) {
      $caught = true;
    }

    $this->assertEqual(true, $caught, 'Expect exception for terminal.');
  }

  /**
   * Checks phutil_utf8_hard_wrap_html() against a table of fixtures.
   *
   * Each fixture row is: input string, width, expected list of lines.
   */
  public function testUTF8Wrap() {
    $inputs = array(
      array(
        'aaaaaaa',
        3,
        array(
          'aaa',
          'aaa',
          'a',
        )),
      // The "<b>" tag stays attached to its line and the line still holds
      // three "a" characters.
      array(
        'aa<b>aaaaa',
        3,
        array(
          'aa<b>a',
          'aaa',
          'a',
        )),
      // NOTE(review): this fixture holds a bare "&". If it was meant to hold
      // an HTML entity, the entity may have been unescaped somewhere in
      // transit -- confirm against the upstream copy of this file.
      array(
        'aa&aaaa',
        3,
        array(
          'aa&',
          'aaa',
          'a',
        )),
      // A three-byte sequence occupies one column.
      array(
        "aa\xe6\x9d\xb1aaaa",
        3,
        array(
          "aa\xe6\x9d\xb1",
          'aaa',
          'a',
        )),
      array(
        '',
        80,
        array(
        )),
      array(
        'a',
        80,
        array(
          'a',
        )),
    );

    foreach ($inputs as $input) {
      list($string, $width, $expect) = $input;
      $this->assertEqual(
        $expect,
        phutil_utf8_hard_wrap_html($string, $width),
        "Wrapping of '".$string."'");
    }
  }

}
Event Timeline
Log In to Comment