Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F65586449
PhutilICSParser.php
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Tue, Jun 4, 21:17
Size
13 KB
Mime Type
text/x-php
Expires
Thu, Jun 6, 21:17 (2 d)
Engine
blob
Format
Raw Data
Handle
18098714
Attached To
rPHU libphutil
PhutilICSParser.php
View Options
<?php
final
class
PhutilICSParser
extends
Phobject
{
private
$stack
;
private
$node
;
private
$document
;
public
function
parseICSData
(
$data
)
{
$this
->
stack
=
array
();
$this
->
node
=
null
;
$this
->
document
=
null
;
$lines
=
$this
->
unfoldICSLines
(
$data
);
foreach
(
$lines
as
$line
)
{
$matches
=
null
;
if
(
preg_match
(
'(^BEGIN:(.*)
\z
)'
,
$line
,
$matches
))
{
$this
->
beginParsingNode
(
$matches
[
1
]);
}
else
if
(
preg_match
(
'(^END:(.*)
\z
)'
,
$line
,
$matches
))
{
$this
->
endParsingNode
(
$matches
[
1
]);
}
else
{
$this
->
parseICSProperty
(
$line
);
}
}
if
(!
$this
->
document
)
{
$this
->
raiseParseFailure
(
pht
(
'Expected ICS document to define a "VCALENDAR" section.'
));
}
if
(
$this
->
stack
)
{
$this
->
raiseParseFailure
(
pht
(
'Expected all "BEGIN:" sections in ICS document to have '
.
'corresponding "END:" sections.'
));
}
$document
=
$this
->
document
;
$this
->
document
=
null
;
return
$document
;
}
private
function
getNode
()
{
return
$this
->
node
;
}
private
function
unfoldICSLines
(
$data
)
{
$lines
=
phutil_split_lines
(
$data
,
$retain_endings
=
false
);
// ICS files are wrapped at 75 characters, with overlong lines continued
// on the following line with an initial space or tab. Unwrap all of the
// lines in the file.
$last
=
null
;
foreach
(
$lines
as
$idx
=>
$line
)
{
if
(!
preg_match
(
'/^[
\t
]/'
,
$line
))
{
$last
=
$idx
;
continue
;
}
if
(
$last
===
null
)
{
throw
new
Exception
(
pht
(
'First line of ICS file begins with a space or tab, but this '
.
'marks a continuation line.'
));
}
$lines
[
$last
]
=
$lines
[
$last
].
substr
(
$line
,
1
);
unset
(
$lines
[
$idx
]);
}
return
$lines
;
}
private
function
beginParsingNode
(
$type
)
{
$node
=
$this
->
getNode
();
$new_node
=
$this
->
newICSNode
(
$type
);
if
(
$node
)
{
if
(
$node
instanceof
PhutilCalendarContainerNode
)
{
$node
->
appendChild
(
$new_node
);
}
else
{
$this
->
raiseParseFailure
(
pht
(
'Found unexpected node "%s" inside node "%s".'
,
$new_node
->
getAttribute
(
'ics.type'
),
$node
->
getAttribute
(
'ics.type'
)));
}
}
else
{
if
(
$new_node
instanceof
PhutilCalendarDocumentNode
)
{
if
(
$this
->
document
)
{
$this
->
raiseParseFailure
(
pht
(
'Found multiple "VCALENDAR" nodes in ICS document, '
.
'expected only one.'
));
}
else
{
$this
->
document
=
$new_node
;
}
}
else
{
$this
->
raiseParseFailure
(
pht
(
'Expected ICS document to begin "BEGIN:VCALENDAR".'
));
}
}
$this
->
stack
[]
=
$new_node
;
$this
->
node
=
$new_node
;
return
$this
;
}
private
function
newICSNode
(
$type
)
{
switch
(
$type
)
{
case
'VCALENDAR'
:
$node
=
new
PhutilCalendarDocumentNode
();
break
;
case
'VEVENT'
:
$node
=
new
PhutilCalendarEventNode
();
break
;
default
:
$node
=
new
PhutilCalendarRawNode
();
break
;
}
$node
->
setAttribute
(
'ics.type'
,
$type
);
return
$node
;
}
private
function
endParsingNode
(
$type
)
{
$node
=
$this
->
getNode
();
if
(!
$node
)
{
$this
->
raiseParseFailure
(
pht
(
'Found unexpected "END" without a "BEGIN".'
));
}
$old_type
=
$node
->
getAttribute
(
'ics.type'
);
if
(
$old_type
!=
$type
)
{
$this
->
raiseParseFailure
(
pht
(
'Found mismatched "BEGIN" ("%s") and "END" ("%s") sections.'
,
$old_type
,
$type
));
}
array_pop
(
$this
->
stack
);
if
(
$this
->
stack
)
{
$this
->
node
=
last
(
$this
->
stack
);
}
else
{
$this
->
node
=
null
;
}
return
$this
;
}
private
function
parseICSProperty
(
$line
)
{
$matches
=
null
;
// Properties begin with an alphanumeric name with no escaping, followed
// by either a ";" (to begin a list of parameters) or a ":" (to begin
// the actual field body).
$ok
=
preg_match
(
'(^([^;:]+)([;:])(.*)
\z
)'
,
$line
,
$matches
);
if
(!
$ok
)
{
$this
->
raiseParseFailure
(
pht
(
'Found malformed line in ICS document: %s'
,
$line
));
}
$name
=
$matches
[
1
];
$body
=
$matches
[
3
];
$has_parameters
=
(
$matches
[
2
]
==
';'
);
$parameters
=
array
();
if
(
$has_parameters
)
{
// Parameters are a sensible name, a literal "=", a pile of magic,
// and then maybe a comma and another parameter.
while
(
true
)
{
// We're going to get the first couple of parts first.
$ok
=
preg_match
(
'(^([^=]+)=)'
,
$body
,
$matches
);
if
(!
$ok
)
{
$this
->
raiseParseFailure
(
pht
(
'Found malformed property in ICS document: %s'
,
$body
));
}
$param_name
=
$matches
[
1
];
$body
=
substr
(
$body
,
strlen
(
$matches
[
0
]));
// Now we're going to match zero or more values.
$param_values
=
array
();
while
(
true
)
{
// The value can either be a double-quoted string or an unquoted
// string, with some characters forbidden.
if
(
strlen
(
$body
)
&&
$body
[
0
]
==
'"'
)
{
$is_quoted
=
true
;
$ok
=
preg_match
(
'(^"([^
\x
00-
\x
08
\x
10-
\x
19"]*)")'
,
$body
,
$matches
);
if
(!
$ok
)
{
$this
->
raiseParseFailure
(
pht
(
'Found malformed double-quoted string in ICS document '
.
'parameter value: %s'
,
$body
));
}
}
else
{
$is_quoted
=
false
;
$ok
=
preg_match
(
'(^([^
\x
00-
\x
08
\x
10-
\x
19";:,]*))'
,
$body
,
$matches
);
if
(!
$ok
)
{
$this
->
raiseParseFailure
(
pht
(
'Found malformed unquoted string in ICS document '
.
'parameter value: %s'
,
$body
));
}
}
// NOTE: RFC5545 says "Property parameter values that are not in
// quoted-strings are case-insensitive." -- that is, the quoted and
// unquoted representations are not equivalent. Thus, preserve the
// original formatting in case we ever need to respect this.
$param_values
[]
=
array
(
'value'
=>
$this
->
unescapeParameterValue
(
$matches
[
1
]),
'quoted'
=>
$is_quoted
,
);
$body
=
substr
(
$body
,
strlen
(
$matches
[
0
]));
if
(!
strlen
(
$body
))
{
$this
->
raiseParseFailure
(
pht
(
'Expected ":" after parameters in ICS document property.'
));
}
// If we have a comma now, we're going to read another value. Strip
// it off and keep going.
if
(
$body
[
0
]
==
','
)
{
$body
=
substr
(
$body
,
1
);
continue
;
}
// If we have a colon, this is the last value and also the last
// property. Break, then handle the colon below.
if
(
$body
[
0
]
==
':'
)
{
break
;
}
// We aren't expecting anything else.
$this
->
raiseParseFailure
(
pht
(
'Found unexpected text after reading parameter value: %s'
,
$body
));
}
$parameters
[]
=
array
(
'name'
=>
$param_name
,
'values'
=>
$param_values
,
);
if
(
$body
[
0
]
==
':'
)
{
$body
=
substr
(
$body
,
1
);
break
;
}
}
}
$value
=
$this
->
unescapeFieldValue
(
$name
,
$parameters
,
$body
);
$node
=
$this
->
getNode
();
$raw
=
$node
->
getAttribute
(
'ics.properties'
,
array
());
$raw
[]
=
array
(
'name'
=>
$name
,
'parameters'
=>
$parameters
,
'value'
=>
$value
,
);
$node
->
setAttribute
(
'ics.properties'
,
$raw
);
}
private
function
unescapeParameterValue
(
$data
)
{
// The parameter grammar is adjusted by RFC6868 to permit escaping with
// carets. Remove that escaping.
// This escaping is a bit weird because it's trying to be backwards
// compatible and the original spec didn't think about this and didn't
// provide much room to fix things.
$out
=
''
;
$esc
=
false
;
foreach
(
phutil_utf8v
(
$data
)
as
$c
)
{
if
(!
$esc
)
{
if
(
$c
!=
'^'
)
{
$out
.=
$c
;
}
else
{
$esc
=
true
;
}
}
else
{
switch
(
$c
)
{
case
'n'
:
$out
.=
"
\n
"
;
break
;
case
'^'
:
$out
.=
'^'
;
break
;
case
"'"
:
// NOTE: This is "<caret> <single quote>" being decoded into a
// double quote!
$out
.=
'"'
;
break
;
default
:
// NOTE: The caret is NOT an escape for any other characters.
// This is a "MUST" requirement of RFC6868.
$out
.=
'^'
.
$c
;
break
;
}
}
}
// NOTE: Because caret on its own just means "caret" for backward
// compatibility, we don't warn if we're still in escaped mode once we
// reach the end of the string.
return
$out
;
}
private
function
unescapeFieldValue
(
$name
,
array
$parameters
,
$data
)
{
// NOTE: The encoding of the field value data is dependent on the field
// name (which defines a default encoding) and the parameters (which may
// include "VALUE", specifying a type of the data.
$default_types
=
array
(
'CALSCALE'
=>
'TEXT'
,
'METHOD'
=>
'TEXT'
,
'PRODID'
=>
'TEXT'
,
'VERSION'
=>
'TEXT'
,
'ATTACH'
=>
'URI'
,
'CATEGORIES'
=>
'TEXT'
,
'CLASS'
=>
'TEXT'
,
'COMMENT'
=>
'TEXT'
,
'DESCRIPTION'
=>
'TEXT'
,
// TODO: The spec appears to contradict itself: it says that the value
// type is FLOAT, but it also says that this property value is actually
// two semicolon-separated values, which is not what FLOAT is defined as.
'GEO'
=>
'TEXT'
,
'LOCATION'
=>
'TEXT'
,
'PERCENT-COMPLETE'
=>
'INTEGER'
,
'PRIORITY'
=>
'INTEGER'
,
'RESOURCES'
=>
'TEXT'
,
'STATUS'
=>
'TEXT'
,
'SUMMARY'
=>
'TEXT'
,
'COMPLETED'
=>
'DATE-TIME'
,
'DTEND'
=>
'DATE-TIME'
,
'DUE'
=>
'DATE-TIME'
,
'DTSTART'
=>
'DATE-TIME'
,
'DURATION'
=>
'DURATION'
,
'FREEBUSY'
=>
'PERIOD'
,
'TRANSP'
=>
'TEXT'
,
'TZID'
=>
'TEXT'
,
'TZNAME'
=>
'TEXT'
,
'TZOFFSETFROM'
=>
'UTC-OFFSET'
,
'TZOFFSETTO'
=>
'UTC-OFFSET'
,
'TZURL'
=>
'URI'
,
'ATTENDEE'
=>
'CAL-ADDRESS'
,
'CONTACT'
=>
'TEXT'
,
'ORGANIZER'
=>
'CAL-ADDRESS'
,
'RECURRENCE-ID'
=>
'DATE-TIME'
,
'RELATED-TO'
=>
'TEXT'
,
'URL'
=>
'URI'
,
'UID'
=>
'TEXT'
,
'EXDATE'
=>
'DATE-TIME'
,
'RDATE'
=>
'DATE-TIME'
,
'RRULE'
=>
'RECUR'
,
'ACTION'
=>
'TEXT'
,
'REPEAT'
=>
'INTEGER'
,
'TRIGGER'
=>
'DURATION'
,
'CREATED'
=>
'DATE-TIME'
,
'DTSTAMP'
=>
'DATE-TIME'
,
'LAST-MODIFIED'
=>
'DATE-TIME'
,
'SEQUENCE'
=>
'INTEGER'
,
'REQUEST-STATUS'
=>
'TEXT'
,
);
$value_type
=
idx
(
$default_types
,
$name
,
'TEXT'
);
foreach
(
$parameters
as
$parameter
)
{
if
(
$parameter
[
'name'
]
==
'VALUE'
)
{
$value_type
=
idx
(
head
(
$parameter
[
'values'
]),
'value'
);
}
}
switch
(
$value_type
)
{
case
'BINARY'
:
$result
=
base64_decode
(
$data
);
if
(
$result
===
false
)
{
$this
->
raiseParseFailure
(
pht
(
'Unable to decode base64 data: %s'
,
$data
));
}
break
;
case
'BOOLEAN'
:
$map
=
array
(
'true'
=>
true
,
'false'
=>
false
,
);
$result
=
phutil_utf8_strtolower
(
$data
);
if
(!
isset
(
$map
[
$result
]))
{
$this
->
raiseParseFailure
(
pht
(
'Unexpected BOOLEAN value "%s".'
,
$data
));
}
$result
=
$map
[
$result
];
break
;
case
'CAL-ADDRESS'
:
$result
=
$data
;
break
;
case
'DATE'
:
// This is a comma-separated list of "YYYYMMDD" values.
$result
=
explode
(
','
,
$data
);
break
;
case
'DATE-TIME'
:
$result
=
explode
(
','
,
$data
);
break
;
case
'DURATION'
:
$result
=
explode
(
','
,
$data
);
break
;
case
'FLOAT'
:
$result
=
explode
(
','
,
$data
);
foreach
(
$result
as
$k
=>
$v
)
{
$result
[
$k
]
=
(
float
)
$v
;
}
break
;
case
'INTEGER'
:
$result
=
explode
(
','
,
$data
);
foreach
(
$result
as
$k
=>
$v
)
{
$result
[
$k
]
=
(
int
)
$v
;
}
break
;
case
'PERIOD'
:
$result
=
explode
(
','
,
$data
);
break
;
case
'RECUR'
:
$result
=
$data
;
break
;
case
'TEXT'
:
$result
=
$this
->
unescapeTextValue
(
$data
);
break
;
case
'TIME'
:
$result
=
explode
(
','
,
$data
);
break
;
case
'URI'
:
$result
=
$data
;
break
;
case
'UTC-OFFSET'
:
$result
=
$data
;
break
;
default
:
// RFC5545 says we MUST preserve the data for any types we don't
// recognize.
$result
=
$data
;
break
;
}
return
array
(
'type'
=>
$value_type
,
'value'
=>
$result
,
'raw'
=>
$data
,
);
}
private
function
unescapeTextValue
(
$data
)
{
$result
=
array
();
$buf
=
''
;
$esc
=
false
;
foreach
(
phutil_utf8v
(
$data
)
as
$c
)
{
if
(!
$esc
)
{
if
(
$c
==
'
\\
'
)
{
$esc
=
true
;
}
else
if
(
$c
==
','
)
{
$result
[]
=
$buf
;
$buf
=
''
;
}
else
{
$buf
.=
$c
;
}
}
else
{
switch
(
$c
)
{
case
'n'
:
case
'N'
:
$buf
.=
"
\n
"
;
break
;
default
:
$buf
.=
$c
;
break
;
}
}
}
if
(
$esc
)
{
$this
->
raiseParsFailure
(
pht
(
'ICS document contains TEXT value ending with unescaped '
.
'backslash.'
));
}
$result
[]
=
$buf
;
return
$result
;
}
private
function
raiseParseFailure
(
$message
)
{
throw
new
Exception
(
$message
);
}
}
Event Timeline
Log In to Comment