Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F92331062
ast.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Tue, Nov 19, 11:19
Size
61 KB
Mime Type
text/x-python
Expires
Thu, Nov 21, 11:19 (1 d, 21 h)
Engine
blob
Format
Raw Data
Handle
22420526
Attached To
R7934 PCSC_project
ast.py
View Options
#!/usr/bin/env python
#
# Copyright 2007 Neal Norwitz
# Portions Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generate an Abstract Syntax Tree (AST) for C++."""
__author__
=
'nnorwitz@google.com (Neal Norwitz)'
# TODO:
# * Tokens should never be exported, need to convert to Nodes
# (return types, parameters, etc.)
# * Handle static class data for templatized classes
# * Handle casts (both C++ and C-style)
# * Handle conditions and loops (if/else, switch, for, while/do)
#
# TODO much, much later:
# * Handle #define
# * exceptions
try
:
# Python 3.x
import
builtins
except
ImportError
:
# Python 2.x
import
__builtin__
as
builtins
import
sys
import
traceback
from
cpp
import
keywords
from
cpp
import
tokenize
from
cpp
import
utils
if not hasattr(builtins, 'reversed'):
    # Support Python 2.3 and earlier.
    def reversed(seq):
        """Polyfill for the built-in reversed(): yield seq's items last-to-first."""
        for i in range(len(seq) - 1, -1, -1):
            yield seq[i]
if not hasattr(builtins, 'next'):
    # Support Python 2.5 and earlier.
    def next(obj):
        """Polyfill for the built-in next(): call the iterator's .next() method."""
        return obj.next()
# C++ access-control levels for class members.
VISIBILITY_PUBLIC, VISIBILITY_PROTECTED, VISIBILITY_PRIVATE = range(3)

# Bit flags describing properties of a parsed function/method; a function's
# `modifiers` field is an OR of these.
FUNCTION_NONE = 0x00
FUNCTION_CONST = 0x01
FUNCTION_VIRTUAL = 0x02
FUNCTION_PURE_VIRTUAL = 0x04
FUNCTION_CTOR = 0x08
FUNCTION_DTOR = 0x10
FUNCTION_ATTRIBUTE = 0x20
FUNCTION_UNKNOWN_ANNOTATION = 0x40
FUNCTION_THROW = 0x80
FUNCTION_OVERRIDE = 0x100

"""
These are currently unused. Should really handle these properly at some point.
TYPE_MODIFIER_INLINE = 0x010000
TYPE_MODIFIER_EXTERN = 0x020000
TYPE_MODIFIER_STATIC = 0x040000
TYPE_MODIFIER_CONST = 0x080000
TYPE_MODIFIER_REGISTER = 0x100000
TYPE_MODIFIER_VOLATILE = 0x200000
TYPE_MODIFIER_MUTABLE = 0x400000

TYPE_MODIFIER_MAP = {
    'inline': TYPE_MODIFIER_INLINE,
    'extern': TYPE_MODIFIER_EXTERN,
    'static': TYPE_MODIFIER_STATIC,
    'const': TYPE_MODIFIER_CONST,
    'register': TYPE_MODIFIER_REGISTER,
    'volatile': TYPE_MODIFIER_VOLATILE,
    'mutable': TYPE_MODIFIER_MUTABLE,
}
"""

# Sentinel token type/name injected into the token stream to signal that a
# namespace scope has closed and should be popped (see AstBuilder.Generate).
_INTERNAL_TOKEN = 'internal'
_NAMESPACE_POP = 'ns-pop'
class
_NullDict
(
object
):
__contains__
=
lambda
self
:
False
keys
=
values
=
items
=
iterkeys
=
itervalues
=
iteritems
=
lambda
self
:
()
# TODO(nnorwitz): move AST nodes into a separate module.
class
Node
(
object
):
"""Base AST node."""
def
__init__
(
self
,
start
,
end
):
self
.
start
=
start
self
.
end
=
end
def
IsDeclaration
(
self
):
"""Returns bool if this node is a declaration."""
return
False
def
IsDefinition
(
self
):
"""Returns bool if this node is a definition."""
return
False
def
IsExportable
(
self
):
"""Returns bool if this node exportable from a header file."""
return
False
def
Requires
(
self
,
node
):
"""Does this AST node require the definition of the node passed in?"""
return
False
def
XXX__str__
(
self
):
return
self
.
_StringHelper
(
self
.
__class__
.
__name__
,
''
)
def
_StringHelper
(
self
,
name
,
suffix
):
if
not
utils
.
DEBUG
:
return
'
%s
(
%s
)'
%
(
name
,
suffix
)
return
'
%s
(
%d
,
%d
,
%s
)'
%
(
name
,
self
.
start
,
self
.
end
,
suffix
)
def
__repr__
(
self
):
return
str
(
self
)
class Define(Node):
    """A preprocessor #define: macro name plus its replacement text."""

    def __init__(self, start, end, name, definition):
        Node.__init__(self, start, end)
        self.name = name
        self.definition = definition

    def __str__(self):
        suffix = '%s %s' % (self.name, self.definition)
        return self._StringHelper(self.__class__.__name__, suffix)
class Include(Node):
    """A preprocessor #include directive."""

    def __init__(self, start, end, filename, system):
        Node.__init__(self, start, end)
        self.filename = filename
        # system is True for <...> includes, False for "..." includes.
        self.system = system

    def __str__(self):
        fmt = '<%s>' if self.system else '"%s"'
        return self._StringHelper(self.__class__.__name__,
                                  fmt % self.filename)
class Goto(Node):
    """A goto statement; label is the jump target's name."""

    def __init__(self, start, end, label):
        Node.__init__(self, start, end)
        self.label = label

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, str(self.label))
class Expr(Node):
    """An arbitrary expression statement, stored as its token sequence."""

    def __init__(self, start, end, expr):
        Node.__init__(self, start, end)
        self.expr = expr

    def Requires(self, node):
        # TODO(nnorwitz): impl.
        return False

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, str(self.expr))
class Return(Expr):
    """A return statement; expr holds the returned expression's tokens."""
    pass


class Delete(Expr):
    """A delete statement; expr holds the deleted expression's tokens."""
    pass
class Friend(Expr):
    """A friend declaration inside a class body."""

    def __init__(self, start, end, expr, namespace):
        Expr.__init__(self, start, end, expr)
        # Copy the namespace stack so later pushes/pops don't leak in.
        self.namespace = namespace[:]
class Using(Node):
    """A using declaration/directive; names holds the referenced tokens."""

    def __init__(self, start, end, names):
        Node.__init__(self, start, end)
        self.names = names

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, str(self.names))
class Parameter(Node):
    """A single function parameter: name, its Type, and default-value tokens."""

    def __init__(self, start, end, name, parameter_type, default):
        Node.__init__(self, start, end)
        self.name = name
        self.type = parameter_type
        self.default = default

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        return self.type.name == node.name

    def __str__(self):
        type_str = str(self.type)
        suffix = '%s %s' % (type_str, self.name)
        if self.default:
            suffix += ' = ' + ''.join(d.name for d in self.default)
        return self._StringHelper(self.__class__.__name__, suffix)
class _GenericDeclaration(Node):
    """Base for declarations that carry a name plus namespace context."""

    def __init__(self, start, end, name, namespace):
        Node.__init__(self, start, end)
        self.name = name
        # Snapshot the namespace stack at declaration time.
        self.namespace = namespace[:]

    def FullName(self):
        """Return the name qualified by its namespace path, if any."""
        prefix = ''
        if self.namespace and self.namespace[-1]:
            prefix = '::'.join(self.namespace) + '::'
        return prefix + self.name

    def _TypeStringHelper(self, suffix):
        """Like _StringHelper, but appends the namespace the node lives in."""
        if self.namespace:
            names = [n or '<anonymous>' for n in self.namespace]
            suffix += ' in ' + '::'.join(names)
        return self._StringHelper(self.__class__.__name__, suffix)
# TODO(nnorwitz): merge with Parameter in some way?
class VariableDeclaration(_GenericDeclaration):
    """A variable declaration: name, Type, and optional initial value."""

    def __init__(self, start, end, name, var_type, initial_value, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.type = var_type
        self.initial_value = initial_value

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        return self.type.name == node.name

    def ToString(self):
        """Return a string that tries to reconstitute the variable decl."""
        suffix = '%s %s' % (self.type, self.name)
        if self.initial_value:
            suffix += ' = ' + self.initial_value
        return suffix

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, self.ToString())
class Typedef(_GenericDeclaration):
    """A typedef; alias holds the aliased type's token sequence."""

    def __init__(self, start, end, name, alias, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.alias = alias

    def IsDefinition(self):
        return True

    def IsExportable(self):
        return True

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        return any(token is not None and token.name == node.name
                   for token in self.alias)

    def __str__(self):
        suffix = '%s, %s' % (self.name, self.alias)
        return self._TypeStringHelper(suffix)
class _NestedType(_GenericDeclaration):
    """Base for nested type definitions (union/enum) with member fields."""

    def __init__(self, start, end, name, fields, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.fields = fields

    def IsDefinition(self):
        return True

    def IsExportable(self):
        return True

    def __str__(self):
        suffix = '%s, {%s}' % (self.name, self.fields)
        return self._TypeStringHelper(suffix)
class Union(_NestedType):
    """A union definition; fields holds the member tokens."""
    pass


class Enum(_NestedType):
    """An enum definition; fields holds the enumerator tokens."""
    pass
class Class(_GenericDeclaration):
    """A class declaration or definition.

    bases/body are None for a forward declaration; templated_types holds
    template parameter info when the class is a template.
    """

    def __init__(self, start, end, name, bases, templated_types, body,
                 namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.bases = bases
        self.body = body
        self.templated_types = templated_types

    def IsDeclaration(self):
        # Forward declaration: no base list and no body.
        return self.bases is None and self.body is None

    def IsDefinition(self):
        return not self.IsDeclaration()

    def IsExportable(self):
        return not self.IsDeclaration()

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        # TODO(nnorwitz): bases are tokens, do name comparison.
        for token_list in self.bases or ():
            for token in token_list:
                if token.name == node.name:
                    return True
        # TODO(nnorwitz): search in body too.
        return False

    def __str__(self):
        name = self.name
        if self.templated_types:
            name += '<%s>' % self.templated_types
        suffix = '%s, %s, %s' % (name, self.bases, self.body)
        return self._TypeStringHelper(suffix)
class Struct(Class):
    """A C++ struct; treated identically to Class here."""
    pass
class Function(_GenericDeclaration):
    """A free function: return type, parameters, modifier flags, body tokens."""

    def __init__(self, start, end, name, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        # Convert raw token sequences into Type/Parameter nodes up front.
        converter = TypeConverter(namespace)
        self.return_type = converter.CreateReturnType(return_type)
        self.parameters = converter.ToParameters(parameters)
        self.modifiers = modifiers
        self.body = body
        self.templated_types = templated_types

    def IsDeclaration(self):
        return self.body is None

    def IsDefinition(self):
        return self.body is not None

    def IsExportable(self):
        # static functions are not visible outside their translation unit;
        # a None namespace entry marks an anonymous namespace.
        if self.return_type and 'static' in self.return_type.modifiers:
            return False
        return None not in self.namespace

    def Requires(self, node):
        # TODO(nnorwitz): parameters are tokens, do name comparison.
        target = node.name
        for parameter in self.parameters or ():
            if parameter.name == target:
                return True
        # TODO(nnorwitz): search in body too.
        return False

    def __str__(self):
        # TODO(nnorwitz): add templated_types.
        suffix = ('%s %s(%s), 0x%02x, %s' %
                  (self.return_type, self.name, self.parameters,
                   self.modifiers, self.body))
        return self._TypeStringHelper(suffix)
class Method(Function):
    """A member function; in_class records the owning class name."""

    def __init__(self, start, end, name, in_class, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        Function.__init__(self, start, end, name, return_type, parameters,
                          modifiers, templated_types, body, namespace)
        # TODO(nnorwitz): in_class could also be a namespace which can
        # mess up finding functions properly.
        self.in_class = in_class
class Type(_GenericDeclaration):
    """Type used for any variable (eg class, primitive, struct, etc)."""

    def __init__(self, start, end, name, templated_types, modifiers,
                 reference, pointer, array):
        """
        Args:
          name: str name of main type
          templated_types: [Class (Type?)] template type info between <>
          modifiers: [str] type modifiers (keywords) eg, const, mutable, etc.
          reference, pointer, array: bools
        """
        _GenericDeclaration.__init__(self, start, end, name, [])
        self.templated_types = templated_types
        # When only keywords were seen (e.g. `unsigned`), treat the last
        # modifier as the type name.  NOTE: this pops from the caller's list.
        if not name and modifiers:
            self.name = modifiers.pop()
        self.modifiers = modifiers
        self.reference = reference
        self.pointer = pointer
        self.array = array

    def __str__(self):
        prefix = ''
        if self.modifiers:
            prefix = ' '.join(self.modifiers) + ' '
        name = str(self.name)
        if self.templated_types:
            name += '<%s>' % self.templated_types
        suffix = prefix + name
        if self.reference:
            suffix += '&'
        if self.pointer:
            suffix += '*'
        if self.array:
            suffix += '[]'
        return self._TypeStringHelper(suffix)

    # By definition, Is* are always False.  A Type can only exist in
    # some sort of variable declaration, parameter, or return value.
    def IsDeclaration(self):
        return False

    def IsDefinition(self):
        return False

    def IsExportable(self):
        return False
class TypeConverter(object):
    """Converts raw token sequences into Type/Parameter AST nodes.

    Stateless apart from the namespace stack captured at construction.
    """

    def __init__(self, namespace_stack):
        self.namespace_stack = namespace_stack

    def _GetTemplateEnd(self, tokens, start):
        """Scan from `start` past a balanced <...>; return (inner tokens, end).

        Assumes the '<' has already been consumed; `end` is the index just
        after the matching '>'.
        """
        count = 1
        end = start
        while 1:
            token = tokens[end]
            end += 1
            if token.name == '<':
                count += 1
            elif token.name == '>':
                count -= 1
                if count == 0:
                    break
        return tokens[start:end - 1], end

    def ToType(self, tokens):
        """Convert [Token,...] to [Class(...), ] useful for base classes.
        For example, code like class Foo : public Bar<x, y> { ... };
        the "Bar<x, y>" portion gets converted to an AST.

        Returns:
          [Class(...), ...]
        """
        result = []
        name_tokens = []
        reference = pointer = array = False

        def AddType(templated_types):
            # Partition tokens into name and modifier tokens.
            names = []
            modifiers = []
            for t in name_tokens:
                if keywords.IsKeyword(t.name):
                    modifiers.append(t.name)
                else:
                    names.append(t.name)
            name = ''.join(names)
            if name_tokens:
                result.append(Type(name_tokens[0].start, name_tokens[-1].end,
                                   name, templated_types, modifiers,
                                   reference, pointer, array))
            # Reset the accumulator in place (it is closed over).
            del name_tokens[:]

        i = 0
        end = len(tokens)
        while i < end:
            token = tokens[i]
            if token.name == '<':
                new_tokens, new_end = self._GetTemplateEnd(tokens, i + 1)
                AddType(self.ToType(new_tokens))
                # If there is a comma after the template, we need to consume
                # that here otherwise it becomes part of the name.
                i = new_end
                reference = pointer = array = False
            elif token.name == ',':
                AddType([])
                reference = pointer = array = False
            elif token.name == '*':
                pointer = True
            elif token.name == '&':
                reference = True
            elif token.name == '[':
                pointer = True
            elif token.name == ']':
                pass
            else:
                name_tokens.append(token)
            i += 1
        if name_tokens:
            # No '<' in the tokens, just a simple name and no template.
            AddType([])
        return result

    def DeclarationToParts(self, parts, needs_name_removed):
        """Split declaration tokens into components.

        Args:
          parts: [Token] covering the declaration.
          needs_name_removed: bool, True when the trailing declared name
            should be stripped out (e.g. parameters, data members).

        Returns:
          (name, type_name, templated_types, modifiers, default,
           other_tokens)
        """
        name = None
        default = []
        if needs_name_removed:
            # Handle default (initial) values properly.
            for i, t in enumerate(parts):
                if t.name == '=':
                    default = parts[i + 1:]
                    name = parts[i - 1].name
                    # Array declarator: name is before the brackets.
                    if name == ']' and parts[i - 2].name == '[':
                        name = parts[i - 3].name
                        i -= 1
                    parts = parts[:i - 1]
                    break
            else:
                if parts[-1].token_type == tokenize.NAME:
                    name = parts.pop().name
                else:
                    # TODO(nnorwitz): this is a hack that happens for code like
                    # Register(Foo<T>); where it thinks this is a function call
                    # but it's actually a declaration.
                    name = '???'
        modifiers = []
        type_name = []
        other_tokens = []
        templated_types = []
        i = 0
        end = len(parts)
        while i < end:
            p = parts[i]
            if keywords.IsKeyword(p.name):
                modifiers.append(p.name)
            elif p.name == '<':
                templated_tokens, new_end = self._GetTemplateEnd(parts, i + 1)
                templated_types = self.ToType(templated_tokens)
                i = new_end - 1
                # Don't add a spurious :: to data members being initialized.
                next_index = i + 1
                if next_index < end and parts[next_index].name == '::':
                    i += 1
            elif p.name in ('[', ']', '='):
                # These are handled elsewhere.
                other_tokens.append(p)
            elif p.name not in ('*', '&', '>'):
                # Ensure that names have a space between them.
                if (type_name and type_name[-1].token_type == tokenize.NAME
                        and p.token_type == tokenize.NAME):
                    type_name.append(tokenize.Token(tokenize.SYNTAX, ' ',
                                                    0, 0))
                type_name.append(p)
            else:
                other_tokens.append(p)
            i += 1
        type_name = ''.join([t.name for t in type_name])
        return name, type_name, templated_types, modifiers, default, \
            other_tokens

    def ToParameters(self, tokens):
        """Convert a parameter-list token sequence into [Parameter, ...]."""
        if not tokens:
            return []

        result = []
        name = type_name = ''
        type_modifiers = []
        pointer = reference = array = False
        first_token = None
        default = []

        def AddParameter(end):
            # `default` starts with a None flag pushed when '=' was seen.
            if default:
                del default[0]  # Remove flag.
            parts = self.DeclarationToParts(type_modifiers, True)
            (name, type_name, templated_types, modifiers,
             unused_default, unused_other_tokens) = parts
            parameter_type = Type(first_token.start, first_token.end,
                                  type_name, templated_types, modifiers,
                                  reference, pointer, array)
            p = Parameter(first_token.start, end, name,
                          parameter_type, default)
            result.append(p)

        template_count = 0
        for s in tokens:
            if not first_token:
                first_token = s
            if s.name == '<':
                template_count += 1
            elif s.name == '>':
                template_count -= 1
            if template_count > 0:
                # Inside <...>: commas separate template args, not params.
                type_modifiers.append(s)
                continue

            if s.name == ',':
                AddParameter(s.start)
                name = type_name = ''
                type_modifiers = []
                pointer = reference = array = False
                first_token = None
                default = []
            elif s.name == '*':
                pointer = True
            elif s.name == '&':
                reference = True
            elif s.name == '[':
                array = True
            elif s.name == ']':
                pass  # Just don't add to type_modifiers.
            elif s.name == '=':
                # Got a default value.  Add any value (None) as a flag.
                default.append(None)
            elif default:
                default.append(s)
            else:
                type_modifiers.append(s)
        AddParameter(tokens[-1].end)
        return result

    def CreateReturnType(self, return_type_seq):
        """Build a Type node for a return type token sequence (or None)."""
        if not return_type_seq:
            return None
        start = return_type_seq[0].start
        end = return_type_seq[-1].end
        _, name, templated_types, modifiers, default, other_tokens = \
            self.DeclarationToParts(return_type_seq, False)
        names = [n.name for n in other_tokens]
        reference = '&' in names
        pointer = '*' in names
        array = '[' in names
        return Type(start, end, name, templated_types, modifiers,
                    reference, pointer, array)

    def GetTemplateIndices(self, names):
        """Return (start, end) slice bounds of the <...> region in names."""
        # names is a list of strings.
        start = names.index('<')
        end = len(names) - 1
        while end > 0:
            if names[end] == '>':
                break
            end -= 1
        return start, end + 1
class
AstBuilder
(
object
):
    def __init__(self, token_stream, filename, in_class='', visibility=None,
                 namespace_stack=[]):
        """Build an AST generator over `token_stream` from `filename`.

        Args:
          token_stream: iterator of tokenize.Token.
          filename: str, used in error messages.
          in_class: '' at top level, or the (possibly qualified) name of the
            enclosing class when parsing a class body.
          visibility: current VISIBILITY_* level, or None.
          namespace_stack: enclosing namespace names.  NOTE: mutable default
            is safe here because it is copied with [:] below.
        """
        self.tokens = token_stream
        self.filename = filename
        # TODO(nnorwitz): use a better data structure (deque) for the queue.
        # Switching directions of the "queue" improved perf by about 25%.
        # Using a deque should be even better since we access from both sides.
        self.token_queue = []
        self.namespace_stack = namespace_stack[:]
        self.in_class = in_class
        if in_class is None:
            self.in_class_name_only = None
        else:
            # in_class can be qualified (A::Name); keep just the last part
            # for ctor/dtor name comparisons.
            self.in_class_name_only = in_class.split('::')[-1]
        self.visibility = visibility
        self.in_function = False
        self.current_token = None
        # Keep the state whether we are currently handling a typedef or not.
        self._handling_typedef = False

        self.converter = TypeConverter(self.namespace_stack)
    def HandleError(self, msg, token):
        """Report a parse problem on stderr with recent-queue context."""
        # Show up to the last 20 queued tokens, oldest first.
        printable_queue = list(reversed(self.token_queue[-20:]))
        sys.stderr.write('Got %s in %s @ %s %s\n' %
                         (msg, self.filename, token, printable_queue))
    def Generate(self):
        """Yield AST nodes parsed from the token stream until exhausted."""
        while 1:
            token = self._GetNextToken()
            if not token:
                break

            # Get the next token.
            self.current_token = token

            # Dispatch on the next token type.
            if token.token_type == _INTERNAL_TOKEN:
                # Internal sentinel: pop a namespace scope, emit nothing.
                if token.name == _NAMESPACE_POP:
                    self.namespace_stack.pop()
                continue

            try:
                result = self._GenerateOne(token)
                if result is not None:
                    yield result
            except:
                # Report context before propagating so the caller sees
                # where parsing failed.
                self.HandleError('exception', token)
                raise
    def _CreateVariable(self, pos_token, name, type_name, type_modifiers,
                        ref_pointer_name_seq, templated_types, value=None):
        """Build a VariableDeclaration node.

        ref_pointer_name_seq is any container of names; the presence of
        '&', '*', or '[' marks the type as reference/pointer/array.
        """
        reference = '&' in ref_pointer_name_seq
        pointer = '*' in ref_pointer_name_seq
        array = '[' in ref_pointer_name_seq
        var_type = Type(pos_token.start, pos_token.end, type_name,
                        templated_types, type_modifiers,
                        reference, pointer, array)
        return VariableDeclaration(pos_token.start, pos_token.end,
                                   name, var_type, value,
                                   self.namespace_stack)
    def _GenerateOne(self, token):
        """Parse one construct starting at `token`; return a Node or None.

        Dispatches on the token type: keywords go to handle_* methods,
        names become variables/functions/ctors, '~' starts a dtor, and
        preprocessor tokens become Include/Define (or skip #if 0 blocks).
        """
        if token.token_type == tokenize.NAME:
            if (keywords.IsKeyword(token.name) and
                    not keywords.IsBuiltinType(token.name)):
                # Language keyword (class, namespace, ...): dispatch to the
                # matching handle_<keyword> method.
                method = getattr(self, 'handle_' + token.name)
                return method()
            elif token.name == self.in_class_name_only:
                # The token name is the same as the class, must be a ctor if
                # there is a paren.  Otherwise, it's the return type.
                # Peek ahead to get the next token to figure out which.
                next = self._GetNextToken()
                self._AddBackToken(next)
                if (next.token_type == tokenize.SYNTAX and
                        next.name == '('):
                    return self._GetMethod([token], FUNCTION_CTOR, None, True)
                # Fall through--handle like any other method.

            # Handle data or function declaration/definition.
            syntax = tokenize.SYNTAX
            temp_tokens, last_token = \
                self._GetVarTokensUpTo(syntax, '(', ';', '{', '[')
            temp_tokens.insert(0, token)
            if last_token.name == '(':
                # If there is an assignment before the paren,
                # this is an expression, not a method.
                expr = bool([e for e in temp_tokens if e.name == '='])
                if expr:
                    # Consume through ';' and fake a ';' terminator so the
                    # data-handling branch below runs.
                    new_temp = self._GetTokensUpTo(tokenize.SYNTAX, ';')
                    temp_tokens.append(last_token)
                    temp_tokens.extend(new_temp)
                    last_token = tokenize.Token(tokenize.SYNTAX, ';', 0, 0)

            if last_token.name == '[':
                # Handle array, this isn't a method, unless it's an operator.
                # TODO(nnorwitz): keep the size somewhere.
                # unused_size = self._GetTokensUpTo(tokenize.SYNTAX, ']')
                temp_tokens.append(last_token)
                if temp_tokens[-2].name == 'operator':
                    temp_tokens.append(self._GetNextToken())
                else:
                    temp_tokens2, last_token = \
                        self._GetVarTokensUpTo(tokenize.SYNTAX, ';')
                    temp_tokens.extend(temp_tokens2)

            if last_token.name == ';':
                # Handle data, this isn't a method.
                parts = self.converter.DeclarationToParts(temp_tokens, True)
                (name, type_name, templated_types, modifiers, default,
                 unused_other_tokens) = parts

                t0 = temp_tokens[0]
                names = [t.name for t in temp_tokens]
                if templated_types:
                    # Drop the <...> region from the name list.
                    start, end = self.converter.GetTemplateIndices(names)
                    names = names[:start] + names[end:]
                default = ''.join([t.name for t in default])
                return self._CreateVariable(t0, name, type_name, modifiers,
                                            names, templated_types, default)
            if last_token.name == '{':
                # Re-queue everything after the first token and dispatch on
                # it (e.g. a struct/class/enum keyword), else give up.
                self._AddBackTokens(temp_tokens[1:])
                self._AddBackToken(last_token)
                method_name = temp_tokens[0].name
                method = getattr(self, 'handle_' + method_name, None)
                if not method:
                    # Must be declaring a variable.
                    # TODO(nnorwitz): handle the declaration.
                    return None
                return method()
            return self._GetMethod(temp_tokens, 0, None, False)
        elif token.token_type == tokenize.SYNTAX:
            if token.name == '~' and self.in_class:
                # Must be a dtor (probably not in method body).
                token = self._GetNextToken()
                # self.in_class can contain A::Name, but the dtor will only
                # be Name.  Make sure to compare against the right value.
                if (token.token_type == tokenize.NAME and
                        token.name == self.in_class_name_only):
                    return self._GetMethod([token], FUNCTION_DTOR, None, True)
            # TODO(nnorwitz): handle a lot more syntax.
        elif token.token_type == tokenize.PREPROCESSOR:
            # TODO(nnorwitz): handle more preprocessor directives.
            # token starts with a #, so remove it and strip whitespace.
            name = token.name[1:].lstrip()
            if name.startswith('include'):
                # Remove "include".
                name = name[7:].strip()
                assert name
                # Handle #include \<newline> "header-on-second-line.h".
                if name.startswith('\\'):
                    name = name[1:].strip()
                assert name[0] in '<"', token
                assert name[-1] in '>"', token
                system = name[0] == '<'
                filename = name[1:-1]
                return Include(token.start, token.end, filename, system)
            if name.startswith('define'):
                # Remove "define".
                name = name[6:].strip()
                assert name
                value = ''
                # Split the macro name from its replacement text at the
                # first whitespace.
                for i, c in enumerate(name):
                    if c.isspace():
                        value = name[i:].lstrip()
                        name = name[:i]
                        break
                return Define(token.start, token.end, name, value)
            if name.startswith('if') and name[2:3].isspace():
                condition = name[3:].strip()
                if condition.startswith('0') or condition.startswith('(0)'):
                    # Dead block: consume everything to the matching #endif.
                    self._SkipIf0Blocks()
        return None
def
_GetTokensUpTo
(
self
,
expected_token_type
,
expected_token
):
return
self
.
_GetVarTokensUpTo
(
expected_token_type
,
expected_token
)[
0
]
    def _GetVarTokensUpTo(self, expected_token_type, *expected_tokens):
        """Consume tokens until one matches; return ([tokens], terminator).

        The terminator matches `expected_token_type` and one of
        `expected_tokens`, and is not included in the returned list.
        """
        last_token = self._GetNextToken()
        tokens = []
        while (last_token.token_type != expected_token_type or
               last_token.name not in expected_tokens):
            tokens.append(last_token)
            last_token = self._GetNextToken()
        return tokens, last_token
# TODO(nnorwitz): remove _IgnoreUpTo() it shouldn't be necessary.
def
_IgnoreUpTo
(
self
,
token_type
,
token
):
unused_tokens
=
self
.
_GetTokensUpTo
(
token_type
,
token
)
    def _SkipIf0Blocks(self):
        """Consume tokens through the #endif matching an already-seen #if.

        Tracks nested #if*/#endif pairs so inner conditionals inside the
        dead block are skipped correctly.
        """
        count = 1
        while 1:
            token = self._GetNextToken()
            if token.token_type != tokenize.PREPROCESSOR:
                continue

            name = token.name[1:].lstrip()
            if name.startswith('endif'):
                count -= 1
                if count == 0:
                    break
            elif name.startswith('if'):
                count += 1
    def _GetMatchingChar(self, open_paren, close_paren, GetNextToken=None):
        """Yield tokens up to and including the balanced close_paren.

        Assumes the current token is open_paren and we will consume
        and return up to the close_paren.  The closing token itself is
        the last item yielded.
        """
        if GetNextToken is None:
            GetNextToken = self._GetNextToken
        count = 1
        token = GetNextToken()
        while 1:
            if token.token_type == tokenize.SYNTAX:
                if token.name == open_paren:
                    count += 1
                elif token.name == close_paren:
                    count -= 1
                    if count == 0:
                        break
            yield token
            token = GetNextToken()
        yield token
    def _GetParameters(self):
        """Yield tokens of a parameter list, through the closing ')'."""
        return self._GetMatchingChar('(', ')')
    def GetScope(self):
        """Yield tokens of a brace-delimited scope, through the closing '}'."""
        return self._GetMatchingChar('{', '}')
    def _GetNextToken(self):
        """Return the next token, preferring pushed-back tokens in the queue."""
        if self.token_queue:
            return self.token_queue.pop()
        return next(self.tokens)
    def _AddBackToken(self, token):
        """Push one token back so _GetNextToken returns it next.

        Stream-origin tokens are re-tagged as queue tokens and inserted at
        the far end (the queue is popped from the back).
        """
        if token.whence == tokenize.WHENCE_STREAM:
            token.whence = tokenize.WHENCE_QUEUE
            self.token_queue.insert(0, token)
        else:
            assert token.whence == tokenize.WHENCE_QUEUE, token
            self.token_queue.append(token)
    def _AddBackTokens(self, tokens):
        """Push a sequence of tokens back, preserving their original order.

        The queue is consumed by pop() from the back, so the sequence is
        reversed on insertion.
        """
        if tokens:
            if tokens[-1].whence == tokenize.WHENCE_STREAM:
                for token in tokens:
                    token.whence = tokenize.WHENCE_QUEUE
                self.token_queue[:0] = reversed(tokens)
            else:
                assert tokens[-1].whence == tokenize.WHENCE_QUEUE, tokens
                self.token_queue.extend(reversed(tokens))
    def GetName(self, seq=None):
        """Returns ([tokens], next_token_info).

        Collects one (possibly qualified/templated) identifier: NAME tokens
        joined by '::' and balanced '<...>' groups.  Reads from `seq` if
        given, otherwise from the live token stream.
        """
        GetNextToken = self._GetNextToken
        if seq is not None:
            it = iter(seq)
            GetNextToken = lambda: next(it)
        next_token = GetNextToken()
        tokens = []
        last_token_was_name = False
        while (next_token.token_type == tokenize.NAME or
               (next_token.token_type == tokenize.SYNTAX and
                next_token.name in ('::', '<'))):
            # Two NAMEs in a row means the identifier should terminate.
            # It's probably some sort of variable declaration.
            if last_token_was_name and next_token.token_type == tokenize.NAME:
                break
            last_token_was_name = next_token.token_type == tokenize.NAME
            tokens.append(next_token)
            # Handle templated names.
            if next_token.name == '<':
                tokens.extend(self._GetMatchingChar('<', '>', GetNextToken))
                last_token_was_name = True
            next_token = GetNextToken()
        return tokens, next_token
    def GetMethod(self, modifiers, templated_types):
        """Parse a function/method whose tokens start at the current point.

        Consumes up to the opening '(' to get the return type and name,
        then delegates to _GetMethod.
        """
        return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')
        assert len(return_type_and_name) >= 1
        return self._GetMethod(return_type_and_name, modifiers,
                               templated_types, False)
    def _GetMethod(self, return_type_and_name, modifiers, templated_types,
                   get_paren):
        """Parse the remainder of a function/method declaration/definition.

        Args:
          return_type_and_name: [Token] covering return type and name.
          modifiers: int, FUNCTION_* flags accumulated so far.
          templated_types: template parameter info or None.
          get_paren: bool, True when the '(' has not been consumed yet
            (ctor/dtor paths).

        Returns a Function, Method, or (for function-pointer data members)
        a VariableDeclaration.
        """
        template_portion = None
        if get_paren:
            token = self._GetNextToken()
            assert token.token_type == tokenize.SYNTAX, token
            if token.name == '<':
                # Handle templatized dtors.
                template_portion = [token]
                template_portion.extend(self._GetMatchingChar('<', '>'))
                token = self._GetNextToken()
            assert token.token_type == tokenize.SYNTAX, token
            assert token.name == '(', token

        name = return_type_and_name.pop()
        # Handle templatized ctors.
        if name.name == '>':
            index = 1
            while return_type_and_name[index].name != '<':
                index += 1
            template_portion = return_type_and_name[index:] + [name]
            del return_type_and_name[index:]
            name = return_type_and_name.pop()
        elif name.name == ']':
            # operator[]: rebuild the name from the trailing tokens.
            rt = return_type_and_name
            assert rt[-1].name == '[', return_type_and_name
            assert rt[-2].name == 'operator', return_type_and_name
            name_seq = return_type_and_name[-2:]
            del return_type_and_name[-2:]
            name = tokenize.Token(tokenize.NAME, 'operator[]',
                                  name_seq[0].start, name.end)
            # Get the open paren so _GetParameters() below works.
            unused_open_paren = self._GetNextToken()

        # TODO(nnorwitz): store template_portion.
        return_type = return_type_and_name
        # `indices` is just a position-bearing token for start/end offsets.
        indices = name
        if return_type:
            indices = return_type[0]

        # Force ctor for templatized ctors.
        if name.name == self.in_class and not modifiers:
            modifiers |= FUNCTION_CTOR
        parameters = list(self._GetParameters())
        del parameters[-1]  # Remove trailing ')'.

        # Handling operator() is especially weird.
        if name.name == 'operator' and not parameters:
            token = self._GetNextToken()
            assert token.name == '(', token
            parameters = list(self._GetParameters())
            del parameters[-1]  # Remove trailing ')'.

        # Consume trailing modifiers (const, throw(...), macros, ...).
        token = self._GetNextToken()
        while token.token_type == tokenize.NAME:
            modifier_token = token
            token = self._GetNextToken()
            if modifier_token.name == 'const':
                modifiers |= FUNCTION_CONST
            elif modifier_token.name == '__attribute__':
                # TODO(nnorwitz): handle more __attribute__ details.
                modifiers |= FUNCTION_ATTRIBUTE
                assert token.name == '(', token
                # Consume everything between the (parens).
                unused_tokens = list(self._GetMatchingChar('(', ')'))
                token = self._GetNextToken()
            elif modifier_token.name == 'throw':
                modifiers |= FUNCTION_THROW
                assert token.name == '(', token
                # Consume everything between the (parens).
                unused_tokens = list(self._GetMatchingChar('(', ')'))
                token = self._GetNextToken()
            elif modifier_token.name == 'override':
                modifiers |= FUNCTION_OVERRIDE
            elif modifier_token.name == modifier_token.name.upper():
                # HACK(nnorwitz):  assume that all upper-case names
                # are some macro we aren't expanding.
                modifiers |= FUNCTION_UNKNOWN_ANNOTATION
            else:
                self.HandleError('unexpected token', modifier_token)

        assert token.token_type == tokenize.SYNTAX, token
        # Handle ctor initializers.
        if token.name == ':':
            # TODO(nnorwitz): anything else to handle for initializer list?
            while token.name != ';' and token.name != '{':
                token = self._GetNextToken()

        # Handle pointer to functions that are really data but look
        # like method declarations.
        if token.name == '(':
            if parameters[0].name == '*':
                # name contains the return type.
                name = parameters.pop()
                # parameters contains the name of the data.
                modifiers = [p.name for p in parameters]
                # Already at the ( to open the parameter list.
                function_parameters = list(self._GetMatchingChar('(', ')'))
                del function_parameters[-1]  # Remove trailing ')'.
                # TODO(nnorwitz): store the function_parameters.
                token = self._GetNextToken()
                assert token.token_type == tokenize.SYNTAX, token
                assert token.name == ';', token
                return self._CreateVariable(indices, name.name, indices.name,
                                            modifiers, '', None)
            # At this point, we got something like:
            #  return_type (type::*name_)(params);
            # This is a data member called name_ that is a function pointer.
            # With this code: void (sq_type::*field_)(string&);
            # We get: name=void return_type=[] parameters=sq_type ... field_
            # TODO(nnorwitz): is return_type always empty?
            # TODO(nnorwitz): this isn't even close to being correct.
            # Just put in something so we don't crash and can move on.
            real_name = parameters[-1]
            modifiers = [p.name for p in self._GetParameters()]
            del modifiers[-1]  # Remove trailing ')'.
            return self._CreateVariable(indices, real_name.name,
                                        indices.name, modifiers, '', None)

        if token.name == '{':
            body = list(self.GetScope())
            del body[-1]  # Remove trailing '}'.
        else:
            body = None
            if token.name == '=':
                token = self._GetNextToken()

                if token.name == 'default' or token.name == 'delete':
                    # Ignore explicitly defaulted and deleted special members
                    # in C++11.
                    token = self._GetNextToken()
                else:
                    # Handle pure-virtual declarations.
                    assert token.token_type == tokenize.CONSTANT, token
                    assert token.name == '0', token
                    modifiers |= FUNCTION_PURE_VIRTUAL
                    token = self._GetNextToken()

            if token.name == '[':
                # TODO(nnorwitz): store tokens and improve parsing.
                # template <typename T, size_t N> char (&ASH(T (&seq)[N]))[N];
                tokens = list(self._GetMatchingChar('[', ']'))
                token = self._GetNextToken()

            assert token.name == ';', (token, return_type_and_name,
                                       parameters)

        # Looks like we got a method, not a function.
        if len(return_type) > 2 and return_type[-1].name == '::':
            return_type, in_class = \
                self._GetReturnTypeAndClassName(return_type)
            return Method(indices.start, indices.end, name.name, in_class,
                          return_type, parameters, modifiers,
                          templated_types, body, self.namespace_stack)
        return Function(indices.start, indices.end, name.name, return_type,
                        parameters, modifiers, templated_types, body,
                        self.namespace_stack)
def _GetReturnTypeAndClassName(self, token_seq):
    """Splits a token sequence into (return_type tokens, class_name tokens).

    Used for out-of-line method definitions like Return::Type::Class::Method.
    The heuristic is to take the LAST parsed name as the class name and
    everything before it as the return type.

    Args:
      token_seq: sequence of tokenize.Token covering the return type and
        qualified name (everything before the '(').

    Returns:
      (return_type, class_name) where return_type is a flat list of tokens
      and class_name is the list of tokens forming the last name.
    """
    # Splitting the return type from the class name in a method
    # can be tricky.  For example, Return::Type::Is::Hard::To::Find().
    # Where is the return type and where is the class name?
    # The heuristic used is to pull the last name as the class name.
    # This includes all the templated type info.
    # TODO(nnorwitz): if there is only One name like in the
    # example above, punt and assume the last bit is the class name.

    # Ignore a :: prefix, if exists so we can find the first real name.
    i = 0
    if token_seq[0].name == '::':
        i = 1
    # Ignore a :: suffix, if exists.
    end = len(token_seq) - 1
    if token_seq[end-1].name == '::':
        end -= 1

    # Make a copy of the sequence so we can append a sentinel
    # value. This is required for GetName will has to have some
    # terminating condition beyond the last name.
    seq_copy = token_seq[i:end]
    seq_copy.append(tokenize.Token(tokenize.SYNTAX, '', 0, 0))
    names = []
    while i < end:
        # Iterate through the sequence parsing out each name.
        # NOTE(review): `i` advances by len(new_name) each pass, and the
        # same `i` indexes both the (possibly shifted) seq_copy and the
        # loop bound computed from token_seq — presumably the prefix skip
        # and name lengths keep these aligned; confirm before changing.
        new_name, next = self.GetName(seq_copy[i:])
        assert new_name, 'Got empty new_name, next=%s' % next
        # We got a pointer or ref.  Add it to the name.
        if next and next.token_type == tokenize.SYNTAX:
            new_name.append(next)
        names.append(new_name)
        i += len(new_name)

    # Now that we have the names, it's time to undo what we did.

    # Remove the sentinel value.
    names[-1].pop()
    # Flatten the token sequence for the return type.
    return_type = [e for seq in names[:-1] for e in seq]
    # The class name is the last name.
    class_name = names[-1]
    return return_type, class_name
# Primitive/builtin type keywords need no dedicated handling: the tokens
# that follow them are consumed by the general declaration machinery, so
# each handler is a no-op.

def handle_bool(self):
    pass

def handle_char(self):
    pass

def handle_int(self):
    pass

def handle_long(self):
    pass

def handle_short(self):
    pass

def handle_double(self):
    pass

def handle_float(self):
    pass

def handle_void(self):
    pass

def handle_wchar_t(self):
    pass

def handle_unsigned(self):
    pass

def handle_signed(self):
    pass
def _GetNestedType(self, ctor):
    """Parses a nested type (union/enum) declaration.

    Args:
      ctor: node constructor (e.g. Union, Enum) called as
        ctor(start, end, name, fields, namespace_stack).

    Returns:
      The node built by ctor for forward/plain declarations, or a
      variable node (via _CreateVariable) when the declaration also
      declares a variable of the new type.
    """
    name = None
    name_tokens, token = self.GetName()
    if name_tokens:
        name = ''.join([t.name for t in name_tokens])

    # Handle forward declarations.
    if token.token_type == tokenize.SYNTAX and token.name == ';':
        return ctor(token.start, token.end, name, None,
                    self.namespace_stack)

    # Inside a typedef, a NAME here means the body was already consumed;
    # push the token back and emit the type without fields.
    if token.token_type == tokenize.NAME and self._handling_typedef:
        self._AddBackToken(token)
        return ctor(token.start, token.end, name, None,
                    self.namespace_stack)

    # Must be the type declaration.
    fields = list(self._GetMatchingChar('{', '}'))
    del fields[-1]                      # Remove trailing '}'.
    if token.token_type == tokenize.SYNTAX and token.name == '{':
        next = self._GetNextToken()
        new_type = ctor(token.start, token.end, name, fields,
                        self.namespace_stack)
        # A name means this is an anonymous type and the name
        # is the variable declaration.
        if next.token_type != tokenize.NAME:
            return new_type
        name = new_type
        token = next

    # Must be variable declaration using the type prefixed with keyword.
    assert token.token_type == tokenize.NAME, token
    return self._CreateVariable(token, token.name, name, [], '', None)
def handle_struct(self):
    """Handles the 'struct' keyword.

    Distinguishes between a struct definition, a variable declared with
    an elaborated 'struct X' type, and a method returning a struct, and
    dispatches accordingly.
    """
    # Special case the handling typedef/aliasing of structs here.
    # It would be a pain to handle in the class code.
    name_tokens, var_token = self.GetName()
    if name_tokens:
        next_token = self._GetNextToken()
        # '*' or '&' after the name: pointer/reference declarator.
        is_syntax = (var_token.token_type == tokenize.SYNTAX and
                     var_token.name[0] in '*&')
        # NAME followed by ';': plain 'struct X var;' declaration.
        is_variable = (var_token.token_type == tokenize.NAME and
                       next_token.name == ';')
        variable = var_token
        if is_syntax and not is_variable:
            variable = next_token
            temp = self._GetNextToken()
            if temp.token_type == tokenize.SYNTAX and temp.name == '(':
                # Handle methods declared to return a struct.
                t0 = name_tokens[0]
                # Synthesize a 'struct' token positioned just before the
                # name so the method parser sees the full return type.
                struct = tokenize.Token(tokenize.NAME, 'struct',
                                        t0.start-7, t0.start-2)
                type_and_name = [struct]
                type_and_name.extend(name_tokens)
                type_and_name.extend((var_token, next_token))
                return self._GetMethod(type_and_name, 0, None, False)
            assert temp.name == ';', (temp, name_tokens, var_token)
        if is_syntax or (is_variable and not self._handling_typedef):
            modifiers = ['struct']
            type_name = ''.join([t.name for t in name_tokens])
            position = name_tokens[0]
            return self._CreateVariable(position, variable.name, type_name,
                                        modifiers, var_token.name, None)
        # Not a variable/method: push everything back and parse as a class.
        name_tokens.extend((var_token, next_token))
        self._AddBackTokens(name_tokens)
    else:
        self._AddBackToken(var_token)
    return self._GetClass(Struct, VISIBILITY_PUBLIC, None)
def handle_union(self):
    """Handles the 'union' keyword via the generic nested-type parser."""
    return self._GetNestedType(Union)
def handle_enum(self):
    """Handles the 'enum' keyword, including C++11 'enum class'."""
    # Consume an optional 'class' keyword; any other token belongs to
    # the declaration itself and is pushed back for the nested-type
    # parser to consume.
    token = self._GetNextToken()
    is_scoped = token.token_type == tokenize.NAME and token.name == 'class'
    if not is_scoped:
        self._AddBackToken(token)
    return self._GetNestedType(Enum)
# Storage-class and qualifier keywords are ignored; the declarations they
# modify are parsed by the surrounding machinery.

def handle_auto(self):
    # TODO(nnorwitz): warn about using auto?  Probably not since it
    # will be reclaimed and useful for C++0x.
    pass

def handle_register(self):
    pass

def handle_const(self):
    pass

def handle_inline(self):
    pass

def handle_extern(self):
    pass

def handle_static(self):
    pass
def handle_virtual(self):
    """Handles the 'virtual' keyword; what follows must be a method."""
    # What follows must be a method.
    token = token2 = self._GetNextToken()
    if token.name == 'inline':
        # HACK(nnorwitz): handle inline dtors by ignoring 'inline'.
        token2 = self._GetNextToken()
    if token2.token_type == tokenize.SYNTAX and token2.name == '~':
        # Virtual destructor.
        return self.GetMethod(FUNCTION_VIRTUAL + FUNCTION_DTOR, None)
    assert token.token_type == tokenize.NAME or token.name == '::', token
    return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')  # )
    # Re-insert the token(s) already consumed so the method parser sees
    # the complete return type and name.
    return_type_and_name.insert(0, token)
    if token2 is not token:
        return_type_and_name.insert(1, token2)
    return self._GetMethod(return_type_and_name, FUNCTION_VIRTUAL,
                           None, False)
# Qualifier keywords with no AST impact.

def handle_volatile(self):
    pass

def handle_mutable(self):
    pass
# Access-specifier keywords: only valid inside a class body; each simply
# updates the current visibility used for subsequent members.

def handle_public(self):
    assert self.in_class
    self.visibility = VISIBILITY_PUBLIC

def handle_protected(self):
    assert self.in_class
    self.visibility = VISIBILITY_PROTECTED

def handle_private(self):
    assert self.in_class
    self.visibility = VISIBILITY_PRIVATE
def handle_friend(self):
    """Handles a 'friend' declaration, consuming up to the ';'."""
    tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
    assert tokens
    first = tokens[0]
    return Friend(first.start, first.end, tokens, self.namespace_stack)
# Cast keywords and 'new' appear inside expressions, which this builder
# does not model; nothing to do.

def handle_static_cast(self):
    pass

def handle_const_cast(self):
    pass

def handle_dynamic_cast(self):
    pass

def handle_reinterpret_cast(self):
    pass

def handle_new(self):
    pass
def handle_delete(self):
    """Handles a 'delete' statement, consuming up to the ';'."""
    tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
    assert tokens
    first = tokens[0]
    return Delete(first.start, first.end, tokens)
def handle_typedef(self):
    """Handles a 'typedef' declaration and returns a Typedef node.

    Supports typedefs of struct/enum/union/class bodies, function
    pointers, and arrays (the latter two via positional hacks below).
    """
    token = self._GetNextToken()
    if (token.token_type == tokenize.NAME and
        keywords.IsKeyword(token.name)):
        # Token must be struct/enum/union/class.
        method = getattr(self, 'handle_' + token.name)
        # Flag so nested handlers know not to consume the trailing name.
        self._handling_typedef = True
        tokens = [method()]
        self._handling_typedef = False
    else:
        tokens = [token]

    # Get the remainder of the typedef up to the semi-colon.
    tokens.extend(self._GetTokensUpTo(tokenize.SYNTAX, ';'))

    # TODO(nnorwitz): clean all this up.
    assert tokens
    name = tokens.pop()
    # `indices` is only used as a source of start/end positions.
    indices = name
    if tokens:
        indices = tokens[0]
    if not indices:
        indices = token
    if name.name == ')':
        # HACK(nnorwitz): Handle pointers to functions "properly".
        if (len(tokens) >= 4 and
            tokens[1].name == '(' and tokens[2].name == '*'):
            tokens.append(name)
            name = tokens[3]
    elif name.name == ']':
        # HACK(nnorwitz): Handle arrays properly.
        if len(tokens) >= 2:
            tokens.append(name)
            name = tokens[1]

    new_type = tokens
    if tokens and isinstance(tokens[0], tokenize.Token):
        new_type = self.converter.ToType(tokens)[0]
    return Typedef(indices.start, indices.end, name.name,
                   new_type, self.namespace_stack)
def handle_typeid(self):
    pass  # Not needed yet.

def handle_typename(self):
    pass  # Not needed yet.
def _GetTemplatedTypes(self):
    """Parses the parameter list of a template declaration.

    Called with the stream positioned just after the opening '<'.

    Returns:
      dict mapping parameter name -> (type_name, default), where either
      element may be None.
    """
    result = {}
    tokens = list(self._GetMatchingChar('<', '>'))
    len_tokens = len(tokens) - 1    # Ignore trailing '>'.
    i = 0
    while i < len_tokens:
        key = tokens[i].name
        i += 1
        # Skip 'typename'/'class' keywords and separators.
        if keywords.IsKeyword(key) or key == ',':
            continue
        type_name = default = None
        if i < len_tokens:
            i += 1
            if tokens[i-1].name == '=':
                # Parameter with a default: name = default-value.
                assert i < len_tokens, '%s %s' % (i, tokens)
                default, unused_next_token = self.GetName(tokens[i:])
                i += len(default)
            else:
                if tokens[i-1].name != ',':
                    # We got something like: Type variable.
                    # Re-adjust the key (variable) and type_name (Type).
                    key = tokens[i-1].name
                    type_name = tokens[i-2]
        result[key] = (type_name, default)
    return result
def handle_template(self):
    """Handles a 'template' declaration (class, struct, friend, method,
    or variable)."""
    token = self._GetNextToken()
    assert token.token_type == tokenize.SYNTAX, token
    assert token.name == '<', token
    templated_types = self._GetTemplatedTypes()
    # TODO(nnorwitz): for now, just ignore the template params.
    token = self._GetNextToken()
    if token.token_type == tokenize.NAME:
        if token.name == 'class':
            return self._GetClass(Class, VISIBILITY_PRIVATE, templated_types)
        elif token.name == 'struct':
            return self._GetClass(Struct, VISIBILITY_PUBLIC, templated_types)
        elif token.name == 'friend':
            return self.handle_friend()
    self._AddBackToken(token)
    # Templated method or variable: peek ahead to whichever of '(' or ';'
    # comes first, then restore the stream.
    tokens, last = self._GetVarTokensUpTo(tokenize.SYNTAX, '(', ';')
    tokens.append(last)
    self._AddBackTokens(tokens)
    if last.name == '(':
        return self.GetMethod(FUNCTION_NONE, templated_types)
    # Must be a variable definition.
    return None
def handle_true(self):
    pass  # Nothing to do.

def handle_false(self):
    pass  # Nothing to do.

def handle_asm(self):
    pass  # Not needed yet.
def handle_class(self):
    """Handles the 'class' keyword; default member visibility is private."""
    return self._GetClass(Class, VISIBILITY_PRIVATE, None)
def _GetBases(self):
    """Parses a class's base-class list (after the ':').

    Returns:
      (bases, token) where bases is a list of Type nodes and token is
      the '{' token that ended the list.
    """
    # Get base classes.
    bases = []
    while 1:
        token = self._GetNextToken()
        assert token.token_type == tokenize.NAME, token
        # TODO(nnorwitz): store kind of inheritance...maybe.
        if token.name not in ('public', 'protected', 'private'):
            # If inheritance type is not specified, it is private.
            # Just put the token back so we can form a name.
            # TODO(nnorwitz): it would be good to warn about this.
            self._AddBackToken(token)
        else:
            # Check for virtual inheritance.
            token = self._GetNextToken()
            if token.name != 'virtual':
                self._AddBackToken(token)
            else:
                # TODO(nnorwitz): store that we got virtual for this base.
                pass
        base, next_token = self.GetName()
        bases_ast = self.converter.ToType(base)
        assert len(bases_ast) == 1, bases_ast
        bases.append(bases_ast[0])
        assert next_token.token_type == tokenize.SYNTAX, next_token
        if next_token.name == '{':
            token = next_token
            break
        # Support multiple inheritance.
        assert next_token.name == ',', next_token
    return bases, token
def _GetClass(self, class_type, visibility, templated_types):
    """Parses a class/struct declaration or definition.

    Args:
      class_type: node constructor (Class or Struct).
      visibility: default member visibility for the body.
      templated_types: dict from _GetTemplatedTypes, or None.

    Returns:
      A class_type node, or a variable/method node for inline
      declarations like 'class X* p;'.
    """
    class_name = None
    class_token = self._GetNextToken()
    if class_token.token_type != tokenize.NAME:
        assert class_token.token_type == tokenize.SYNTAX, class_token
        token = class_token
    else:
        # Skip any macro (e.g. storage class specifiers) after the
        # 'class' keyword.
        next_token = self._GetNextToken()
        if next_token.token_type == tokenize.NAME:
            self._AddBackToken(next_token)
        else:
            self._AddBackTokens([class_token, next_token])
        name_tokens, token = self.GetName()
        class_name = ''.join([t.name for t in name_tokens])
    bases = None
    if token.token_type == tokenize.SYNTAX:
        if token.name == ';':
            # Forward declaration.
            return class_type(class_token.start, class_token.end,
                              class_name, None, templated_types, None,
                              self.namespace_stack)
        if token.name in '*&':
            # Inline forward declaration.  Could be method or data.
            name_token = self._GetNextToken()
            next_token = self._GetNextToken()
            if next_token.name == ';':
                # Handle data
                modifiers = ['class']
                return self._CreateVariable(class_token, name_token.name,
                                            class_name,
                                            modifiers, token.name, None)
            else:
                # Assume this is a method.
                tokens = (class_token, token, name_token, next_token)
                self._AddBackTokens(tokens)
                return self.GetMethod(FUNCTION_NONE, None)
        if token.name == ':':
            bases, token = self._GetBases()

    body = None
    if token.token_type == tokenize.SYNTAX and token.name == '{':
        assert token.token_type == tokenize.SYNTAX, token
        assert token.name == '{', token

        # Parse the class body with a nested builder scoped to this class.
        ast = AstBuilder(self.GetScope(), self.filename, class_name,
                         visibility, self.namespace_stack)
        body = list(ast.Generate())

        if not self._handling_typedef:
            token = self._GetNextToken()
            if token.token_type != tokenize.NAME:
                assert token.token_type == tokenize.SYNTAX, token
                assert token.name == ';', token
            else:
                # 'class X { ... } var;' — the definition also declares
                # a variable.
                new_class = class_type(class_token.start, class_token.end,
                                       class_name, bases, None,
                                       body, self.namespace_stack)

                modifiers = []
                return self._CreateVariable(class_token,
                                            token.name, new_class,
                                            modifiers, token.name, None)
    else:
        if not self._handling_typedef:
            self.HandleError('non-typedef token', token)
        self._AddBackToken(token)

    return class_type(class_token.start, class_token.end, class_name,
                      bases, templated_types, body, self.namespace_stack)
def handle_namespace(self):
    """Handles a 'namespace' declaration (named, anonymous, or alias).

    Pushes the name onto namespace_stack and injects an internal token
    into the stream so the stack is popped when the namespace ends.
    """
    token = self._GetNextToken()
    # Support anonymous namespaces.
    name = None
    if token.token_type == tokenize.NAME:
        name = token.name
        token = self._GetNextToken()
    self.namespace_stack.append(name)
    assert token.token_type == tokenize.SYNTAX, token
    # Create an internal token that denotes when the namespace is complete.
    internal_token = tokenize.Token(_INTERNAL_TOKEN, _NAMESPACE_POP,
                                    None, None)
    internal_token.whence = token.whence
    if token.name == '=':
        # TODO(nnorwitz): handle aliasing namespaces.
        name, next_token = self.GetName()
        assert next_token.name == ';', next_token
        self._AddBackToken(internal_token)
    else:
        assert token.name == '{', token
        tokens = list(self.GetScope())
        # Replace the trailing } with the internal namespace pop token.
        tokens[-1] = internal_token
        # Handle namespace with nothing in it.
        self._AddBackTokens(tokens)
    return None
def handle_using(self):
    """Handles a 'using' declaration/directive, consuming up to the ';'."""
    tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
    assert tokens
    first = tokens[0]
    return Using(first.start, first.end, tokens)
def handle_explicit(self):
    """Handles the 'explicit' keyword; what follows must be a constructor."""
    assert self.in_class
    # Nothing much to do.
    # TODO(nnorwitz): maybe verify the method name == class name.
    # This must be a ctor.
    return self.GetMethod(FUNCTION_CTOR, None)
def handle_this(self):
    pass  # Nothing to do.

def handle_operator(self):
    # Pull off the next token(s?) and make that part of the method name.
    pass

def handle_sizeof(self):
    pass

def handle_case(self):
    pass

def handle_switch(self):
    pass
def handle_default(self):
    """Consumes the ':' that follows a 'default' label in a switch."""
    colon = self._GetNextToken()
    assert colon.token_type == tokenize.SYNTAX
    assert colon.name == ':'
def handle_if(self):
    pass

def handle_else(self):
    pass
def handle_return(self):
    """Builds a Return node from the expression tokens before the ';'."""
    expr_tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
    if expr_tokens:
        first = expr_tokens[0]
        return Return(first.start, first.end, expr_tokens)
    # Bare 'return;' — anchor the node at the current token.
    cur = self.current_token
    return Return(cur.start, cur.end, None)
def handle_goto(self):
    """Builds a Goto node from the single label token before the ';'."""
    tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
    assert len(tokens) == 1, str(tokens)
    label = tokens[0]
    return Goto(label.start, label.end, label.name)
def handle_try(self):
    pass  # Not needed yet.

def handle_catch(self):
    pass  # Not needed yet.

def handle_throw(self):
    pass  # Not needed yet.

def handle_while(self):
    pass

def handle_do(self):
    pass

def handle_for(self):
    pass
# break/continue statements carry no information we model; skip to ';'.

def handle_break(self):
    self._IgnoreUpTo(tokenize.SYNTAX, ';')

def handle_continue(self):
    self._IgnoreUpTo(tokenize.SYNTAX, ';')
def BuilderFromSource(source, filename):
    """Utility method that returns an AstBuilder from source code.

    Args:
      source: 'C++ source code'
      filename: 'file1'

    Returns:
      AstBuilder
    """
    return AstBuilder(tokenize.GetTokens(source), filename)
def PrintIndentifiers(filename, should_print):
    """Prints all identifiers for a C++ source file.

    Parsing is best-effort: errors while generating the AST are
    swallowed so one bad file does not stop a batch run.

    Args:
      filename: 'file1'
      should_print: predicate with signature: bool Function(token)
    """
    source = utils.ReadFile(filename, False)
    if source is None:
        sys.stderr.write('Unable to find: %s\n' % filename)
        return

    #print('Processing %s' % actual_filename)
    builder = BuilderFromSource(source, filename)
    try:
        for node in builder.Generate():
            if should_print(node):
                print(node.name)
    except KeyboardInterrupt:
        return
    except Exception:
        # Deliberate best-effort swallow of parse errors; narrowed from a
        # bare 'except:' so SystemExit and other BaseExceptions propagate.
        pass
def PrintAllIndentifiers(filenames, should_print):
    """Prints all identifiers for each C++ source file in filenames.

    Args:
      filenames: ['file1', 'file2', ...]
      should_print: predicate with signature: bool Function(token)
    """
    for filename in filenames:
        PrintIndentifiers(filename, should_print)
def main(argv):
    """Parses each C++ file named on the command line and dumps its AST.

    Args:
      argv: sys.argv-style argument list; argv[1:] are the filenames.
    """
    for filename in argv[1:]:
        source = utils.ReadFile(filename)
        if source is None:
            continue

        print('Processing %s' % filename)
        builder = BuilderFromSource(source, filename)
        try:
            entire_ast = filter(None, builder.Generate())
        except KeyboardInterrupt:
            return
        except Exception:
            # Narrowed from a bare 'except:' so SystemExit and other
            # BaseExceptions propagate instead of being swallowed.
            # Already printed a warning, print the traceback and continue.
            traceback.print_exc()
        else:
            if utils.DEBUG:
                for ast in entire_ast:
                    print(ast)
if __name__ == '__main__':
    # Script entry point: parse the files named on the command line.
    main(sys.argv)
Event Timeline
Log In to Comment