Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F86931480
mkdoc.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Wed, Oct 9, 12:01
Size
12 KB
Mime Type
text/x-python
Expires
Fri, Oct 11, 12:01 (2 d)
Engine
blob
Format
Raw Data
Handle
21508152
Attached To
R9490 Homework_sp4e_Peruzzo_SáezUribe
mkdoc.py
View Options
#!/usr/bin/env python3
#
# Syntax: mkdoc.py [-o <output file>] [-I<path> ..] [.. a list of header files ..]
#
# Extract documentation from C++ header files to use it in Python bindings
#
import
os
import
sys
import
platform
import
re
import
textwrap
from
clang
import
cindex
from
clang.cindex
import
CursorKind
from
collections
import
OrderedDict
from
glob
import
glob
from
threading
import
Thread
,
Semaphore
from
multiprocessing
import
cpu_count
# Cursor kinds whose children are visited by extract().
RECURSE_LIST = [
    CursorKind.TRANSLATION_UNIT,
    CursorKind.NAMESPACE,
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.CLASS_TEMPLATE,
]

# Cursor kinds for which extract() records a docstring entry.
PRINT_LIST = [
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.ENUM_CONSTANT_DECL,
    CursorKind.CLASS_TEMPLATE,
    CursorKind.FUNCTION_DECL,
    CursorKind.FUNCTION_TEMPLATE,
    CursorKind.CONVERSION_FUNCTION,
    CursorKind.CXX_METHOD,
    CursorKind.CONSTRUCTOR,
    CursorKind.FIELD_DECL,
]

# Cursor kinds that are recursed into but whose spelling is NOT appended to
# the name prefix handed to their children.
PREFIX_BLACKLIST = [
    CursorKind.TRANSLATION_UNIT,
]
# Maps a C++ operator token to the identifier-safe suffix that
# sanitize_name() substitutes for it ("operator<=" -> "operator_le").
CPP_OPERATORS = {
    '<=': 'le',
    '>=': 'ge',
    '==': 'eq',
    '!=': 'ne',
    '[]': 'array',
    '+=': 'iadd',
    '-=': 'isub',
    '*=': 'imul',
    '/=': 'idiv',
    '%=': 'imod',
    '&=': 'iand',
    '|=': 'ior',
    '^=': 'ixor',
    '<<=': 'ilshift',
    '>>=': 'irshift',
    '++': 'inc',
    '--': 'dec',
    '<<': 'lshift',
    '>>': 'rshift',
    '&&': 'land',
    '||': 'lor',
    '!': 'lnot',
    '~': 'bnot',
    '&': 'band',
    '|': 'bor',
    '+': 'add',
    '-': 'sub',
    '*': 'mul',
    '/': 'div',
    '%': 'mod',
    '<': 'lt',
    '>': 'gt',
    '=': 'assign',
    '()': 'call',
}

# Re-order so that longer tokens come first: a sequential replace must see
# "<<=" before "<<" and "<<" before "<".  The sort is stable (also with
# reverse=True), so tokens of equal length keep their declaration order.
CPP_OPERATORS = OrderedDict(
    sorted(
        CPP_OPERATORS.items(),
        key=lambda token_and_name: len(token_and_name[0]),
        reverse=True,
    )
)
# Number of extraction jobs allowed to run at once: one per CPU reported by
# multiprocessing.cpu_count().
job_count = cpu_count()

# Counting semaphore holding one permit per allowed job.
job_semaphore = Semaphore(value=job_count)
class NoFilenamesError(ValueError):
    """Raised when the argument list contains no header filenames."""
def d(s):
    """Return *s* as ``str``, decoding it as UTF-8 when it is not one already."""
    if isinstance(s, str):
        return s
    return s.decode('utf8')
def sanitize_name(name):
    """Turn a (prefixed) C++ entity name into a ``__doc_...`` C identifier.

    Steps, in order:

    1. ``type-parameter-0-N`` placeholders become ``TN``.
    2. ``operator<tok>`` becomes ``operator_<word>`` for every entry of
       ``CPP_OPERATORS``, tried in that mapping's iteration order.
    3. Anything from the first ``<`` to the last ``>`` is dropped.
    4. Every non-alphanumeric character becomes ``_``; runs of ``_`` are
       collapsed and a single trailing ``_`` is removed.

    Returns the result prefixed with ``__doc_``.
    """
    cleaned = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)

    for token, word in CPP_OPERATORS.items():
        cleaned = cleaned.replace('operator%s' % token, 'operator_%s' % word)

    cleaned = re.sub('<.*>', '', cleaned)

    # Map everything that cannot appear in an identifier to an underscore.
    cleaned = ''.join(ch if ch.isalnum() else '_' for ch in cleaned)

    collapsed = re.sub('_+', '_', cleaned)
    trimmed = re.sub('_$', '', collapsed)
    return '__doc_' + trimmed
def _strip_comment_syntax(comment):
    """Remove C++ comment decoration and the common left margin.

    Each line is stripped, then ``/*``/``*/`` delimiters (with any adjacent
    run of ``*``), a leading ``///`` and one leading ``*`` are removed.
    The smallest indentation found on a non-empty line is then cut from
    every line.  Returns the lines joined with a trailing newline each.
    """
    lines = []
    for line in comment.expandtabs(tabsize=4).splitlines():
        line = line.strip()

        opens = line.startswith('/*')
        # A one-line "/** text */" both opens and closes the comment.  The
        # length check keeps "/*/" from being read as two delimiters that
        # share the middle asterisk.
        closes = line.endswith('*/') and len(line) >= (4 if opens else 2)

        if opens:
            line = line[2:]
        if closes:
            line = line[:-2]

        if opens:
            line = line.lstrip('*')
        elif line.startswith('///'):
            line = line[3:]
        if closes:
            line = line.rstrip('*')

        if line.startswith('*'):
            line = line[1:]
        lines.append(line)

    margins = [len(line) - len(line.lstrip()) for line in lines if line]
    if margins:
        margin = min(margins)
        lines = [line[margin:] for line in lines]

    return ''.join(line + '\n' for line in lines)


def _convert_markup(s):
    """Rewrite Doxygen commands and a few HTML/TeX tags as lightweight markup.

    Section headings are emitted with a leading ``$`` marker that
    ``_reflow_text`` consumes.
    """
    # Doxygen tags
    cpp_group = r'([\w:]+)'
    param_group = r'([\[\w:\]]+)'

    s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
    s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
    s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
    s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Parameter ``\2``:\n\n', s)
    s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Template parameter ``\2``:\n\n', s)

    section_tags = (
        ('returns', 'Returns'),
        ('return', 'Returns'),
        ('authors', 'Authors'),
        ('author', 'Author'),
        ('copyright', 'Copyright'),
        ('date', 'Date'),
        ('remarks', 'Remark'),
        ('remark', 'Remark'),
        ('sa', 'See also'),
        ('see', 'See also'),
        ('extends', 'Extends'),
        ('throws', 'Throws'),
        ('throw', 'Throws'),
    )
    for tag, heading in section_tags:
        # The word boundary stops "\throw" from matching the first six
        # characters of "\throws" and leaving a stray "s" in the text.
        s = re.sub(r'\\%s\b\s*' % tag, r'\n\n$%s:\n\n' % heading, s)

    s = re.sub(r'\\details\s*', r'\n\n', s)
    s = re.sub(r'\\brief\s*', r'', s)
    s = re.sub(r'\\short\s*', r'', s)
    s = re.sub(r'\\ref\s*', r'', s)

    s = re.sub(r'\\code\s?(.*?)\s?\\endcode',
               r"```\n\1\n```\n", s, flags=re.DOTALL)

    # HTML/TeX tags
    s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL)
    s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL)
    s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL)
    s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL)
    s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL)
    s = re.sub(r'<li>', r'\n\n* ', s)
    s = re.sub(r'</?ul>', r'', s)
    s = re.sub(r'</li>', r'\n\n', s)

    s = s.replace('``true``', '``True``')
    s = s.replace('``false``', '``False``')
    return s


def _reflow_text(s):
    """Wrap paragraphs to 70 columns, leaving fenced code segments alone.

    A paragraph that starts with ``$`` is a section heading: the marker is
    dropped and the paragraph that follows is indented by four spaces.
    """
    wrapper = textwrap.TextWrapper(
        width=70,
        expand_tabs=True,
        replace_whitespace=True,
        drop_whitespace=True,
        initial_indent='',
        subsequent_indent='',
    )

    chunks = []
    in_code_segment = False
    for piece in re.split(r'(```)', s):
        if piece == '```':
            chunks.append('\n```\n\n' if in_code_segment else '```\n')
            in_code_segment = not in_code_segment
        elif in_code_segment:
            chunks.append(piece.strip())
        else:
            # Paragraphs are separated by two or more line breaks.
            for paragraph in re.split(r'(?: *\n *){2,}', piece):
                wrapped = wrapper.fill(re.sub(r'\s+', ' ', paragraph).strip())
                if wrapped.startswith('$'):
                    chunks.append(wrapped[1:] + '\n')
                    wrapper.initial_indent = \
                        wrapper.subsequent_indent = ' ' * 4
                else:
                    if wrapped:
                        chunks.append(wrapped + '\n\n')
                    wrapper.initial_indent = wrapper.subsequent_indent = ''

    return ''.join(chunks).rstrip().lstrip('\n')


def process_comment(comment):
    """Convert a raw C++ documentation comment into reflowed docstring text.

    The comment delimiters are removed, Doxygen commands and a handful of
    HTML/TeX tags are translated, and the prose is re-wrapped to 70
    columns while fenced code segments are kept verbatim.

    Parameters:
        comment: the comment text as ``str``, including its ``/** */`` or
            ``///`` decoration.  May be empty.

    Returns:
        The processed text without leading newlines or trailing whitespace.
    """
    text = _strip_comment_syntax(comment)
    text = _convert_markup(text)
    return _reflow_text(text)
def extract(filename, node, prefix, output):
    """Collect docstring entries for *node* and its descendants.

    Parameters:
        filename: path of the header being processed; nodes located in a
            different file are skipped.
        node: the cursor to inspect (reads ``location``, ``kind``,
            ``spelling``, ``raw_comment`` and ``get_children()``).
        prefix: underscore-joined names of the enclosing scopes.
        output: list that receives ``(name, filename, comment)`` tuples.

    Returns ``0`` when the node is skipped because it lives in another
    file, and ``None`` otherwise.
    """
    origin = node.location.file
    if origin is not None and not os.path.samefile(d(origin.name), filename):
        return 0

    kind = node.kind

    if kind in RECURSE_LIST:
        child_prefix = prefix
        if kind not in PREFIX_BLACKLIST:
            if child_prefix:
                child_prefix += '_'
            child_prefix += d(node.spelling)
        for child in node.get_children():
            extract(filename, child, child_prefix, output)

    if kind in PRINT_LIST:
        raw = node.raw_comment
        comment = process_comment('' if raw is None else d(raw))

        scope = prefix + '_' if prefix else prefix
        spelling = node.spelling
        # Unnamed entities get no entry.
        if len(spelling) > 0:
            entry_name = sanitize_name(scope + d(spelling))
            output.append((entry_name, filename, comment))
class ExtractionThread(Thread):
    """Thread that parses one header and appends its docstrings to a list.

    Constructing an instance acquires one permit of the module-level
    ``job_semaphore`` (so the constructor blocks while no permit is free);
    ``run()`` gives the permit back when it finishes, whether or not the
    extraction raised.
    """

    def __init__(self, filename, parameters, output):
        """Store the job description and reserve a job slot.

        Parameters:
            filename: header file to parse.
            parameters: argument list handed to ``index.parse``.
            output: shared list receiving the extracted entries.
        """
        super().__init__()
        self.filename = filename
        self.parameters = parameters
        self.output = output
        job_semaphore.acquire()

    def run(self):
        """Parse ``self.filename`` and extract its documentation."""
        print('Processing "%s" ..' % self.filename, file=sys.stderr)
        try:
            raw_index = cindex.conf.lib.clang_createIndex(False, True)
            translation_unit = cindex.Index(raw_index).parse(
                self.filename, self.parameters)
            extract(self.filename, translation_unit.cursor, '', self.output)
        finally:
            job_semaphore.release()
def read_args(args):
    """Split command-line arguments into compiler parameters and filenames.

    Defaults are added first: ``-x c++`` unless ``-x`` is given, and
    ``-std=c++11`` unless some ``-std=...`` is given.  On macOS the Xcode
    libclang and SDK sysroot are configured when present; on Linux the
    highest-versioned ``/usr/lib*/clang/*/include`` directory is added as
    a system include path.  Then every argument starting with ``-`` is
    forwarded as a parameter and everything else is taken as a filename.

    An option from a small set of flags that take a separate value
    (``-x c++``, ``-I dir``, ...) keeps that value with it instead of
    having it treated as a header filename.

    Parameters:
        args: iterable of argument strings (without the program name).

    Returns:
        ``(parameters, filenames)`` as two lists of strings.

    Raises:
        NoFilenamesError: if no filename is present in *args*.
    """
    # Options whose value is passed as the following, separate argument.
    value_flags = ('-x', '-I', '-D', '-U', '-F', '-include', '-iquote',
                   '-isystem', '-isysroot')

    args = list(args)
    parameters = []
    filenames = []
    if "-x" not in args:
        parameters.extend(['-x', 'c++'])
    if not any(it.startswith("-std=") for it in args):
        parameters.append('-std=c++11')

    if platform.system() == 'Darwin':
        dev_path = '/Applications/Xcode.app/Contents/Developer/'
        lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
        sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
        libclang = lib_dir + 'libclang.dylib'

        if os.path.exists(libclang):
            cindex.Config.set_library_path(os.path.dirname(libclang))

        if os.path.exists(sdk_dir):
            # Use the first SDK listed; skip the option altogether when the
            # directory holds no SDK instead of failing with IndexError.
            sdk_names = next(os.walk(sdk_dir), (sdk_dir, [], []))[1]
            if sdk_names:
                parameters.append('-isysroot')
                parameters.append(os.path.join(sdk_dir, sdk_names[0]))
    elif platform.system() == 'Linux':
        # clang doesn't find its own base includes by default on Linux,
        # but different distros install them in different paths.
        # Try to autodetect, preferring the highest numbered version.
        def clang_folder_version(d):
            return [int(ver) for ver in re.findall(r'(?<!lib)(?<!\d)\d+', d)]

        clang_include_dir = max((
            path
            for libdir in ['lib64', 'lib', 'lib32']
            for path in glob('/usr/%s/clang/*/include' % libdir)
            if os.path.isdir(path)
        ), default=None, key=clang_folder_version)

        if clang_include_dir:
            parameters.extend(['-isystem', clang_include_dir])

    pending = iter(args)
    for item in pending:
        if not item.startswith('-'):
            filenames.append(item)
            continue
        parameters.append(item)
        if item in value_flags:
            # The next argument is this option's value, not a header file.
            value = next(pending, None)
            if value is not None:
                parameters.append(value)

    if not filenames:
        raise NoFilenamesError("args parameter did not contain any filenames")

    return parameters, filenames
def extract_all(args):
    """Extract the documentation of every header named in *args*.

    One ``ExtractionThread`` is started per filename; the call returns
    once all of them have finished.

    Parameters:
        args: argument list as accepted by ``read_args``.

    Returns:
        A list of ``(name, filename, comment)`` tuples, in the order the
        threads produced them.

    Raises:
        NoFilenamesError: propagated from ``read_args``.
    """
    parameters, filenames = read_args(args)
    output = []
    threads = []
    for filename in filenames:
        thr = ExtractionThread(filename, parameters, output)
        thr.start()
        threads.append(thr)

    print('Waiting for jobs to finish ..', file=sys.stderr)
    # Join the workers rather than draining every permit of job_semaphore:
    # drained permits were never handed back, so a second call in the same
    # process would block forever while constructing its first thread.
    for thr in threads:
        thr.join()

    return output
def write_header(comments, out_file=sys.stdout):
    """Write the extracted docstrings as a C++ header.

    Parameters:
        comments: iterable of ``(name, filename, comment)`` tuples.
        out_file: text stream to write to.  The default is the
            ``sys.stdout`` object that existed when this function was
            defined.

    Entries are emitted sorted by ``(name, filename)``.  When a name
    repeats, the second and later occurrences get the suffix ``_2``,
    ``_3`` and so on.
    """
    prologue = '''/*
This file contains docstrings for the Python bindings.
Do not edit! These were automatically extracted by mkdoc.py
*/
#define __EXPAND(x) x
#define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...) COUNT
#define __VA_SIZE(...) __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
#define __CAT1(a, b) a ## b
#define __CAT2(a, b) __CAT1(a, b)
#define __DOC1(n1) __doc_##n1
#define __DOC2(n1, n2) __doc_##n1##_##n2
#define __DOC3(n1, n2, n3) __doc_##n1##_##n2##_##n3
#define __DOC4(n1, n2, n3, n4) __doc_##n1##_##n2##_##n3##_##n4
#define __DOC5(n1, n2, n3, n4, n5) __doc_##n1##_##n2##_##n3##_##n4##_##n5
#define __DOC6(n1, n2, n3, n4, n5, n6) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
#define __DOC7(n1, n2, n3, n4, n5, n6, n7) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
#define DOC(...) __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))
#if defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
'''
    epilogue = '''
#if defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
'''

    print(prologue, file=out_file)

    ordered = sorted(comments, key=lambda entry: (entry[0], entry[1]))

    previous = None
    occurrence = 1
    for base_name, _, text in ordered:
        if base_name == previous:
            occurrence += 1
            symbol = base_name + "_%i" % occurrence
        else:
            previous = base_name
            occurrence = 1
            symbol = base_name

        # Multi-line docstrings start on their own line.
        separator = '\n' if '\n' in text else ' '
        print('\nstatic const char *%s =%sR"doc(%s)doc";' %
              (symbol, separator, text), file=out_file)

    print(epilogue, file=out_file)
def mkdoc(args):
    """Extract docstrings from the given headers and write the C++ header.

    Parameters:
        args: iterable of command-line arguments.  The first argument
            starting with ``-o`` selects the output file, either attached
            (``-ofile``) or as the following argument (``-o file``); all
            other arguments are passed to ``extract_all``.  Without ``-o``
            the header is written to standard output.

    Exits the process with status -1 when ``-o`` is the last argument and
    has no value.  If writing the output file fails, the partial file is
    removed and the original exception is re-raised.
    """
    args = list(args)
    out_path = None
    for idx, arg in enumerate(args):
        if arg.startswith("-o"):
            del args[idx]
            try:
                # After the deletion, index idx holds the argument that
                # followed "-o" (if there is one).
                out_path = arg[2:] or args.pop(idx)
            except IndexError:
                print("-o flag requires an argument", file=sys.stderr)
                sys.exit(-1)
            break

    comments = extract_all(args)

    if out_path:
        try:
            with open(out_path, 'w') as out_file:
                write_header(comments, out_file)
        except BaseException:
            # In the event of an error (including an interrupt), don't
            # leave a partially-written output file.
            try:
                os.unlink(out_path)
            except OSError:
                pass
            raise
    else:
        write_header(comments)
if __name__ == '__main__':
    try:
        mkdoc(sys.argv[1:])
    except NoFilenamesError:
        # Usage goes to stderr: stdout is where the generated header is
        # written when no -o option is given.
        print('Syntax: %s [.. a list of header files ..]' % sys.argv[0],
              file=sys.stderr)
        # sys.exit rather than the `exit` helper that the site module
        # installs for interactive sessions.
        sys.exit(-1)
Event Timeline
Log In to Comment