Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F83509960
latex_structure.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Tue, Sep 17, 13:07
Size
11 KB
Mime Type
text/x-python
Expires
Thu, Sep 19, 13:07 (2 d)
Engine
blob
Format
Raw Data
Handle
20849866
Attached To
R3683 Slides
latex_structure.py
View Options
#!/usr/bin/env python3
################################################################
import
re
import
types
import
pyparsing
as
pp
################################################################
class LatexEnvironment(object):
    """One parsed LaTeX environment (opening command, body, closing command).

    Attributes set by the constructor:
        toks: the token sequence handed in by the parser.
        head: first token, the opening command.
        tail: last token, the closing command.
        content: every token between ``head`` and ``tail``.
        name: ``head.toks[3]``, the environment name.
        option: ``head.toks[6]`` when that lookup succeeds, else ``None``.
        hide: when true, the environment renders as an empty string.
    """

    def __init__(self, toks):
        self.toks = toks
        self.content = toks[1:-1]
        self.head, self.tail = toks[0], toks[-1]
        self.name = self.head.toks[3]
        self.hide = False
        # The option slot is only there when the opening command carried
        # one; any failed lookup is treated as "no option".
        try:
            option = self.head.toks[6]
        except Exception:
            option = None
        self.option = option

    def __str__(self):
        """Return head, body and tail concatenated, or '' when hidden."""
        if self.hide:
            return ''
        pieces = [str(self.head)]
        pieces.extend(str(item) for item in self.content)
        pieces.append(str(self.tail))
        return ''.join(pieces)

    def __getitem__(self, index):
        """Index straight into the underlying token sequence."""
        return self.toks[index]
class LatexCommand(object):
    """One parsed LaTeX command, kept as its raw token sequence.

    ``name`` is the second token (``toks[1]``); ``toks`` is the sequence
    exactly as received.
    """

    def __init__(self, toks):
        command_name = toks[1]
        self.name = command_name
        self.toks = toks

    def __str__(self):
        """Return the tokens joined back together with no separator."""
        empty = ''
        return empty.join(self.toks)

    def __getitem__(self, index):
        """Index straight into the underlying token sequence."""
        return self.toks[index]
class LatexBlock(object):
    """A group of tokens with an optional name.

    Args:
        toks: the tokens making up the block.
        name: optional label stored on the instance; defaults to ``None``.
    """

    def __init__(self, toks, name=None):
        self.name = name
        self.toks = toks

    def __str__(self):
        """Return the string form of every token, concatenated."""
        return ''.join(map(str, self.toks))
class LatexMain(object):
    """Root node of a parsed document.

    ``content`` is the same object as ``toks``; ``name`` is always
    ``"main"``.
    """

    def __init__(self, toks):
        self.toks = self.content = toks
        self.name = "main"

    def __str__(self):
        """Return the string form of every token, concatenated."""
        rendered = map(str, self.toks)
        return ''.join(rendered)

    def __getitem__(self, index):
        """Index straight into the underlying token sequence."""
        return self.toks[index]
################################################################
class LatexStructure:
    """Pyparsing grammar and traversal helpers for LaTeX source.

    ``parseLatex`` / ``parseLatexFile`` store the parsed document as a
    ``LatexMain`` in ``self._content`` (exposed through ``content``);
    ``pathInBlock`` and ``getBlocksFromType`` walk that tree.
    """

    def getBlocksFromType(self, typ):
        """Return the visited blocks whose ``name`` equals ``typ``.

        The search runs through ``pathInBlock`` with its default text
        functor, so it visits the root and every nested
        ``LatexEnvironment``; as a side effect of that traversal every
        other content entry is replaced in place by its ``str()`` form.
        """
        mylist = []

        def foo(b):
            if b.name == typ:
                mylist.append(b)

        # pathInBlock's keyword is ``begin_functor``; the previously used
        # ``fbegin`` keyword does not exist and raised TypeError.
        self.pathInBlock(begin_functor=foo)
        return mylist

    @staticmethod
    def ppValidCharacters():
        """Return the character set accepted as plain text.

        This is ``pp.printables`` without ``%``, ``{``, ``}`` and the
        backslash, plus tab, carriage return and newline.
        """
        valid_characters = pp.printables
        for excluded in ('%', '{', '}', '\\'):
            valid_characters = valid_characters.replace(excluded, '')
        # NOTE(review): as written, a plain space is not part of this set
        # while the text parser leaves whitespace unskipped -- confirm the
        # literal below was not meant to start with a space.
        valid_characters += '\t\r\n'
        return valid_characters

    @property
    def text(self):
        """Parser for plain text, built on first access and then cached.

        Matches either a word made of ``ppValidCharacters()`` or one of
        the escaped sequences: double backslash, or a backslash followed
        by ``&``, ``%``, ``#`` or ``_``.
        """
        if '_text' not in self.__dict__:
            self._text = pp.Word(self.ppValidCharacters())
            self._text = (self._text
                          | pp.Literal('\\\\')
                          | pp.Literal(r'\&')
                          | pp.Literal(r'\%')
                          | pp.Literal(r'\#')
                          | pp.Literal(r'\_'))
            self._text.leaveWhitespace()
        return self._text

    @property
    def comment(self):
        """Parser for a ``%`` comment, built on first access and cached.

        Matches ``%`` followed by everything up to the end of the line.
        """
        if '_comment' not in self.__dict__:
            self._comment = pp.Literal('%')
            self._comment += pp.SkipTo(pp.LineEnd())
        return self._comment

    @property
    def block(self):
        """Parser for a brace-delimited group, built once and cached.

        The group content is a ``Forward`` so that groups can nest; each
        match is turned into a ``LatexBlock``.
        """
        if '_block' not in self.__dict__:
            _start = pp.Literal('{')
            _end = pp.Literal('}')
            _content = pp.Forward().leaveWhitespace()
            _block = _start + _content + _end
            # Publish the parser before filling in the content: the
            # recursive ``self.block`` access below must find ``_block``
            # already set, otherwise it would recurse forever.
            self._block = _block
            _content << pp.ZeroOrMore(self.environment()
                                      | self.ppCommand()
                                      | self.block
                                      | self.text
                                      | self.comment)
            self._block.leaveWhitespace()

            def createBlock(toks):
                b = LatexBlock(toks)
                return b

            _block.addParseAction(createBlock)
        return self._block.leaveWhitespace()

    @staticmethod
    def ppCommand(name=None):
        """Build a parser for a backslash command.

        Args:
            name: exact command name to match; when ``None`` any word of
                alphanumerics and ``@`` is accepted.

        Returns:
            A parser matching the backslash, the command name and any
            number of ``[...]`` options or ``{...}`` parameters; each
            match is turned into a ``LatexCommand``.
        """
        _command = pp.Literal('\\').leaveWhitespace()
        if name is None:
            _command += pp.Word(pp.alphanums + '@').leaveWhitespace()
        else:
            _command += pp.Literal(name).leaveWhitespace()

        option = (pp.Literal('[')
                  + pp.delimitedList(pp.Word(pp.alphanums), combine=True)
                  + pp.Literal(']')).leaveWhitespace()

        # Anything printable (plus whitespace) except braces may appear
        # inside a ``{...}`` parameter.
        valid_param_character = pp.printables + '\t\r\n\\'
        valid_param_character = valid_param_character.replace('{', '')
        valid_param_character = valid_param_character.replace('}', '')
        param_name = pp.delimitedList(pp.Word(valid_param_character),
                                      combine=True)
        parameters = (pp.Literal('{')
                      + param_name
                      + pp.Literal('}').leaveWhitespace()).leaveWhitespace()
        _command += pp.ZeroOrMore(option | parameters).leaveWhitespace()

        def createCommand(toks):
            c = LatexCommand(toks)
            return c

        _command.addParseAction(createCommand)
        return _command.leaveWhitespace()

    def environment(self):
        """Build a parser for a begin/end environment.

        The content grammar is only decided once the opening command has
        been parsed: a ``python`` environment is skipped verbatim up to
        its closing command, any other environment is parsed recursively.
        Each match is turned into a ``LatexEnvironment``.
        """
        _env_start = self.ppCommand('begin')
        _env_end = self.ppCommand('end')
        _env_content = pp.Forward().leaveWhitespace()

        _environment = _env_start + _env_content
        _environment += _env_end

        def set_excluding_command(toks):
            # toks[0] is the LatexCommand of the opening command; its
            # fourth token is the environment name.
            env_name = toks[0][3]
            _command_excluding = self.ppCommand()
            if env_name == 'python':
                python_block = pp.SkipTo(pp.Literal(r'\end{python}'))
                _env_content << python_block
            else:
                _env_content << pp.ZeroOrMore(
                    self.environment().leaveWhitespace()
                    | _command_excluding
                    | self.block
                    | self.text
                    | self.comment)

            def check(toks, env_name):
                if toks[0][1] != 'end':
                    return toks
                if toks[0][3] == env_name:
                    # This is the \end of the environment being parsed:
                    # reject it as ordinary content, presumably so the
                    # enclosing environment can match it as its own end.
                    # The earlier code forced the failure by indexing far
                    # out of range; raising ParseException states the
                    # intent explicitly.
                    raise pp.ParseException(
                        'end of enclosing environment: ' + str(env_name))
                return toks

            _command_excluding.addParseAction(
                lambda toks: check(toks, env_name))

        _env_start.addParseAction(set_excluding_command)

        def createEnvironment(toks):
            e = LatexEnvironment(toks)
            return e

        _environment.addParseAction(createEnvironment)
        return _environment.leaveWhitespace()

    def parseLatexFile(self, filename):
        """Read ``filename`` and parse its content with ``parseLatex``."""
        # The context manager closes the file even if reading fails.
        with open(filename, 'r') as fin:
            inp = fin.read()
        self.parseLatex(inp)

    def parseLatex(self, latex_code):
        """Parse ``latex_code`` and store the result in ``self._content``.

        The result is a ``LatexMain`` wrapping the parsed tokens.
        """
        _content = pp.ZeroOrMore(self.environment()
                                 | self.block
                                 | self.ppCommand()
                                 | self.text
                                 | self.comment)
        _content.leaveWhitespace()
        self._content = LatexMain(_content.parseString(latex_code))

    def buildLatexBlocks(self, filename, herited_types=None):
        """Split ``filename`` on LaTeX commands and nest begin/end blocks.

        Args:
            filename: path of the LaTeX file to read.
            herited_types: mapping forwarded to ``createSubBlock``; a
                fresh empty dict is used when omitted.

        Raises:
            Exception: when a block is still open at the end of the file.

        NOTE(review): ``LatexBlock`` as defined in this file requires a
        ``toks`` argument and does not define ``createSubBlock``,
        ``endSubBlock`` or ``appendContent``, so this method presumably
        targets an earlier ``LatexBlock`` API -- confirm before use.
        """
        # Avoid sharing one default dict between calls.
        if herited_types is None:
            herited_types = {}

        with open(filename, 'r') as fin:
            inp = fin.read()

        latex_cmd_expr = r'(\\\w+(?:\[\w*\])*(?:{[\w|,|\.|(|)]*?})+)'
        splitted = re.split(latex_cmd_expr, inp)

        self.main_block = LatexBlock()
        self.current_block = self.main_block

        for i in splitted:
            m = re.match(r'\\begin{(.*?)}(.*)', i)
            if m:
                name = m.group(1)
                options = m.group(2)
                self.current_block = self.current_block.createSubBlock(
                    name, options, herited_types)
                continue

            m = re.match(r'\\end{(.*?)}', i)
            if m:
                name = m.group(1)
                try:
                    self.current_block = self.current_block.endSubBlock(
                        name)
                except Exception as e:
                    print("AAAAAAAAAAAAAAAAAA")
                    print(e)
                continue

            self.current_block.appendContent(i)

        if self.current_block != self.main_block:
            raise Exception(
                "one latex block was not closed: {0}".format(
                    self.current_block.name))

    def pathInBlockOld(self, block=None, begin_functor=None,
                       end_functor=None, text_functor=None):
        """Earlier traversal routine, kept with its logic unchanged.

        NOTE(review): this relies on ``types.InstanceType``, which the
        Python 3 ``types`` module does not provide, and on
        ``self.main_block``, which only ``buildLatexBlocks`` assigns --
        confirm whether this method is still needed.
        """
        if block is None:
            block = self.main_block

        if begin_functor is not None:
            begin_functor(block)

        for i in range(len(block.content)):
            c = block.content[i]
            if isinstance(c, types.InstanceType):
                try:
                    self.pathInBlock(c, begin_functor, end_functor,
                                     text_functor)
                except Exception as e:
                    print(e)
            else:
                if text_functor is not None:
                    block.content[i] = text_functor(block, c)

        if end_functor is not None:
            end_functor(block)

    def pathInBlock(self, block=None, begin_functor=None,
                    end_functor=None, text_functor=None):
        """Walk ``block`` and its nested environments depth-first.

        Args:
            block: node to start from; defaults to ``self._content``.
            begin_functor: called as ``begin_functor(block)`` before the
                block's content is visited.
            end_functor: called as ``end_functor(block)`` afterwards.
            text_functor: called as ``text_functor(block, str(entry))``
                for every content entry that is not a
                ``LatexEnvironment``; its return value replaces the
                entry in ``block.content``. Defaults to returning the
                string unchanged.
        """
        if text_functor is None:
            def text_functor(b, c):
                return c

        if block is None:
            block = self._content

        if begin_functor is not None:
            begin_functor(block)

        for i, c in enumerate(block.content):
            if isinstance(c, LatexEnvironment):
                self.pathInBlock(c, begin_functor, end_functor,
                                 text_functor)
            else:
                block.content[i] = text_functor(block, str(c))

        if end_functor is not None:
            end_functor(block)

    def __str__(self):
        """Return the string form of the parsed content."""
        return str(self._content)

    @property
    def content(self):
        """The parsed document, or ``None`` before any parse."""
        return self._content

    def __init__(self):
        self._content = None
################################################################
Event Timeline
Log In to Comment