Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F60949513
page.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Fri, May 3, 13:45
Size
25 KB
Mime Type
text/x-python
Expires
Sun, May 5, 13:45 (2 d)
Engine
blob
Format
Raw Data
Handle
17444215
Attached To
rNIETZSCHEPYTHON nietzsche-python
page.py
View Options
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent a page.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
__author__
=
"Christian Steiner"
__maintainer__
=
__author__
__copyright__
=
'University of Basel'
__email__
=
"christian.steiner@unibas.ch"
__status__
=
"Development"
__license__
=
"GPL v3"
__version__
=
"0.0.1"
from
lxml
import
etree
as
ET
from
os.path
import
isfile
,
basename
from
progress.bar
import
Bar
from
svgpathtools
import
svg2paths2
,
svg_to_paths
from
svgpathtools.parser
import
parse_path
import
re
import
sys
import
warnings
from
.box
import
Box
from
.color
import
Color
from
.image
import
Image
,
SVGImage
from
.editor_comment
import
EditorComment
from
.faksimile_image
import
FaksimileImage
from
.faksimile_position
import
FaksimilePosition
from
.imprint
import
Imprint
from
.lineNumber
import
LineNumber
from
.line
import
Line
from
.mark_foreign_hands
import
MarkForeignHands
from
.matrix
import
Matrix
from
.path
import
Path
from
.positional_word_part
import
PositionalWordPart
from
.super_page
import
SuperPage
from
.style
import
Style
from
.text_connection_mark
import
TextConnectionMark
from
.text_field
import
TextField
from
.transkriptionField
import
TranskriptionField
from
.writing_process
import
WritingProcess
from
.word
import
Word
from
.word_deletion_path
import
WordDeletionPath
from
.word_insertion_mark
import
WordInsertionMark
sys
.
path
.
append
(
'py2ttl'
)
from
class_spec
import
SemanticClass
sys
.
path
.
append
(
'shared_util'
)
from
main_util
import
extract_paths_on_tf
,
get_paths_near_position
FILE_TYPE_SVG_WORD_POSITION
=
SuperPage
.
FILE_TYPE_SVG_WORD_POSITION
FILE_TYPE_XML_MANUSCRIPT
=
SuperPage
.
FILE_TYPE_XML_MANUSCRIPT
STATUS_MERGED_OK
=
SuperPage
.
STATUS_MERGED_OK
STATUS_POSTMERGED_OK
=
SuperPage
.
STATUS_POSTMERGED_OK
class
Page
(
SemanticClass
,
SuperPage
):
"""
This class represents a page.
Args:
xml_source_file (str): name of the xml file to be instantiated.
faksimile_image: FaksimileImage.
faksimile_svgFile: svg file containing information about word positions.
"""
UNITTESTING
=
False
def
__init__
(
self
,
xml_source_file
=
None
,
faksimile_image
=
None
,
faksimile_svgFile
=
None
,
add_paths_near_words
=
False
,
warn
=
False
,
number
=
None
):
if
xml_source_file
is
not
None
:
super
(
Page
,
self
)
.
__init__
(
xml_source_file
)
self
.
update_property_dictionary
(
'faksimile_image'
,
faksimile_image
)
self
.
update_property_dictionary
(
'faksimile_svgFile'
,
faksimile_svgFile
)
self
.
init_all_properties
()
self
.
add_style
(
style_node
=
self
.
page_tree
.
getroot
()
.
find
(
'.//style'
))
self
.
faksimile_text_field
=
None
self
.
svg_text_field
=
None
self
.
init_node_objects
()
self
.
warn
=
warn
self
.
add_deletion_paths_to_words
(
add_paths_near_words
)
else
:
self
.
page_tree
=
None
self
.
number
=
number
def
add_deletion_paths_to_words
(
self
,
add_paths_near_words
=
False
):
"""Add deletion paths to words.
"""
words
=
[
word
for
word
in
self
.
words
if
(
len
(
word
.
word_parts
)
==
0
and
word
.
deleted
and
len
(
word
.
deletion_paths
)
==
0
)
\
or
'add_paths_near_words'
in
word
.
process_flags
]
words
+=
[
word
for
word
in
self
.
words
\
if
len
(
word
.
word_parts
)
>
0
and
True
in
\
[
(
wp
.
deleted
and
len
(
wp
.
deletion_paths
)
==
0
)
for
wp
in
word
.
word_parts
]]
if
len
(
words
)
>
0
and
((
self
.
svg_file
is
not
None
and
isfile
(
self
.
svg_file
))
\
or
(
self
.
source
is
not
None
and
isfile
(
self
.
source
))):
svg_file
=
self
.
svg_file
if
self
.
svg_file
is
not
None
else
self
.
source
transkription_field
=
TranskriptionField
(
svg_file
)
tr_xmin
=
transkription_field
.
xmin
if
(
self
.
svg_image
is
None
or
self
.
svg_image
.
text_field
is
None
)
else
0
tr_ymin
=
transkription_field
.
ymin
if
(
self
.
svg_image
is
None
or
self
.
svg_image
.
text_field
is
None
)
else
0
word_deletion_paths
=
self
.
word_deletion_paths
index
=
0
dp_updated
=
False
while
index
<
len
(
words
):
word
=
words
[
index
]
word
.
add_deletion_paths
(
word_deletion_paths
,
tr_xmin
=
tr_xmin
,
tr_ymin
=
tr_ymin
)
if
len
(
word
.
deletion_paths
)
>
0
or
True
in
[
len
(
w
.
deletion_paths
)
>
0
for
w
in
word
.
word_parts
]:
deletion_paths
=
word
.
deletion_paths
for
wp
in
word
.
word_parts
:
deletion_paths
+=
wp
.
deletion_paths
for
deletion_path
in
deletion_paths
:
if
deletion_path
not
in
self
.
word_deletion_paths
:
self
.
word_deletion_paths
.
append
(
deletion_path
)
elif
not
dp_updated
:
word_deletion_paths
=
extract_paths_on_tf
(
self
)
dp_updated
=
True
index
-=
1
if
add_paths_near_words
\
and
(
'add_paths_near_words'
in
word
.
process_flags
\
or
((
word
.
deleted
and
len
(
word
.
deletion_paths
)
==
0
)
\
or
True
in
[
(
w
.
deleted
and
len
(
w
.
deletion_paths
)
==
0
)
for
w
in
word
.
word_parts
])):
if
not
dp_updated
\
and
'add_paths_near_words'
in
word
.
process_flags
:
word_deletion_paths
=
extract_paths_on_tf
(
self
)
dp_updated
=
True
transform
=
None
tp
=
None
target_word
=
word
paths_near_word
=
[]
if
word
.
deleted
and
len
(
word
.
transkription_positions
)
>
0
:
transform
=
word
.
transkription_positions
[
0
]
.
transform
for
tp
in
word
.
transkription_positions
:
word
.
deletion_paths_near_word
+=
get_paths_near_position
(
tp
,
word_deletion_paths
)
elif
len
(
word
.
word_parts
)
>
0
:
for
wp
in
word
.
word_parts
:
if
wp
.
deleted
and
len
(
wp
.
transkription_positions
)
>
0
:
target_word
=
wp
for
tp
in
wp
.
transkription_positions
:
wp
.
deletion_paths_near_word
=
get_paths_near_position
(
tp
,
word_deletion_paths
)
if
self
.
warn
and
(
word
.
deleted
and
len
(
word
.
deletion_paths
)
==
0
):
warnings
.
warn
(
\
f
'WARNING: {self.title} {self.number}: {word.id} on {word.line_number}, {word.text} has no deletion paths! {target_word.deletion_paths_near_word}, {transform}'
)
index
+=
1
@classmethod
def
create_cls
(
cls
,
xml_source_file
=
None
,
create_dummy_page
=
False
,
isBlank
=
False
,
page_node
=
None
):
"""Create a Page.
"""
if
not
create_dummy_page
:
page
=
cls
(
xml_source_file
)
page
.
status
=
'complete'
if
isBlank
:
page
.
status
=
'blank'
page
.
words
=
[]
page
.
lines
=
[]
page
.
word_deletion_paths
=
[]
page
.
word_insertion_marks
=
[]
return
page
else
:
m
=
re
.
match
(
r'(.*)(page[0]*)(.*)(\.xml)'
,
xml_source_file
)
if
m
is
not
None
and
len
(
m
.
groups
())
>
3
:
number
=
m
.
group
(
3
)
else
:
number
=
basename
(
xml_source_file
)
.
replace
(
'.xml'
,
''
)
return
cls
(
number
=
number
)
@classmethod
def
get_pages_from_xml_file
(
cls
,
xml_file
,
status_contains
=
''
,
status_not_contain
=
''
,
word_selection_function
=
None
):
"""Returns a list of Page instantiating a xml_file of type FILE_TYPE_SVG_WORD_POSITION
or xml_files contained in xml_file of type FILE_TYPE_XML_MANUSCRIPT.
[optional: instantiation depends on the fulfilment of a status_contains
and/or on the selection of some words by a word_selection_function].
"""
source_tree
=
ET
.
parse
(
xml_file
)
if
source_tree
.
getroot
()
.
find
(
'metadata/type'
)
.
text
==
cls
.
FILE_TYPE_SVG_WORD_POSITION
:
page
=
cls
(
xml_file
)
if
word_selection_function
is
None
or
len
(
word_selection_function
(
page
.
words
))
>
0
:
return
[
page
]
else
:
return
[]
elif
source_tree
.
getroot
()
.
find
(
'metadata/type'
)
.
text
==
FILE_TYPE_XML_MANUSCRIPT
:
pages
=
[]
xpath
=
'//page/@output'
if
status_contains
!=
''
and
status_not_contain
!=
''
:
xpath
=
'//page[contains(@status, "{0}") and not(contains(@status, "{1}"))]/@output'
.
format
(
status_contains
,
status_not_contain
)
elif
status_contains
!=
''
:
xpath
=
'//page[contains(@status, "{0}")]/@output'
.
format
(
status_contains
)
elif
status_not_contain
!=
''
:
xpath
=
'//page[not(contains(@status, "{0}"))]/@output'
.
format
(
status_not_contain
)
for
xml_source_file
in
source_tree
.
xpath
(
xpath
):
if
isfile
(
xml_source_file
):
pages
+=
cls
.
get_pages_from_xml_file
(
xml_source_file
,
word_selection_function
=
word_selection_function
)
return
pages
else
:
return
[]
@classmethod
def
get_semantic_dictionary
(
cls
):
""" Creates a semantic dictionary as specified by SemanticClass.
"""
dictionary
=
{}
class_dict
=
cls
.
get_class_dictionary
()
properties
=
{
'number'
:
{
'class'
:
str
,
'cardinality'
:
1
}}
properties
.
update
(
cls
.
create_semantic_property_dictionary
(
'status'
,
str
,
\
name
=
'pageHasDataProcessingStatus'
,
label
=
'status of data processing'
,
\
comment
=
'The status of the data processing of this page'
))
properties
.
update
(
cls
.
create_semantic_property_dictionary
(
'faksimile_image'
,
FaksimileImage
,
subPropertyOf
=
cls
.
HAS_IMAGE
))
properties
.
update
(
cls
.
create_semantic_property_dictionary
(
'faksimile_text_field'
,
TextField
,
\
name
=
'pageIsOnFaksimileTextField'
,
label
=
'page is on faksimile text field'
,
\
comment
=
'Relates a page to the text field on a svg image.'
,
subPropertyOf
=
cls
.
PAGE_IS_ON_TEXTFIELD
))
properties
.
update
(
cls
.
create_semantic_property_dictionary
(
'orientation'
,
str
))
properties
.
update
(
cls
.
create_semantic_property_dictionary
(
'status'
,
str
,
\
name
=
'pageHasDataProcessingStatus'
,
label
=
'status of data processing'
,
\
comment
=
'The status of the data processing of this page'
))
properties
.
update
(
cls
.
create_semantic_property_dictionary
(
'svg_image'
,
SVGImage
,
subPropertyOf
=
cls
.
HAS_IMAGE
))
properties
.
update
(
cls
.
create_semantic_property_dictionary
(
'svg_text_field'
,
TextField
,
\
name
=
'pageIsOnSVGTextField'
,
label
=
'page is on svg text field'
,
\
comment
=
'Relates a page to the text field on a faksimile image.'
,
subPropertyOf
=
cls
.
PAGE_IS_ON_TEXTFIELD
))
for
key
in
[
'lines'
,
'imprints'
,
'mark_foreign_hands'
,
'words'
,
'word_deletion_paths'
,
'word_insertion_marks'
,
'editor_comments'
]:
properties
.
update
(
cls
.
create_semantic_property_dictionary
(
key
,
list
))
dictionary
.
update
({
cls
.
CLASS_KEY
:
class_dict
})
dictionary
.
update
({
cls
.
PROPERTIES_KEY
:
properties
})
return
cls
.
return_dictionary_after_updating_super_classes
(
dictionary
)
def
get_word_deletion_path
(
self
,
path
=
None
,
d_attribute
=
None
)
->
WordDeletionPath
:
"""Return a word deletion path that belongs to page.
"""
if
path
is
None
and
d_attribute
is
None
:
raise
Exception
(
'ERROR: get_word_deletion_path needs a path or a d_attribute!'
)
if
d_attribute
is
None
:
d_attribute
=
path
.
d_attribute
page_paths
=
[
dpath
for
dpath
in
self
.
word_deletion_paths
if
dpath
.
d_attribute
==
d_attribute
]
if
len
(
page_paths
)
>
0
:
return
page_paths
[
0
]
else
:
dpath
=
WordDeletionPath
.
create_cls
(
self
,
path
=
path
,
d_attribute
=
d_attribute
)
if
dpath
is
not
None
:
dpath
.
id
=
len
(
self
.
word_deletion_paths
)
self
.
word_deletion_paths
.
append
(
dpath
)
dpath
.
attach_object_to_tree
(
self
.
page_tree
)
return
dpath
def
init_node_objects
(
self
):
"""Initialize all node objects.
"""
self
.
word_insertion_marks
=
[
WordInsertionMark
(
wim_node
=
wim_node
)
for
wim_node
in
self
.
page_tree
.
getroot
()
.
xpath
(
'//'
+
WordInsertionMark
.
XML_TAG
)
]
self
.
words
=
[
Word
.
create_cls
(
word_node
)
for
word_node
in
self
.
page_tree
.
getroot
()
.
xpath
(
'./word'
)
]
self
.
mark_foreign_hands
=
[
MarkForeignHands
.
create_cls
(
node
)
for
node
in
self
.
page_tree
.
getroot
()
.
xpath
(
'./'
+
MarkForeignHands
.
XML_TAG
)
]
#self.text_connection_marks = [ TextConnectionMark.create_cls(node) for node in self.page_tree.getroot().xpath('//' + TextConnectionMark.XML_TAG) ]
self
.
words
+=
[
TextConnectionMark
.
instantiate_as_word
(
node
,
id
=
index
+
len
(
self
.
words
))
\
for
index
,
node
in
enumerate
(
self
.
page_tree
.
getroot
()
.
xpath
(
'//'
+
TextConnectionMark
.
XML_TAG
))
]
self
.
line_numbers
=
[
LineNumber
(
xml_text_node
=
line_number_node
)
for
line_number_node
in
self
.
page_tree
.
getroot
()
.
xpath
(
'//'
+
LineNumber
.
XML_TAG
)
]
self
.
lines
=
[
Line
.
create_cls_from_node
(
node
=
line_number_node
)
for
line_number_node
in
self
.
page_tree
.
getroot
()
.
xpath
(
'//'
+
LineNumber
.
XML_TAG
)
]
self
.
imprints
=
[
Imprint
.
create_cls_from_node
(
imprint_node
,
self
.
lines
)
for
imprint_node
in
self
.
page_tree
.
getroot
()
.
xpath
(
'//'
+
Imprint
.
XML_TAG
)
]
self
.
writing_processes
=
[
WritingProcess
.
create_writing_process_from_xml
(
node
,
self
.
words
)
for
node
in
self
.
page_tree
.
xpath
(
'//'
+
WritingProcess
.
XML_TAG
)
]
self
.
word_deletion_paths
=
[
WordDeletionPath
.
create_cls
(
self
,
node
=
node
)
for
node
in
self
.
page_tree
.
xpath
(
'./'
+
WordDeletionPath
.
XML_TAG
)
]
self
.
editor_comments
=
[
EditorComment
.
create_cls_from_node
(
node
=
node
)
for
node
in
self
.
page_tree
.
xpath
(
'./'
+
EditorComment
.
XML_TAG
)
]
if
self
.
faksimile_image
is
not
None
and
self
.
faksimile_image
.
text_field
is
not
None
:
self
.
faksimile_text_field
=
self
.
faksimile_image
.
text_field
if
self
.
svg_image
is
not
None
and
self
.
svg_image
.
text_field
is
not
None
:
self
.
svg_text_field
=
self
.
svg_image
.
text_field
for
simple_word
in
self
.
words
+
self
.
mark_foreign_hands
+
self
.
text_connection_marks
:
simple_word
.
init_word
(
self
)
for
wim
in
self
.
word_insertion_marks
:
if
wim
.
line_number
>
-
1
:
wim
.
line
=
[
line
for
line
in
self
.
lines
if
line
.
id
==
wim
.
line_number
][
0
]
def
update_and_attach_words2tree
(
self
,
update_function_on_word
=
None
,
include_special_words_of_type
=
[]):
"""Update word ids and attach them to page.page_tree.
"""
if
not
self
.
is_locked
():
update_function_on_word
=
[
update_function_on_word
]
\
if
type
(
update_function_on_word
)
!=
list
\
else
update_function_on_word
for
node
in
self
.
page_tree
.
xpath
(
'.//word|.//'
+
MarkForeignHands
.
XML_TAG
+
'|.//'
+
TextConnectionMark
.
XML_TAG
):
node
.
getparent
()
.
remove
(
node
)
for
index
,
word
in
enumerate
(
self
.
words
):
word
.
id
=
index
for
func
in
update_function_on_word
:
if
callable
(
func
):
func
(
word
)
word
.
attach_word_to_tree
(
self
.
page_tree
)
for
index
,
mark_foreign_hands
in
enumerate
(
self
.
mark_foreign_hands
):
mark_foreign_hands
.
id
=
index
if
MarkForeignHands
in
include_special_words_of_type
:
for
func
in
update_function_on_word
:
if
callable
(
update_function_on_word
):
func
(
mark_foreign_hands
)
mark_foreign_hands
.
attach_word_to_tree
(
self
.
page_tree
)
for
index
,
text_connection_mark
in
enumerate
(
self
.
text_connection_marks
):
text_connection_mark
.
id
=
index
if
TextConnectionMark
in
include_special_words_of_type
:
for
func
in
update_function_on_word
:
if
callable
(
update_function_on_word
):
func
(
text_connection_mark
)
text_connection_mark
.
attach_word_to_tree
(
self
.
page_tree
)
else
:
print
(
'locked'
)
def
update_data_source
(
self
,
faksimile_svgFile
=
None
,
xml_correction_file
=
None
):
"""Update the data source of page.
"""
if
faksimile_svgFile
is
not
None
:
self
.
faksimile_svgFile
=
faksimile_svgFile
data_node
=
self
.
page_tree
.
xpath
(
'.//data-source'
)[
0
]
\
if
len
(
self
.
page_tree
.
xpath
(
'.//data-source'
))
>
0
\
else
ET
.
SubElement
(
self
.
page_tree
.
getroot
(),
'data-source'
)
data_node
.
set
(
'file'
,
self
.
faksimile_svgFile
)
if
xml_correction_file
is
not
None
:
data_node
.
set
(
'xml-corrected-words'
,
xml_correction_file
)
def
update_line_number_area
(
self
,
transkription_field
,
svg_tree
=
None
,
set_to_text_field_zero
=
True
):
"""Determines the width of the area where the line numbers are written in the page.source file.
"""
THRESHOLD
=
0.4
if
svg_tree
is
None
:
svg_tree
=
ET
.
parse
(
self
.
source
)
if
len
(
self
.
line_numbers
)
>
1
:
line_number
=
self
.
line_numbers
[
9
]
\
if
transkription_field
.
is_page_verso
()
and
len
(
self
.
line_numbers
)
>
8
\
else
self
.
line_numbers
[
1
]
ln_nodes
=
[
item
for
item
in
svg_tree
.
iterfind
(
'//text'
,
svg_tree
.
getroot
()
.
nsmap
)
\
if
Matrix
.
IS_NEARX_TRANSKRIPTION_FIELD
(
item
.
get
(
'transform'
),
transkription_field
)
\
and
LineNumber
.
IS_A_LINE_NUMBER
(
item
)
\
and
LineNumber
(
raw_text_node
=
item
)
.
id
==
line_number
.
id
]
if
len
(
ln_nodes
)
>
0
:
matrix
=
Matrix
(
transform_matrix_string
=
ln_nodes
[
0
]
.
get
(
'transform'
))
if
transkription_field
.
is_page_verso
():
transkription_field
.
add_line_number_area_width
(
matrix
.
getX
())
elif
self
.
svg_file
is
not
None
and
isfile
(
self
.
svg_file
):
svg_path_tree
=
ET
.
parse
(
self
.
svg_file
)
namespaces
=
{
k
if
k
is
not
None
else
'ns'
:
v
for
k
,
v
in
svg_path_tree
.
getroot
()
.
nsmap
.
items
()
}
svg_x
=
matrix
.
getX
()
svg_y
=
self
.
line_numbers
[
1
]
.
bottom
+
transkription_field
.
ymin
\
if
set_to_text_field_zero
\
else
self
.
line_numbers
[
1
]
.
bottom
use_nodes
=
svg_path_tree
.
xpath
(
'//ns:use[@x>="{0}" and @x<="{1}" and @y>="{2}" and @y<="{3}"]'
\
.
format
(
svg_x
-
THRESHOLD
,
svg_x
+
THRESHOLD
,
svg_y
-
THRESHOLD
,
svg_y
+
THRESHOLD
),
namespaces
=
namespaces
)
if
len
(
use_nodes
)
>
0
:
symbol_id
=
use_nodes
[
0
]
.
get
(
'{
%s
}href'
%
namespaces
[
'xlink'
])
.
replace
(
'#'
,
''
)
d_strings
=
use_nodes
[
0
]
.
xpath
(
'//ns:symbol[@id="{0}"]/ns:path/@d'
.
format
(
symbol_id
),
namespaces
=
namespaces
)
if
len
(
d_strings
)
>
0
and
d_strings
[
0
]
!=
''
:
path
=
parse_path
(
d_strings
[
0
])
xmin
,
xmax
,
ymin
,
ymax
=
path
.
bbox
()
width
=
xmax
-
xmin
transkription_field
.
add_line_number_area_width
(
matrix
.
getX
()
+
width
)
def
update_page_type
(
self
,
transkription_field
=
None
):
"""Adds a source to page and attaches it to page_tree.
"""
if
self
.
number
.
endswith
(
'r'
)
\
or
self
.
number
.
endswith
(
'v'
):
self
.
page_type
=
Page
.
PAGE_VERSO
\
if
self
.
number
.
endswith
(
'v'
)
\
else
Page
.
PAGE_RECTO
else
:
if
transkription_field
is
None
:
if
self
.
source
is
None
or
not
isfile
(
self
.
source
):
raise
FileNotFoundError
(
'Page does not have a source!'
)
transkription_field
=
TranskriptionField
(
self
.
source
,
multipage_index
=
self
.
multipage_index
)
self
.
page_type
=
Page
.
PAGE_VERSO
\
if
transkription_field
.
is_page_verso
()
\
else
Page
.
PAGE_RECTO
self
.
page_tree
.
getroot
()
.
set
(
'pageType'
,
self
.
page_type
)
def
update_styles
(
self
,
words
=
None
,
manuscript
=
None
,
add_to_parents
=
False
,
partition_according_to_styles
=
False
,
create_css
=
False
,
parentsPWPs
=
None
):
"""Update styles of words and add them to their transkription_positions.
Args:
add_to_parents: Add styles also to word (and if not None to manuscript).
partition_according_to_styles: Partition word if its transkription_positions have different styles.
"""
style_dictionary
=
{}
if
words
is
None
:
words
=
self
.
words
for
word
in
words
:
if
len
(
word
.
word_parts
)
>
0
:
self
.
update_styles
(
words
=
word
.
word_parts
,
manuscript
=
manuscript
,
create_css
=
create_css
,
\
add_to_parents
=
add_to_parents
,
partition_according_to_styles
=
partition_according_to_styles
,
parentsPWPs
=
parentsPWPs
)
overwritten
=
[]
if
word
.
overwrites_word
is
None
else
[
word
.
overwrites_word
]
if
word
.
earlier_version
is
not
None
:
overwritten
.
append
(
word
.
earlier_version
)
if
len
(
overwritten
)
>
0
:
parentsPWPs
=
parentsPWPs
if
parentsPWPs
is
not
None
else
[]
if
len
(
parentsPWPs
)
==
0
:
cword
=
word
.
word_parts
[
0
]
if
len
(
word
.
word_parts
)
>
0
else
word
for
tp
in
cword
.
transkription_positions
:
parentsPWPs
+=
tp
.
positional_word_parts
self
.
update_styles
(
words
=
overwritten
,
manuscript
=
manuscript
,
create_css
=
create_css
,
\
add_to_parents
=
add_to_parents
,
partition_according_to_styles
=
partition_according_to_styles
,
parentsPWPs
=
parentsPWPs
)
for
transkription_position
in
word
.
transkription_positions
:
positional_word_parts
=
transkription_position
.
positional_word_parts
\
if
len
(
transkription_position
.
positional_word_parts
)
>
0
\
else
parentsPWPs
if
len
(
positional_word_parts
)
>
0
:
style_class
=
positional_word_parts
[
0
]
.
style_class
writing_process_id
=
-
1
for
font_key
in
[
font_key
for
font_key
in
style_class
.
split
(
' '
)
if
font_key
in
self
.
fontsizekey2stage_mapping
.
keys
()
]:
writing_process_id
=
self
.
fontsizekey2stage_mapping
.
get
(
font_key
)
style_class_key
=
(
Style
.
remove_irrelevant_style_keys
(
style_class
,
self
,
extended_styles
=
create_css
),
writing_process_id
)
if
create_css
:
if
style_dictionary
.
get
((
style_class_key
,
word
.
deleted
))
is
None
:
color
=
None
if
len
(
word
.
deletion_paths
)
>
0
:
if
word
.
deletion_paths
[
0
]
.
style_class
is
not
None
\
and
word
.
deletion_paths
[
0
]
.
style_class
!=
''
\
and
self
.
style_dict
.
get
(
word
.
deletion_paths
[
0
]
.
style_class
)
is
not
None
:
color
=
Color
.
create_cls_from_style_object
(
self
.
style_dict
.
get
(
word
.
deletion_paths
[
0
]
.
style_class
))
else
:
color
=
Color
()
style_dictionary
[(
style_class_key
,
word
.
deleted
)]
=
Style
.
create_cls
(
self
,
style_class_key
[
0
],
manuscript
=
manuscript
,
\
create_css
=
create_css
,
deletion_color
=
color
,
writing_process_id
=
style_class_key
[
1
]
)
transkription_position
.
style
=
style_dictionary
[(
style_class_key
,
word
.
deleted
)]
#print(style_dictionary[(style_class_key, word.deleted)])
else
:
if
style_dictionary
.
get
(
style_class_key
)
is
None
:
style_dictionary
[
style_class_key
]
=
Style
.
create_cls
(
self
,
style_class_key
[
0
],
manuscript
=
manuscript
,
create_css
=
create_css
)
style_dictionary
[
style_class_key
]
.
writing_process_id
=
style_class_key
[
1
]
transkription_position
.
style
=
style_dictionary
[
style_class_key
]
if
add_to_parents
and
transkription_position
.
style
not
in
word
.
styles
:
word
.
styles
.
append
(
transkription_position
.
style
)
if
partition_according_to_styles
:
word
.
split_according_to_status
(
'style'
,
splits_are_parts
=
True
)
if
manuscript
is
not
None
\
and
add_to_parents
:
manuscript
.
update_styles
(
*
style_dictionary
.
values
())
def
__eq__
(
self
,
other
):
"""Returns true if self is qualitatively identical to other.
"""
if
other
is
None
:
return
False
if
self
.
page_tree
is
None
and
other
.
page_tree
is
None
:
return
self
.
number
==
other
.
number
if
self
.
page_tree
is
None
or
other
.
page_tree
is
None
:
return
False
return
self
.
page_tree
.
docinfo
.
URL
==
other
.
page_tree
.
docinfo
.
URL
def
__hash__
(
self
):
"""Return a hash value for self.
"""
try
:
if
self
.
page_tree
is
None
:
return
hash
(
self
.
number
)
except
AttributeError
:
print
(
self
)
return
hash
(
self
.
number
)
return
hash
(
self
.
page_tree
.
docinfo
.
URL
)
Event Timeline
Log In to Comment