Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F61994286
word_insertion_mark.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Fri, May 10, 06:46
Size
6 KB
Mime Type
text/x-python
Expires
Sun, May 12, 06:46 (2 d)
Engine
blob
Format
Raw Data
Handle
17588190
Attached To
rNIETZSCHEPYTHON nietzsche-python
word_insertion_mark.py
View Options
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent a word insertion mark.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
# Module metadata: authorship, contact, licensing and version information.
__author__ = "Christian Steiner"
__maintainer__ = __author__  # the maintainer is the same person as the author
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from
lxml
import
etree
as
ET
from
svgpathtools.parser
import
parse_path
import
warnings
from
.line
import
Line
from
.positional_object
import
PositionalObject
from
.word
import
Word
class WordInsertionMark(PositionalObject):
    """
    This class represents a word insertion mark.

    Args:
        wim_node (etree.Element): element that contains information about a word_insertion_mark.
        OR
        id (int): word id
        x (float)
        y (float)
        height (float)
        width (float)
        previous_word_id (int): id of the word to which word insertion mark is attached
        next_word_id (int): id of the word that gets flagged as coming after the inserted words
        line_number (int): number of the line, -1 if unknown
        symbol_id (str): id of the svg symbol that draws the mark, None if unknown
        inserted_words: Array->Word of inserted words marked by the word insertion mark.
        inserted_word_id (int): accepted for backward compatibility, not used by this class
        mark_type (str): type of the mark, defaults to 'A'
    """
    WARN_NO_GLYPH_ID = 'No glyph_id found'
    XML_TAG = 'word-insertion-mark'
    extraStringKeys = ['mark_type', 'symbol_id']

    def __init__(self, wim_node=None, id=0, x=-1.0, y=-1.0, height=0, width=0, previous_word_id=-1,
                 next_word_id=-1, line_number=-1, symbol_id=None, inserted_words=None,
                 inserted_word_id=-1, mark_type='A'):
        super(WordInsertionMark, self).__init__(id=id, node=wim_node, height=height, width=width,
                                                x=x, y=y, tag=WordInsertionMark.XML_TAG)
        # Extend the key lists with the keys specific to this class.
        self.stringKeys += WordInsertionMark.extraStringKeys
        self.intKeys += ['line_number', 'next_word_id', 'previous_word_id']
        self.symbol_id = symbol_id
        self.mark_type = mark_type
        self.line_number = line_number
        self.line = None
        self.previous_word_id = previous_word_id
        self.next_word_id = next_word_id
        # Keep a private copy: attach_and_update_word_if_involved reads this attribute,
        # and a shared (default) list must never leak between instances.
        self.inserted_words = [] if inserted_words is None else list(inserted_words)
        if wim_node is not None:
            # Values stored on the node take precedence over the keyword arguments.
            self.mark_type = wim_node.get('mark-type')
            self.line_number = self._get_int_attribute(wim_node, 'line-number')
            self.previous_word_id = self._get_int_attribute(wim_node, 'previous-word-id')
            self.next_word_id = self._get_int_attribute(wim_node, 'next-word-id')

    @staticmethod
    def _get_int_attribute(node, key, default=-1):
        """Return attribute `key` of `node` as int, or `default` if it is missing or empty.
        """
        value = node.get(key)
        return int(value) if value else default

    def init_inserted_words(self, inserted_words=None, wim_node=None, inserted_word_id_string=None):
        """Attach this word insertion mark to its inserted words and return them.

        If both wim_node and inserted_word_id_string are given, the words are created from
        all word nodes of the node's tree whose id lies between the first and the last id
        of the whitespace separated inserted_word_id_string; otherwise the passed
        inserted_words are used.

        :return: list of words on which set_word_insertion_mark(self) has been called
        """
        if inserted_words is None:
            inserted_words = []
        if wim_node is not None and inserted_word_id_string is not None:
            # split() without argument drops empty tokens caused by leading/double spaces,
            # which would otherwise turn the id range into an unmatched (NaN) comparison.
            ids = inserted_word_id_string.split()
            inserted_words = []
            if ids:
                root = wim_node.getroottree().getroot()
                query = './/word[@id>="{0}" and @id<="{1}"]'.format(ids[0], ids[-1])
                inserted_words = [Word.CREATE_WORD(word_node=word_node) for word_node in root.xpath(query)]
        for word in inserted_words:
            word.set_word_insertion_mark(self)
        return inserted_words

    def attach_and_update_word_if_involved(self, word):
        """Connect word with this mark if it is the previous or the next word.

        A word whose id equals previous_word_id/next_word_id is flagged accordingly and gets
        this mark as its word_insertion_mark. A word whose id matches one of the inserted
        words is replaced by that inserted word.

        :return: the (possibly replaced) word
        """
        if word.id == self.previous_word_id:
            word.is_before_inserted_words = True
            word.word_insertion_mark = self
        elif word.id == self.next_word_id:
            word.is_after_inserted_words = True
            word.word_insertion_mark = self
        else:
            for inserted in self.inserted_words:
                if inserted.id == word.id:
                    return inserted
        return word

    @classmethod
    def get_semantic_dictionary(cls):
        """ Creates a semantic dictionary as specified by SemanticClass.
        """
        # Name the class explicitly: super(cls, cls) would resolve to this very method
        # again (endless recursion) when called on a subclass that does not override it.
        dictionary = super(WordInsertionMark, cls).get_semantic_dictionary()
        properties = dictionary[cls.PROPERTIES_KEY]
        word_dicts = {
            key: {
                'class': Word,
                'cardinality': 1,
                'cardinality_restriction': 'maxCardinality',
                'label': 'has {} word'.format(key.replace('_word_id', '')),
                'name': 'has{}'.format(key.title().replace('_Id', '').replace('_', '')),
            }
            for key in ['previous_word_id', 'next_word_id']
        }
        properties.update(word_dicts)
        properties.update({
            'line': {
                'class': Line,
                'cardinality': 1,
                'name': 'wordInsertionMarkBelongsToLine',
                'label': 'word insertion mark belongs to a specific line',
            }
        })
        properties.update(cls.create_semantic_property_dictionary('mark_type', str, cardinality=1))
        properties.update(cls.create_semantic_property_dictionary('symbol_id', str, cardinality=1,
                                                                  cardinality_restriction='maxCardinality'))
        return cls.return_dictionary_after_updating_super_classes(dictionary)

    @staticmethod
    def CREATE_WORD_INSERTION_MARK(svg_path_tree, namespaces, id=0, x=0.0, y=0.0, xmin=0.0, ymin=0.0,
                                   line_number=-1, mark_type='A'):
        """Creates a (datatypes.word_insertion_mark) WordInsertionMark
        using a (lxml.ElementTree) svg_path_tree and the corresponding namespaces.

        The mark's glyph is looked up as the first `use` node whose x/y attributes lie
        within a small threshold around (x + xmin, y + ymin). Width and height are taken
        from the bounding box of the path of the symbol the `use` node refers to.
        If no such glyph reference is found, a warning is issued and a mark without
        symbol_id and size is returned.

        Args:
            svg_path_tree: tree that is searched for `use` and `symbol` nodes.
            namespaces (dict): must map the prefix 'ns' (used in the xpath queries)
                and the key 'xlink' (used to read the href attribute).
            id (int): id of the new mark
            x (float), y (float): position of the mark
            xmin (float), ymin (float): offsets added to x/y for the lookup in the svg
            line_number (int)
            mark_type (str)
        """
        THRESHOLD = 0.4
        svg_x = x + xmin
        svg_y = y + ymin
        use_nodes = svg_path_tree.xpath(
            '//ns:use[@x>="{0}" and @x<="{1}" and @y>="{2}" and @y<="{3}"]'.format(
                svg_x - THRESHOLD, svg_x + THRESHOLD, svg_y - THRESHOLD, svg_y + THRESHOLD),
            namespaces=namespaces)
        # A use node without a xlink:href cannot identify a glyph; treat it like a missing node.
        href = use_nodes[0].get('{%s}href' % namespaces['xlink']) if len(use_nodes) > 0 else None
        if href is None:
            warnings.warn('{} for word insertion mark {} on line {}'.format(
                WordInsertionMark.WARN_NO_GLYPH_ID, id, line_number))
            return WordInsertionMark(id=id, x=x, y=y, line_number=line_number, mark_type=mark_type)
        symbol_id = href.replace('#', '')
        d_strings = use_nodes[0].xpath('//ns:symbol[@id="{0}"]/ns:path/@d'.format(symbol_id),
                                       namespaces=namespaces)
        height = 0.0
        width = 0.0
        if len(d_strings) > 0 and d_strings[0] != '':
            path = parse_path(d_strings[0])
            # Distinct local names, so that the xmin/ymin parameters are not overwritten.
            path_xmin, path_xmax, path_ymin, path_ymax = path.bbox()
            width = path_xmax - path_xmin
            height = path_ymax - path_ymin
        # The y coordinate is reduced by the glyph's height.
        return WordInsertionMark(id=id, x=x, y=y - height, height=height, width=width,
                                 line_number=line_number, mark_type=mark_type, symbol_id=symbol_id)
Event Timeline
Log In to Comment