Page MenuHomec4science

word_insertion_mark.py
No OneTemporary

File Metadata

Created
Fri, May 10, 06:46

word_insertion_mark.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This module provides the class WordInsertionMark, which represents a word insertion mark.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from lxml import etree as ET
from svgpathtools.parser import parse_path
import warnings
from .line import Line
from .positional_object import PositionalObject
from .word import Word
class WordInsertionMark(PositionalObject):
    """
    This class represents a word insertion mark.

    Args:
        wim_node (etree.Element): element that contains information about a word_insertion_mark.
        OR
        id (int): word id
        x (float)
        y (float)
        height (float)
        width (float)
        previous_word_id (int): id of the word to which word insertion mark is attached
        next_word_id (int): id of the word that follows the insertion (-1 if unknown)
        line_number (int): number of the line the mark belongs to (-1 if unknown)
        symbol_id (str): id of the svg symbol that draws the mark
        inserted_words: Array->Word of inserted words marked by the word insertion mark.
        inserted_word_id (int): accepted for backward compatibility; not used by the constructor.
        mark_type (str): type of the mark, 'A' by default
    """
    WARN_NO_GLYPH_ID = 'No glyph_id found'
    XML_TAG = 'word-insertion-mark'
    extraStringKeys = [ 'mark_type', 'symbol_id' ]

    def __init__(self, wim_node=None, id=0, x=-1.0, y=-1.0, height=0, width=0, previous_word_id=-1, next_word_id=-1, line_number=-1, symbol_id=None, inserted_words=None, inserted_word_id=-1, mark_type='A'):
        super(WordInsertionMark, self).__init__(id=id, node=wim_node, height=height, width=width, x=x, y=y, tag=WordInsertionMark.XML_TAG)
        # NOTE(review): '+=' extends the key lists in place; this assumes that
        # PositionalObject.__init__ provides per-instance lists -- confirm.
        self.stringKeys += [ 'mark_type', 'symbol_id' ]
        self.intKeys += [ 'line_number', 'next_word_id', 'previous_word_id' ]
        self.symbol_id = symbol_id
        self.mark_type = mark_type
        self.line_number = line_number
        self.line = None
        self.previous_word_id = previous_word_id
        self.next_word_id = next_word_id
        # attach_and_update_word_if_involved reads self.inserted_words, so the
        # attribute must always exist. A fresh list is created per instance
        # instead of sharing one mutable default between all instances.
        self.inserted_words = inserted_words if inserted_words is not None else []
        if wim_node is not None:
            # Values found on the node take precedence over the keyword arguments.
            self.mark_type = wim_node.get('mark-type')
            self.line_number = self._get_int_attribute(wim_node, 'line-number')
            self.previous_word_id = self._get_int_attribute(wim_node, 'previous-word-id')
            self.next_word_id = self._get_int_attribute(wim_node, 'next-word-id')

    @staticmethod
    def _get_int_attribute(node, attribute_name, default=-1):
        """Returns the attribute of node as int, or default if it is missing or empty.
        """
        value = node.get(attribute_name)
        return int(value) if value else default

    def init_inserted_words(self, inserted_words=None, wim_node=None, inserted_word_id_string=None):
        """Registers this mark on its inserted words and returns them.

        If both wim_node and inserted_word_id_string are given, the words are
        created from all word nodes of the node's tree whose id lies between
        the first and the last id of the space separated id string. Otherwise
        the passed inserted_words are used.

        :return: list of the inserted words
        """
        if inserted_words is None:
            # Never hand out a shared default list: callers may modify the result.
            inserted_words = []
        if wim_node is not None and inserted_word_id_string is not None:
            ids = inserted_word_id_string.split(' ')
            query = './/word[@id>="{0}" and @id<="{1}"]'.format(ids[0], ids[-1])
            root = wim_node.getroottree().getroot()
            inserted_words = [ Word.CREATE_WORD(word_node=word_node) for word_node in root.xpath(query) ]
        for word in inserted_words:
            word.set_word_insertion_mark(self)
        return inserted_words

    def attach_and_update_word_if_involved(self, word):
        """Connects word with this mark if the mark refers to it.

        The word before and the word after the insertion get flagged and
        receive a reference to this mark. If word has the same id as one of
        the inserted words, that inserted word is returned in its place.

        :return: the (possibly replaced) word
        """
        if word.id == self.previous_word_id:
            word.is_before_inserted_words = True
            word.word_insertion_mark = self
        elif word.id == self.next_word_id:
            word.is_after_inserted_words = True
            word.word_insertion_mark = self
        else:
            for inserted in self.inserted_words:
                if inserted.id == word.id:
                    return inserted
        return word

    @classmethod
    def get_semantic_dictionary(cls):
        """ Creates a semantic dictionary as specified by SemanticClass.
        """
        # super(cls, cls) would resolve to this very method again whenever cls
        # is a subclass, i.e. it would recurse endlessly. Naming the class
        # explicitly always continues with the parent implementation.
        dictionary = super(WordInsertionMark, cls).get_semantic_dictionary()
        word_dicts = { key: { 'class': Word, 'cardinality': 1, 'cardinality_restriction': 'maxCardinality',\
                'label': 'has {} word'.format(key.replace('_word_id','')),\
                'name': 'has{}'.format(key.title().replace('_Id','').replace('_','')) }\
                for key in [ 'previous_word_id', 'next_word_id' ] }
        dictionary[cls.PROPERTIES_KEY].update(word_dicts)
        dictionary[cls.PROPERTIES_KEY].update({'line': {'class': Line, 'cardinality': 1,\
                'name': 'wordInsertionMarkBelongsToLine', 'label': 'word insertion mark belongs to a specific line'}})
        dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('mark_type', str, cardinality=1))
        dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('symbol_id', str, cardinality=1, cardinality_restriction='maxCardinality'))
        return cls.return_dictionary_after_updating_super_classes(dictionary)

    @staticmethod
    def CREATE_WORD_INSERTION_MARK(svg_path_tree, namespaces, id=0, x=0.0, y=0.0, xmin=0.0, ymin=0.0, line_number=-1, mark_type='A'):
        """Creates a (datatypes.word_insertion_mark) WordInsertionMark
        using a (lxml.ElementTree) svg_path_tree and the corresponding namespaces.

        The svg tree is searched for a use node located at (x + xmin, y + ymin)
        within a tolerance of THRESHOLD. If one is found, the mark gets the id
        of the referenced symbol and the size of the symbol path's bounding
        box; otherwise a warning is issued and a mark without symbol and size
        is returned.
        """
        THRESHOLD = 0.4
        svg_x = x + xmin
        svg_y = y + ymin
        use_nodes = svg_path_tree.xpath('//ns:use[@x>="{0}" and @x<="{1}" and @y>="{2}" and @y<="{3}"]'\
                .format(svg_x-THRESHOLD, svg_x+THRESHOLD,svg_y-THRESHOLD, svg_y+THRESHOLD), namespaces=namespaces)
        if len(use_nodes) == 0:
            warnings.warn('{} for word insertion mark {} on line {}'.format(WordInsertionMark.WARN_NO_GLYPH_ID, id, line_number))
            return WordInsertionMark(id=id, x=x, y=y, line_number=line_number, mark_type=mark_type)
        use_node = use_nodes[0]
        symbol_id = use_node.get('{%s}href' % namespaces['xlink']).replace('#', '')
        d_strings = use_node.xpath('//ns:symbol[@id="{0}"]/ns:path/@d'.format(symbol_id), namespaces=namespaces)
        height = 0.0
        width = 0.0
        if len(d_strings) > 0 and d_strings[0] != '':
            path = parse_path(d_strings[0])
            # Distinct names keep the bounding box from shadowing the xmin/ymin parameters.
            box_xmin, box_xmax, box_ymin, box_ymax = path.bbox()
            width = box_xmax - box_xmin
            height = box_ymax - box_ymin
        # y is moved up by the height of the symbol.
        return WordInsertionMark(id=id, x=x, y=y-height, height=height, width=width, line_number=line_number,\
                mark_type=mark_type, symbol_id=symbol_id)

Event Timeline