# Open the PDF in the reader as a background process, discarding its output.
# preexec_fn=os.setsid makes the child call setsid() before exec, so the reader
# gets its own session/process group that can be signalled as a whole below.
# NOTE(review): DEVNULL is presumably a writable file object opened before this
# point (not visible here) -- confirm.
p = subprocess.Popen([PDF_READER, pdf_file], stdout=DEVNULL, stderr=DEVNULL, preexec_fn=os.setsid)
# Run the SVG editor in the foreground; subprocess.run() blocks until it exits.
# NOTE(review): if this call raises (e.g. the editor executable is missing), the
# two cleanup lines below are skipped -- consider wrapping them in try/finally.
subprocess.run([SVG_EDITOR, faksimile_svg_file])
# The editor has exited: send SIGTERM to the reader's whole process group.
os.killpg(os.getpgid(p.pid), signal.SIGTERM)
# Release the handle that received the reader's stdout/stderr.
DEVNULL.close()
def sort_words(page):
"""Returns sorted words (from top left to bottom right).
"""
if -1 in [ word.line_number for word in page.words ]:
warnings.warn('{} in page file {} for word with ids {}'.format(LineNumber.WARN_NO_LINE_NUMBER, page.page_tree.docinfo.URL, page.page_tree.xpath('//word[not(@line-number)]/@id')))
words = []
for line_number in page.line_numbers:
word_on_line = [ word for word in page.words if word.line_number == line_number.id ]
if line_number.id % 2 == 0:
words += sorted(word_on_line, key=lambda word: word.transkription_positions[0].left)
else:
words += sorted(word_on_line, key=cmp_to_key(\
lambda wordA, wordB: -1\
if wordA.transkription_positions[0].left < wordB.transkription_positions[0].left\
and abs(wordA.transkription_positions[0].bottom-wordB.transkription_positions[0].bottom) < wordA.transkription_positions[0].height/2\
"""This program can be used to merge the data of some faksimile pages with the data of xml files that are of type myxmlwriter.FILE_TYPE_SVG_WORD_POSITION.