Page MenuHomec4science

extract_footnotes.py
No OneTemporary

File Metadata

Created
Fri, Mar 29, 09:56

extract_footnotes.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to extract footnotes from a svg file.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
import getopt
import re
import sys
from os import listdir, sep, path
from os.path import isfile, isdir, dirname
import lxml.etree as ET
# Module metadata: authorship, contact, development status, license and version.
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from datatypes.matrix import Matrix
from datatypes.page import Page
from datatypes.transkriptionField import TranskriptionField
from datatypes.footnotes import extract_footnotes
def usage():
    """Print the command line help, which is the docstring of ``main``."""
    help_text = main.__doc__
    print(help_text)
def main(argv):
    """This program can be used to extract footnotes from a svg file.
    svgscripts/extract_footnotes.py [OPTIONS] <xml_svg_pos_file>
    <xml_svg_pos_file> a xml file containing information about the position of the svg words.
    OPTIONS:
    -h|--help: show help
    :return: exit code (int)
    """
    # NOTE: the docstring above doubles as the help text printed by usage(),
    # so its wording is part of the program's output.
    try:
        options, positional = getopt.getopt(argv, "h", ["help"])
    except getopt.GetoptError:
        # Unknown option: show the help and signal a usage error.
        usage()
        return 2

    # An explicit help request wins over everything else and is not an error.
    if any(flag in ('-h', '--help') for flag, _ in options):
        usage()
        return 0

    # Exactly the first positional argument is used; it must name an xml file.
    if not positional or not positional[0].endswith('xml'):
        usage()
        return 2

    page = Page(positional[0])
    # The svg file that is processed is the one the page refers to as its source.
    for footnote in extract_footnotes(page, svg_file=page.source):
        print(footnote.content)
        for markup in footnote.standoff_markups:
            print(f'->{markup.markup}, start:{markup.startIndex}, end:{markup.endIndex}')
    return 0
if __name__ == "__main__":
    # Run as a script: hand the command line arguments (without the program
    # name) to main() and use its return value as the process exit code.
    exit_code = main(sys.argv[1:])
    sys.exit(exit_code)

Event Timeline