File Metadata

Created: Sat, Jul 12, 15:53

extractAndConvert.py
View Options

	#!/usr/bin/env python3
	# -*- coding: utf-8 -*-

	""" This program can be used to start extraction of word position and showing the word positions to HTML for testing purposes.
	"""
	# Copyright (C) University of Basel 2019 {{{1
	#
	# This program is free software: you can redistribute it and/or modify
	# it under the terms of the GNU General Public License as published by
	# the Free Software Foundation, either version 3 of the License, or
	# (at your option) any later version.
	#
	# This program is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU General Public License for more details.
	#
	# You should have received a copy of the GNU General Public License
	# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}

	import getopt
	import re
	import sys
	from os import sep, path
	from os.path import isfile, dirname
	import lxml.etree as ET

	from extractWordPosition import Extractor
	from convert_wordPositions import HTMLConverter

	sys.path.append('shared_util')
	from myxmlwriter import write_pretty

	# Module metadata; the maintainer is set to the same value as the author.
	__author__ = "Christian Steiner"
	__maintainer__ = __author__
	__copyright__ = 'University of Basel'
	__email__ = "christian.steiner@unibas.ch"
	__status__ = "Development"
	__license__ = "GPL v3"
	__version__ = "0.0.1"

	def usage():
	    """Print the command line help, i.e. the docstring of main(), to stdout.
	    """
	    help_text = main.__doc__
	    print(help_text)

	def main(argv):
	    """This program can be used to extract word positions and to display them as HTML for testing purposes.

	    svgscripts/extractAndConvert.py [OPTIONS] <file>

	        <file> svg file OR xml target file containing file name of svg file as "/page/@source".

	        OPTIONS:
	        -h|--help: show help
	        -s|--svg=svgFile: svg web file
	        -H|--HTML [default] convert to HTML test file
	        -T|--Testing: testing mode (the HTML converter is created with non_testing=False)
	        -x|--xml-target-file=xmlOutputFile: target file
	        -p|--page=pageNumber: page number of the current page. For use with _one_ file only.
	        -P|--PDF=pdfFile: pdf file - used for word correction
	        -t|--title=title: title of the manuscript to which the current page(s) belong(s)

	    :param argv: command line arguments without the program name (list of str)
	    :return: exit code (int): 0 on success, 2 on usage errors or missing input
	    """
	    file_name = None
	    non_testing = True
	    page_number = None
	    pdfFile = None
	    svg_file = None
	    title = None
	    xml_target_file = None
	    # Default output directory; replaced below when an existing xml target file is given.
	    xml_dir = ".{}xml".format(sep)

	    try:
	        opts, args = getopt.getopt(argv, "hTHt:p:s:x:P:", ["help", "Testing", "HTML", "title=", "page=", "svg=", "xml-target-file=", "PDF="])
	    except getopt.GetoptError as error:
	        # Tell the user which option was rejected before showing the help text.
	        print(error, file=sys.stderr)
	        usage()
	        return 2

	    for opt, arg in opts:
	        if opt in ('-h', '--help'):
	            usage()
	            return 0
	        elif opt in ('-T', '--Testing'):
	            non_testing = False
	        elif opt in ('-t', '--title'):
	            title = arg
	        elif opt in ('-p', '--page'):
	            page_number = arg
	        elif opt in ('-s', '--svg'):
	            svg_file = arg
	        elif opt in ('-P', '--PDF'):
	            pdfFile = arg
	        elif opt in ('-x', '--xml-target-file'):
	            xml_target_file = arg
	        # '-H'/'--HTML' needs no handling: this script always converts to HTML.

	    if not args or args[0].endswith('xml'):
	        # No positional file, or the positional file is the xml target file itself:
	        # the input file and missing settings are read from the xml target file.
	        if xml_target_file is None and args:
	            xml_target_file = args[0]
	        if xml_target_file is not None and isfile(xml_target_file):
	            xml_dir = dirname(xml_target_file)
	            target_file_tree = ET.parse(xml_target_file)
	            root_node = target_file_tree.getroot()
	            file_name = root_node.get('source')
	            # Command line values take precedence over the values stored in the xml file.
	            if title is None:
	                title = root_node.get('title')
	            if page_number is None:
	                page_number = root_node.get('number')
	            if svg_file is None:
	                # Prefer 'svg-image/@file-name'; fall back to 'svg/@file' only if
	                # the tree contains no 'svg-image' node at all.
	                if target_file_tree.xpath('//svg-image'):
	                    svg_candidates = target_file_tree.xpath('.//svg-image/@file-name')
	                else:
	                    svg_candidates = target_file_tree.xpath('.//svg/@file')
	                svg_file = svg_candidates[0] if svg_candidates else None
	    else:
	        file_name = args[0]

	    if file_name is None:
	        usage()
	        return 2
	    if not isfile(file_name):
	        print("'{}' does not exist!".format(file_name))
	        return 2

	    extractor = Extractor(xml_dir=xml_dir, title=title)
	    page = extractor.extract_information(file_name, page_number=page_number, xml_target_file=xml_target_file, pdfFile=pdfFile, svg_file=svg_file)
	    if page.svg_file is None:
	        print('Please specify a svg file!')
	        usage()
	        return 2
	    converter = HTMLConverter(page, non_testing=non_testing)
	    converter.convert()
	    if xml_target_file is not None:
	        # dirname() yields '' for a bare file name; path.join keeps the result
	        # relative in that case, whereas plain concatenation with sep would
	        # produce '/<name>' in the filesystem root.
	        xml_target_file = path.join(xml_dir, path.basename(xml_target_file))
	        page.page_tree.getroot().set('source', file_name)
	        write_pretty(xml_element_tree=page.page_tree, file_name=xml_target_file, script_name=__file__, file_type='svgWordPosition')
	    return 0

	if __name__ == "__main__":
	    # Run the command line interface and hand main()'s return value to the shell as exit status.
	    exit_code = main(sys.argv[1:])
	    sys.exit(exit_code)

extractAndConvert.py
No OneTemporary
Actions

File Metadata

extractAndConvert.py
View Options

Event Timeline

extractAndConvert.pyNo OneTemporaryActions

File Metadata

extractAndConvert.pyView Options

Event Timeline

extractAndConvert.py
No OneTemporary
Actions

extractAndConvert.py
View Options