get_text_field.py
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Sun, Jul 13, 08:08

get_text_field.py
View Options

	#!/usr/bin/env python3
	# -*- coding: utf-8 -*-

	""" This program can be used to find the text field of a faksimile image,
	    i.e. the area of the image where the manuscript page is displayed.
	"""
	# Copyright (C) University of Basel 2019 {{{1
	#
	# This program is free software: you can redistribute it and/or modify
	# it under the terms of the GNU General Public License as published by
	# the Free Software Foundation, either version 3 of the License, or
	# (at your option) any later version.
	#
	# This program is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU General Public License for more details.
	#
	# You should have received a copy of the GNU General Public License
	# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}

	from colorama import Fore, Style
	import getopt
	import json
	import lxml.etree as ET
	import shutil
	import subprocess
	import sys
	import os
	import wget
	import cv2
	import matplotlib.pyplot as plt
	import numpy as np
	from os import listdir, sep, path, setpgrp, devnull, mkdir, remove
	from os.path import exists, isfile, isdir, dirname, basename
	from progress.bar import Bar
	import warnings

	if dirname(__file__) not in sys.path:
	sys.path.append(dirname(__file__))
	from fix_old_data import save_page

	sys.path.append('svgscripts')
	from datatypes.faksimile_image import FaksimileImage
	from datatypes.faksimile import FaksimilePage
	from datatypes.archival_manuscript import ArchivalManuscriptUnity
	from datatypes.page import Page, STATUS_MERGED_OK, STATUS_POSTMERGED_OK
	from datatypes.text_field import TextField
	from util import back_up, back_up_svg_file, copy_faksimile_update_image_location, copy_faksimile_svg_file
	from process_files import update_svgposfile_status
	from process_words_post_merging import update_faksimile_line_positions, MERGED_DIR

	sys.path.append('shared_util')
	from myxmlwriter import write_pretty, xml_has_type, FILE_TYPE_SVG_WORD_POSITION, FILE_TYPE_XML_MANUSCRIPT
	from main_util import create_function_dictionary


	__author__ = "Christian Steiner"
	__maintainer__ = __author__
	__copyright__ = 'University of Basel'
	__email__ = "christian.steiner@unibas.ch"
	__status__ = "Development"
	__license__ = "GPL v3"
	__version__ = "0.0.1"

	# Distance limit (in rows/columns) that the get_start_and_end_index* helpers
	# compare the gap between two matching lines against; it is also the index
	# of the image row that get_text_field_on_image samples.
	THRESHOLD = 60
	# Exclusive upper bound that lessThan() compares a value against.
	GRAY_THRESHOLD = 200
	# NOTE(review): not referenced in the code visible here -- presumably a
	# status value used by other modules; confirm before removing.
	BLANK_STATUS = 'blank'

	def get_text_field_on_thumb(image_file: str, id=0) -> (TextField, float, float):
	    """Find the area of the faksimile image where a manuscript page is displayed
	    and return it as a TextField together with the size of the image.

	    :param image_file: path of the image file that is read with OpenCV.
	    :param id: id that is passed on to the resulting TextField.
	    :return: a tuple (text_field, image_width, image_height); the text field
	        is given in pixel coordinates of the image, width and height are
	        the dimensions of the image as read by OpenCV.
	    :raises FileNotFoundError: if image_file is not an existing file.
	    :raises ValueError: if OpenCV cannot decode image_file.
	    """
	    # cv2.imread does not raise on failure, it returns None. Fail early
	    # with the same error that get_text_field_on_image raises instead of
	    # with an obscure cv2.error from the blur step.
	    if not isfile(image_file):
	        raise FileNotFoundError(f'file {image_file} not found!')
	    image = cv2.imread(image_file)
	    if image is None:
	        raise ValueError(f'file {image_file} could not be read as an image!')
	    blur = cv2.GaussianBlur(image, (3, 3), 0)
	    gray = cv2.cvtColor(blur, cv2.COLOR_BGR2GRAY)
	    # Inverted binary image: pixels brighter than 220 become 0, all others 255,
	    # so a row/column that sums to 0 contains only bright pixels.
	    thresh = cv2.threshold(gray, 220, 255, cv2.THRESH_BINARY_INV)[1]
	    original_height, original_width = image.shape[:2]
	    top, bottom = get_start_and_end_index(thresh)
	    # The transposed image yields the columns, i.e. the horizontal extent.
	    left, right = get_start_and_end_index(thresh.T)
	    text_field = TextField(id=id, x=left, y=top, width=right-left, height=bottom-top)
	    return (text_field, original_width, original_height)

	def lessThan(x):
	    """Tell whether x is below GRAY_THRESHOLD.

	    :return: the result of the comparison x < GRAY_THRESHOLD.
	    """
	    is_below = x < GRAY_THRESHOLD
	    return is_below

	def get_text_field_on_image(image_file: str, image_width: float, image_height: float, id=0) -> TextField:
	    """Find the area of the faksimile image where a manuscript page is displayed
	    and return it as a TextField.

	    The area is detected on the pixels of the image file and then scaled
	    from the pixel size of the file to image_width x image_height.

	    :param image_file: path of the image file that is read with OpenCV.
	    :param image_width: width to which horizontal pixel positions are scaled.
	    :param image_height: height to which vertical pixel positions are scaled.
	    :param id: id that is passed on to the resulting TextField.
	    :return: TextField whose x and y are the scaled positions rounded to one
	        decimal; width and height are the differences of the rounded edges.
	    :raises FileNotFoundError: if image_file is not an existing file.
	    :raises ValueError: if OpenCV cannot decode image_file.
	    """
	    if not isfile(image_file):
	        raise FileNotFoundError(f'file {image_file} not found!')
	    image = cv2.imread(image_file)
	    # cv2.imread returns None (it does not raise) if the file cannot be decoded.
	    if image is None:
	        raise ValueError(f'file {image_file} could not be read as an image!')
	    blur = cv2.GaussianBlur(image, (3, 3), 0)
	    gray = cv2.cvtColor(blur, cv2.COLOR_BGR2GRAY)
	    # The row with index THRESHOLD decides how the image is binarized:
	    # if at least one of its pixels is not below GRAY_THRESHOLD, pixels
	    # brighter than 220 become 0 (inverted threshold); otherwise pixels
	    # brighter than 200 become 255.
	    if np.any(gray[THRESHOLD] >= GRAY_THRESHOLD):
	        thresh = cv2.threshold(gray, 220, 255, cv2.THRESH_BINARY_INV)[1]
	    else:
	        thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)[1]
	    original_height, original_width = image.shape[:2]
	    first_row, last_row = get_start_and_end_index(thresh)
	    top = round(first_row*image_height/original_height, 1)
	    bottom = round(last_row*image_height/original_height, 1)
	    # The transposed image yields the columns, i.e. the horizontal extent.
	    first_column, last_column = get_start_and_end_index(thresh.T)
	    left = round(first_column*image_width/original_width, 1)
	    right = round(last_column*image_width/original_width, 1)
	    return TextField(id=id, x=left, y=top, width=right-left, height=bottom-top)

	def get_start_and_end_index_gray(image, thresholdSum) -> (int, int):
	    """Return the indices of the two matching lines that enclose the first large gap.

	    A line of image matches if its sum is less than thresholdSum. The lines
	    are visited in order; as soon as a matching line lies THRESHOLD or more
	    positions behind the previous matching line (or behind -1 if there was
	    none), the pair (previous index, current index) is returned.

	    :return: (start, end) as described above, or (-1, index of the last
	        matching line) if no such gap exists; (-1, -1) if no line matches.
	    """
	    previous_match = -1
	    for line_index, line in enumerate(image):
	        if not np.sum(line) < thresholdSum:
	            continue
	        if line_index - previous_match >= THRESHOLD:
	            return previous_match, line_index
	        previous_match = line_index
	    return -1, previous_match

	def get_start_and_end_index(thresh) -> (int, int):
	    """Return the indices of the two zero-sum lines that enclose the first large gap.

	    The lines of thresh are visited in order and only lines whose sum is 0
	    are considered. As soon as such a line lies THRESHOLD or more positions
	    behind the previous zero-sum line (or behind -1 if there was none), the
	    pair (previous index, current index) is returned.

	    :return: (start, end) as described above, or (-1, index of the last
	        zero-sum line) if no such gap exists; (-1, -1) if no line sums to 0.
	    """
	    previous_empty = -1
	    for line_index, line in enumerate(thresh):
	        if np.sum(line) != 0:
	            continue
	        if line_index - previous_empty >= THRESHOLD:
	            return previous_empty, line_index
	        previous_empty = line_index
	    return -1, previous_empty

	def usage():
	    """Print the docstring of main, which serves as the help text of the script.
	    """
	    help_text = main.__doc__
	    print(help_text)

	def main(argv):
	    """This program can be used to find the text field of a faksimile image and print it.

	    fixes/get_text_field.py [OPTIONS] <faksimile-image> width height

	    <faksimile-image>   the image file on which the text field should be found
	    width               the width to which the text field is scaled
	    height              the height to which the text field is scaled

	    OPTIONS:
	    -h|--help           show help

	    :return: exit code (int)
	    """
	    try:
	        opts, args = getopt.getopt(argv, "h", ["help"])
	    except getopt.GetoptError:
	        usage()
	        return 2
	    if any(opt in ('-h', '--help') for opt, _ in opts):
	        usage()
	        return 0
	    if len(args) < 3:
	        usage()
	        return 2
	    image_file = args[0]
	    try:
	        image_width = float(args[1])
	        image_height = float(args[2])
	    except ValueError:
	        # A non-numeric size is a usage error just like a missing argument.
	        usage()
	        return 2
	    if not isfile(image_file):
	        raise FileNotFoundError(f'File {image_file} does not exist!')
	    print(get_text_field_on_image(image_file, image_width, image_height))
	    return 0

	# Run main with the command line arguments (without the script name) and
	# use its return value as the exit code of the process.
	if __name__ == "__main__":
	    exit_code = main(sys.argv[1:])
	    sys.exit(exit_code)

get_text_field.pyNo OneTemporaryActions

File Metadata

get_text_field.pyView Options

Event Timeline

get_text_field.py
No OneTemporary
Actions

get_text_field.py
View Options