Page MenuHomec4science

get_text_field.py
No OneTemporary

File Metadata

Created
Sun, Sep 1, 11:29

get_text_field.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to create svg files with a rect for the text_field.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
from colorama import Fore, Style
import getopt
import json
import lxml.etree as ET
import shutil
import subprocess
import sys
import os
import wget
import cv2
import matplotlib.pyplot as plt
import numpy as np
from os import listdir, sep, path, setpgrp, devnull, mkdir, remove
from os.path import exists, isfile, isdir, dirname, basename
from progress.bar import Bar
import warnings
if dirname(__file__) not in sys.path:
sys.path.append(dirname(__file__))
from fix_old_data import save_page
sys.path.append('svgscripts')
from datatypes.faksimile_image import FaksimileImage
from datatypes.faksimile import FaksimilePage
from datatypes.archival_manuscript import ArchivalManuscriptUnity
from datatypes.page import Page, STATUS_MERGED_OK, STATUS_POSTMERGED_OK
from datatypes.text_field import TextField
from util import back_up, back_up_svg_file, copy_faksimile_update_image_location, copy_faksimile_svg_file
from process_files import update_svgposfile_status
from process_words_post_merging import update_faksimile_line_positions, MERGED_DIR
sys.path.append('shared_util')
from myxmlwriter import write_pretty, xml_has_type, FILE_TYPE_SVG_WORD_POSITION, FILE_TYPE_XML_MANUSCRIPT
from main_util import create_function_dictionary
# Module metadata.
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
# Minimum distance between two consecutive matching lines for the span between
# them to be reported by get_start_and_end_index() and
# get_start_and_end_index_gray(); also the index of the pixel row that
# get_text_field_on_image() probes before choosing its thresholding mode.
THRESHOLD = 60
# Gray value that lessThan() compares against (exclusive upper bound).
GRAY_THRESHOLD = 200
# NOTE(review): not referenced by the functions of this file -- presumably a
# page status used by other modules; confirm before removing.
BLANK_STATUS = 'blank'
def get_text_field_on_thumb(image_file: str, id=0) ->(TextField, float, float):
    """Find the area of the faksimile thumbnail where a manuscript page is displayed.

    The image is blurred, converted to gray scale and binarised with an
    inverted threshold of 220, i.e. every pixel brighter than 220 becomes 0
    and every other pixel becomes 255. The text field is then delimited by
    the rows and columns that get_start_and_end_index() reports for the
    binarised image and for its transpose.

    :param image_file: path of the image file to analyse.
    :param id: id that is passed on to the resulting TextField.
    :raises FileNotFoundError: if image_file is not an existing file.
    :return: (TextField in pixel coordinates of the image, image width, image height)
    """
    # cv2.imread does not raise on a missing file, it returns None; fail early
    # with the same error get_text_field_on_image() uses.
    if not isfile(image_file):
        raise FileNotFoundError(f'file {image_file} not found!')
    image = cv2.imread(image_file)
    blur = cv2.GaussianBlur(image, (3,3), 0)
    gray = cv2.cvtColor(blur, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray, 220, 255, cv2.THRESH_BINARY_INV)[1]
    original_height, original_width = image.shape[:2]
    # rows delimit the field vertically ...
    top, bottom = get_start_and_end_index(thresh)
    height = bottom-top
    # ... and the rows of the transposed image (= columns) horizontally
    left, right = get_start_and_end_index(thresh.T)
    width = right-left
    return (TextField(id=id, x=left, y=top, width=width, height=height), original_width, original_height)
def lessThan(x):
    """Return whether the value x lies below GRAY_THRESHOLD.
    """
    is_below_threshold = x < GRAY_THRESHOLD
    return is_below_threshold
def get_text_field_on_image(image_file: str, image_width: float, image_height: float, id=0) ->TextField:
    """Find the area of the faksimile image where a manuscript page is displayed
    and return it as a TextField scaled to image_width x image_height.

    The image is blurred and converted to gray scale. Pixel row THRESHOLD of
    the gray image is probed: if it contains at least one pixel with a gray
    value >= GRAY_THRESHOLD, the image is binarised with an inverted threshold
    of 220 (pixels brighter than 220 become 0), otherwise with a plain
    threshold of 200 (pixels brighter than 200 become 255). The rows and
    columns reported by get_start_and_end_index() for the binarised image are
    finally scaled from pixel coordinates to the requested dimensions and
    rounded to one decimal place.

    :param image_file: path of the image file to analyse.
    :param image_width: width to which horizontal pixel positions are scaled.
    :param image_height: height to which vertical pixel positions are scaled.
    :param id: id that is passed on to the resulting TextField.
    :raises FileNotFoundError: if image_file is not an existing file.
    :return: the TextField in the scaled coordinate system.
    """
    if not isfile(image_file):
        raise FileNotFoundError(f'file {image_file} not found!')
    image = cv2.imread(image_file)
    blur = cv2.GaussianBlur(image, (3,3), 0)
    gray = cv2.cvtColor(blur, cv2.COLOR_BGR2GRAY)
    # One vectorized comparison instead of calling a Python function per pixel.
    probe_row_has_light_pixel = bool(np.any(gray[THRESHOLD] >= GRAY_THRESHOLD))
    if probe_row_has_light_pixel:
        thresh = cv2.threshold(gray, 220, 255, cv2.THRESH_BINARY_INV)[1]
    else:
        thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)[1]
    original_height, original_width = image.shape[:2]
    # vertical extent: scan the rows
    startLine, lastLine = get_start_and_end_index(thresh)
    top = round(startLine*image_height/original_height, 1)
    bottom = round(lastLine*image_height/original_height, 1)
    height = bottom-top
    # horizontal extent: scan the rows of the transposed image, i.e. the columns
    startLine, lastLine = get_start_and_end_index(thresh.T)
    left = round(startLine*image_width/original_width, 1)
    right = round(lastLine*image_width/original_width, 1)
    width = right-left
    return TextField(id=id, x=left, y=top, width=width, height=height)
def get_start_and_end_index_gray(image, thresholdSum) ->(int, int):
    """Return the indices of two consecutive lines with a sum below thresholdSum
    that lie at least THRESHOLD apart.

    The lines of image are scanned in order. Each line whose sum is smaller
    than thresholdSum is compared with the previous such line (initially -1):
    as soon as their distance reaches THRESHOLD the scan stops and both
    indices are returned.

    :param image: iterable of lines, each of which can be summed by np.sum.
    :param thresholdSum: exclusive upper bound for the sum of a matching line.
    :return: (index of the previous matching line, index of the current matching line);
        the first value is -1 if no matching line precedes the gap or if no
        gap was found, in which case the second value is the index of the last
        matching line (-1 if there was none).
    """
    start_index = -1
    previous_match = -1
    for line_index, line in enumerate(image):
        # skip lines that do not match
        if start_index != -1 or not np.sum(line) < thresholdSum:
            continue
        if line_index - previous_match < THRESHOLD:
            # still too close to the previous matching line: keep scanning
            previous_match = line_index
            continue
        start_index, previous_match = previous_match, line_index
        break
    return start_index, previous_match
def get_start_and_end_index(thresh) ->(int, int):
    """Return the indices of two consecutive blank lines that lie at least
    THRESHOLD apart.

    The lines of thresh are scanned in order; a line counts as blank if its
    sum is 0. Each blank line is compared with the previous blank line
    (initially -1): as soon as their distance reaches THRESHOLD the scan stops
    and both indices are returned.

    :param thresh: iterable of lines, each of which can be summed by np.sum.
    :return: (index of the previous blank line, index of the current blank line);
        the first value is -1 if no blank line precedes the gap or if no gap
        was found, in which case the second value is the index of the last
        blank line (-1 if there was none).
    """
    start_index = -1
    previous_blank = -1
    for line_index, line in enumerate(thresh):
        # skip lines that are not blank
        if start_index != -1 or np.sum(line) != 0:
            continue
        if line_index - previous_blank < THRESHOLD:
            # still too close to the previous blank line: keep scanning
            previous_blank = line_index
            continue
        start_index, previous_blank = previous_blank, line_index
        break
    return start_index, previous_blank
def usage():
    """Print the command line help, which is the docstring of main().
    """
    help_text = main.__doc__
    print(help_text)
def main(argv):
    """This program can be used to find the text_field on a faksimile image and print it.

    fixes/get_text_field.py [OPTIONS] <faksimile-image> width height

        <faksimile-image>   the faksimile image file on which the text_field should be found
        width               the width to which the horizontal positions of the text_field are scaled
        height              the height to which the vertical positions of the text_field are scaled

        OPTIONS:
        -h|--help           show help

    :return: exit code (int)
    """
    # NOTE: this docstring is printed by usage(), so it doubles as the help text.
    try:
        opts, args = getopt.getopt(argv, "h", ["help"])
    except getopt.GetoptError:
        usage()
        return 2
    if any(opt in ('-h', '--help') for opt, _ in opts):
        usage()
        return 0
    if len(args) < 3:
        usage()
        return 2
    image_file = args[0]
    try:
        image_width = float(args[1])
        image_height = float(args[2])
    except ValueError:
        # non-numeric width/height is a usage error like any other bad argument
        usage()
        return 2
    if not isfile(image_file):
        raise FileNotFoundError(f'File {image_file} does not exist!')
    textfield = get_text_field_on_image(image_file, image_width, image_height)
    print(textfield)
    return 0
# Script entry point: hand the command line arguments (without the program
# name) to main() and use its return value as the process exit code.
if __name__ == "__main__":
    exit_code = main(sys.argv[1:])
    sys.exit(exit_code)

Event Timeline