Page MenuHomec4science

crop_rotate_thumbs.py
No OneTemporary

File Metadata

Created
Sun, May 5, 12:48

crop_rotate_thumbs.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to crop and/or rotate thumb images.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
from colorama import Fore, Style
import cv2
import getopt
import json
import lxml.etree as ET
from numpy import ndarray
import shutil
import subprocess
import sys
import os
import wget
from os import listdir, sep, path, setpgrp, devnull, mkdir, remove
from os.path import exists, isfile, isdir, dirname, basename
from progress.bar import Bar
import warnings
from fix_old_data import save_page
from get_text_field import get_text_field_on_image, get_text_field_on_thumb
sys.path.append('svgscripts')
from datatypes.faksimile_image import FaksimileImage
from datatypes.faksimile import FaksimilePage
from datatypes.archival_manuscript import ArchivalManuscriptUnity
from datatypes.page import Page, STATUS_MERGED_OK, STATUS_POSTMERGED_OK
from datatypes.matrix import Matrix
from util import back_up, back_up_svg_file, copy_faksimile_update_image_location, copy_faksimile_svg_file
from process_files import update_svgposfile_status
from process_words_post_merging import update_faksimile_line_positions, MERGED_DIR
sys.path.append('shared_util')
from myxmlwriter import copy_to_bak_dir, write_pretty, xml_has_type, FILE_TYPE_SVG_WORD_POSITION, FILE_TYPE_XML_MANUSCRIPT
from main_util import create_function_dictionary
# Module metadata.
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
# NOTE(review): the following two constants are not referenced by the code
# visible in this file; presumably they are kept for importers -- confirm
# before removing.
MAX_SVG_XY_THRESHOLD = 10
BLANK_STATUS = 'blank'
class ImageManipulator:
    """This class can be used in order to crop and/or rotate thumb images.

    Attributes:
        faksimile_dir: directory in which the thumb images and the thumb
            svg files are looked up and into which results are written.
        fix_doubles: if True, a thumb svg whose file name contains 'et' is
            replaced by '<part of the name before "et">_thumb.svg'.
    """
    # Set to True by tests in order to silence the progress output of main().
    UNITTESTING = False

    def __init__(self, faksimile_dir=None, fix_doubles=False):
        self.faksimile_dir = faksimile_dir
        self.fix_doubles = fix_doubles

    def _crop_image(self, image: ndarray, image_width: float, image_height: float, x: float, y: float, height: float, width: float) -> ndarray:
        """Crop image. Return cropped image.

        The rectangle (x, y, width, height) is expressed in the coordinate
        system in which the whole image measures image_width x image_height;
        it is scaled to the pixel size of ``image`` before slicing.

        Note the parameter order: ``height`` precedes ``width``.

        The image is returned unchanged if any of the four sizes is not
        positive or if the rectangle lies completely outside of the image,
        since there is nothing sensible to crop in these cases.
        """
        # Only the first two axes are needed, so 2-D (grayscale) arrays work too.
        original_height, original_width = image.shape[:2]
        if min(image_width, image_height, width, height) <= 0:
            return image
        image_x = round(x*original_width/image_width)
        image_w = round(width*original_width/image_width)
        image_y = round(y*original_height/image_height)
        image_h = round(height*original_height/image_height)
        # Clamp to the image: a negative start index would make numpy count
        # from the opposite border and yield a wrong (usually empty) region.
        left, top = max(0, image_x), max(0, image_y)
        right = min(original_width, image_x + image_w)
        bottom = min(original_height, image_y + image_h)
        if right <= left or bottom <= top:
            return image
        return image[top:bottom, left:right]

    def crop_according2svg(self, image: ndarray, thumb_svg: str) -> ndarray:
        """Crop according to svg and return image.

        The size of the first svg <image> element is taken as the size of the
        uncropped image, the viewBox of the svg root as the rectangle to keep.
        The image is returned unchanged if the svg contains no <image>
        element or if size/viewBox are missing.
        """
        svg_tree = ET.parse(thumb_svg)
        root = svg_tree.getroot()
        # The default namespace has the key None, which xpath cannot use.
        namespaces = {k if k is not None else 'ns': v for k, v in root.nsmap.items()}
        images = svg_tree.xpath('//ns:image', namespaces=namespaces)
        if not images:
            return image
        image_width = float(images[0].get('width') or 0)
        image_height = float(images[0].get('height') or 0)
        view_box = root.get('viewBox') or '0 0 0 0'
        # viewBox numbers may be separated by whitespace and/or commas.
        x, y, width, height = [float(i) for i in view_box.replace(',', ' ').split()]
        return self._crop_image(image, image_width, image_height, x, y, height, width)

    def rotate_according2transform(self, image, transform) -> ndarray:
        """Rotate image according to transform

        The image is rotated only if Matrix(transform).get90DegreeIndex()
        returns an index other than -1; that index selects clockwise 90,
        180 or counterclockwise 90 degrees (in this order). Otherwise the
        image is returned unchanged.
        """
        rotation_flags = [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_180, cv2.ROTATE_90_COUNTERCLOCKWISE]
        matrix = Matrix(transform)
        mindex = matrix.get90DegreeIndex()
        if mindex != -1:
            return cv2.rotate(image, rotation_flags[mindex])
        return image

    def process_thumb_of_page(self, page_file: str) -> int:
        """Crop and/or rotate the thumb image of a page and register the result.

        The thumb is cropped if a thumb svg file exists and rotated if the
        page's faksimile-image node has a transform. The new image is written
        next to the original thumb, its name is stored in the 'thumb'
        attribute of the faksimile-image node and the page is saved.

        [return] exit_status: 0 if a new thumb has been written,
                              1 if there was nothing to do,
                              2 if the page has no faksimile-image node.
        [raise] FileNotFoundError if the thumb image cannot be read.
        """
        page = Page.create_cls(page_file)
        faksimile_nodes = page.page_tree.xpath('//faksimile-image')
        if not faksimile_nodes:
            print(page_file)
            return 2
        faksimile_node = faksimile_nodes[0]
        transform = faksimile_node.get('transform')
        thumb_image = self.faksimile_dir + sep + faksimile_node.get('file-name').replace('.jpg', '_thumb.jpg')
        thumb_svg = thumb_image.replace('.jpg', '.svg')
        if faksimile_node.get('thumb'):
            thumb_svg = self.faksimile_dir + sep + faksimile_node.get('thumb')
            # The attribute may already point to a jpg; in that case fall
            # back to the svg that belongs to the original thumb.
            if thumb_svg.endswith('jpg'):
                thumb_svg = thumb_image.replace('.jpg', '.svg')
        if self.fix_doubles and 'et' in basename(thumb_svg):
            base_thumb_svg = basename(thumb_svg)
            thumb_svg = self.faksimile_dir + sep + base_thumb_svg[:base_thumb_svg.index('et')] + '_thumb.svg'
        has_thumb_svg = isfile(thumb_svg)
        if not (has_thumb_svg or transform):
            return 1
        image = cv2.imread(thumb_image)
        if image is None:
            # cv2.imread does not raise on failure, it just returns None.
            raise FileNotFoundError(f'Thumb image {thumb_image} could not be read!')
        target_extension = ''
        if has_thumb_svg:
            image = self.crop_according2svg(image, thumb_svg)
            target_extension = '_cropped'
        if transform:
            image = self.rotate_according2transform(image, transform)
            target_extension = target_extension + '_rotated'
        target_file = thumb_image.replace('_thumb', target_extension + '_thumb')
        if 'et' in basename(thumb_image):
            base_thumb_image = basename(thumb_image)
            et_index = base_thumb_image.index('et')
            text_field_left = page.faksimile_image.text_field.left
            if text_field_left < (page.faksimile_image.width-text_field_left)/2:
                # Text field on the left: keep the part of the name before 'et'.
                target_file = self.faksimile_dir + sep + base_thumb_image[:et_index] + target_extension + '_thumb.jpg'
            else:
                # Otherwise drop 'et' and the two characters in front of it.
                # NOTE(review): presumably a two character page identifier -- confirm.
                target_file = self.faksimile_dir + sep \
                    + base_thumb_image[:et_index-2]\
                    + base_thumb_image[et_index+2:].replace('_thumb', target_extension + '_thumb')
        cv2.imwrite(target_file, image)
        faksimile_node.set('thumb', basename(target_file))
        save_page(page, backup=True)
        return 0
def usage():
    """Print the command line help of this script, i.e. the docstring of main.
    """
    help_text = main.__doc__
    print(help_text)
def main(argv):
    """This program can be used to crop and/or rotate thumb images.

    fixes/crop_rotate_thumbs.py [OPTIONS] <xmlManuscriptFile|svg_pos_file> <faksimile-dir>

        <xmlManuscriptFile>     a xml file about a manuscript, containing information about its pages.
        <svg_pos_file>          a xml file about a page, containing information about svg word positions.
        <faksimile-dir>         a directory containing faksimile images

        OPTIONS:
        -h|--help               show help
        -d|--fix-doubles        fix double pages

        :return: exit code (int)
    """
    try:
        opts, args = getopt.getopt(argv, "hd", ["help", "fix-doubles"])
    except getopt.GetoptError:
        usage()
        return 2

    fix_doubles = False
    for opt, _ in opts:
        if opt in ('-h', '--help'):
            usage()
            return 0
        if opt in ('-d', '--fix-doubles'):
            fix_doubles = True

    if len(args) < 2:
        usage()
        return 2
    xml_file, faksimile_dir = args[0], args[1]

    # A missing directory is reported in preference to a missing file.
    if not isdir(faksimile_dir):
        raise FileNotFoundError(f'Directory {faksimile_dir} does not exist!')
    if not isfile(xml_file):
        raise FileNotFoundError('File {} does not exist!'.format(xml_file))

    name_parts = xml_file.split('_')
    if len(name_parts) > 2: # svg_pos_file
        # The first two '_'-separated parts of the name form the manuscript file.
        manuscript_file = '_'.join(name_parts[0:2]) + '.xml'
        if not isfile(manuscript_file):
            raise FileNotFoundError(f'There is no manuscript file {manuscript_file} for svg_pos_file {xml_file}!')
        source_tree = ET.parse(manuscript_file)
        xpath = f'//page[contains(@output,"{xml_file}")]'
    else:
        manuscript_file = xml_file
        source_tree = ET.parse(xml_file)
        xpath = '//page'

    image_manipulator = ImageManipulator(faksimile_dir, fix_doubles=fix_doubles)
    title = basename(manuscript_file).replace('.xml','').replace('_',' ')
    verbose = not ImageManipulator.UNITTESTING
    counter = 0
    for page in source_tree.xpath(xpath):
        if verbose:
            number = page.get('number')
            print(Fore.CYAN + f'Processing thumb image of {title}, {number} ...' + Style.RESET_ALL)
        # Exit status 0 means that a new thumb image has been written.
        if image_manipulator.process_thumb_of_page(page.get('output')) == 0:
            counter += 1
    if verbose:
        print(Style.RESET_ALL + f'[{counter} images changed]')
    return 0
if __name__ == "__main__":
    # Hand the exit code of main over to the calling shell.
    exit_code = main(sys.argv[1:])
    sys.exit(exit_code)

Event Timeline