create_folio_name.py
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Tue, May 7, 11:21

create_folio_name.py
View Options

	#!/usr/bin/env python3
	# -*- coding: utf-8 -*-

	"""This program can be used to add a name for a folio.
	"""
	# Copyright (C) University of Basel 2021 {{{1
	#
	# This program is free software: you can redistribute it and/or modify
	# it under the terms of the GNU General Public License as published by
	# the Free Software Foundation, either version 3 of the License, or
	# (at your option) any later version.
	#
	# This program is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU General Public License for more details.
	#
	# You should have received a copy of the GNU General Public License
	# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}

	from colorama import Fore, Style
	import csv
	import getopt
	import json
	import lxml.etree as ET
	import shutil
	import subprocess
	import sys
	import os
	import wget
	from os import listdir, sep, path, setpgrp, devnull, mkdir, remove
	from os.path import exists, isfile, isdir, dirname, basename
	from progress.bar import Bar
	import re
	import warnings

	from fix_old_data import save_page
	from get_text_field import get_text_field_on_image

	sys.path.append('svgscripts')
	from datatypes.faksimile_image import FaksimileImage
	from datatypes.faksimile import FaksimilePage
	from datatypes.matrix import Matrix
	from datatypes.archival_manuscript import ArchivalManuscriptUnity
	from datatypes.page import Page, STATUS_MERGED_OK, STATUS_POSTMERGED_OK
	from util import back_up, back_up_svg_file, copy_faksimile_update_image_location, copy_faksimile_svg_file
	from process_files import update_svgposfile_status
	from process_words_post_merging import update_faksimile_line_positions, MERGED_DIR

	sys.path.append('shared_util')
	from myxmlwriter import copy_to_bak_dir, write_pretty, xml_has_type, FILE_TYPE_SVG_WORD_POSITION, FILE_TYPE_XML_MANUSCRIPT
	from main_util import create_function_dictionary


	# Module metadata.
	__author__ = "Christian Steiner"
	__maintainer__ = __author__
	__copyright__ = 'University of Basel'
	__email__ = "christian.steiner@unibas.ch"
	__status__ = "Development"
	__license__ = "GPL v3"
	__version__ = "0.0.1"

	# Not referenced by any function visible in this script.
	UNITTESTING = False
	# Row key under which the generated folio name is stored.
	NAME = 'folio-name'
	# Row key of the description; consecutive rows are grouped by its value.
	DESCRIPTION_KEY = 'description'
	# Row key of the alias; the text before its first comma is used as title.
	ALIAS = 'alias'
	# Row key of the label; labels containing 'thumb' mark thumbnail rows.
	LABEL = 'label'
	# Row key of the image url that is rewritten by process_fix_url.
	URL = 'nietzschesource-url'

	def _create_name(current_title, tmp_items, includeTitle=True) -> str:
	    """Return a name built from the aliases of the rows in tmp_items.

	    Rows whose label contains 'thumb' are ignored. From every other alias
	    the title, all commas and all spaces are removed; the remaining parts
	    are joined with '_'.

	    :param current_title: title that is stripped from each alias.
	    :param tmp_items: rows (dictionaries) providing an alias and a label.
	    :param includeTitle: if True, prefix the result with the title (spaces
	        replaced by '-') followed by a comma.
	    :return: the name (str)
	    """
	    prefix = current_title.replace(' ', '-') + ',' if includeTitle else ''
	    parts = []
	    for item in tmp_items:
	        if 'thumb' in item[LABEL]:
	            continue
	        without_title = item[ALIAS].replace(current_title, '')
	        parts.append(without_title.replace(',', '').replace(' ', ''))
	    return prefix + '_'.join(parts)

	def _update_old_name(current_title, name_dictionary, last_description, tmp_items, items):
	    """Name the rows in tmp_items and append them to items.

	    If a folio name is already registered for the combination of title and
	    description, that name is extended by the new rows and every row in
	    items that still carries the old name is renamed as well.

	    :param current_title: title shared by the rows in tmp_items.
	    :param name_dictionary: maps title + description to a folio name; updated in place.
	    :param last_description: description shared by the rows in tmp_items.
	    :param tmp_items: rows that get the folio name.
	    :param items: rows that have already been named; extended in place.
	    """
	    folio_name = _create_name(current_title, tmp_items)
	    dictionary_key = current_title + last_description
	    previous_name = name_dictionary.get(dictionary_key)
	    if previous_name:
	        # The same folio showed up before: extend its name and rename the earlier rows.
	        folio_name = previous_name + '_' + _create_name(current_title, tmp_items, includeTitle=False)
	        outdated_rows = [named_row for named_row in items if named_row[NAME] == previous_name]
	        for named_row in outdated_rows:
	            named_row[NAME] = folio_name
	    name_dictionary[dictionary_key] = folio_name
	    for new_row in tmp_items:
	        new_row[NAME] = folio_name
	    items.extend(tmp_items)

	def process_csv_file(csv_file_name):
	    """Process the csv_file: create a name for folios.

	    Consecutive rows that share a title (the text of the alias before its
	    first comma) and a description are treated as one folio and get the
	    same folio name. The result is written to a new file named
	    'updated_<csv-file>' in the directory of the input file.

	    :param csv_file_name: path of the csv file to read.
	    :return: exit code (int): 0 on success, 2 if the csv file has no
	        description column.
	    :raises ValueError: if an alias contains no comma.
	    """
	    items = []
	    tmp_items = []
	    name_dictionary = {}
	    current_title = None
	    last_description = None
	    with open(csv_file_name, newline='') as csvfile:
	        reader = csv.DictReader(csvfile)
	        # fieldnames is None for an empty file.
	        fieldnames = list(reader.fieldnames or [])
	        # Rows are accessed with the exact key, so test for the exact column.
	        if DESCRIPTION_KEY not in fieldnames:
	            return 2
	        for row in reader:
	            alias = row[ALIAS]
	            title = alias[:alias.index(',')]
	            description = row[DESCRIPTION_KEY]
	            if title != current_title:
	                # A new title always starts a new group.
	                if tmp_items:
	                    _update_old_name(current_title, name_dictionary, last_description, tmp_items, items)
	                current_title = title
	                last_description = description
	                tmp_items = [row]
	            elif description:
	                if description != last_description:
	                    # Same title, different description: start a new group.
	                    if tmp_items:
	                        _update_old_name(current_title, name_dictionary, last_description, tmp_items, items)
	                    last_description = description
	                    tmp_items = [row]
	                else:
	                    tmp_items.append(row)
	            # NOTE(review): a row with the current title and an empty description
	            # is skipped here; this follows the nesting reconstructed from the
	            # flattened source -- confirm that this is intended.
	    if tmp_items:
	        _update_old_name(current_title, name_dictionary, last_description, tmp_items, items)
	    # Every row carries the NAME key now; DictWriter raises a ValueError for
	    # keys that are not listed in its fieldnames.
	    if NAME not in fieldnames:
	        fieldnames.append(NAME)
	    # Prefix the file name only, so that a path with directories stays valid.
	    target_csv_file = os.path.join(os.path.dirname(csv_file_name), 'updated_' + os.path.basename(csv_file_name))
	    with open(target_csv_file, 'w', newline='') as csvfile:
	        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
	        writer.writeheader()
	        writer.writerows(items)
	    return 0

	def process_fix_url(csv_file_name):
	    """Process the csv_file: fix the image urls.

	    A row whose label does not contain '_thumb' gets the download url of
	    its label (without '.jpg'), unless its url already ends with that
	    label. A row whose label contains '_thumb' and whose url starts with
	    the download url gets the url of the image in the 'mini' directory of
	    its title instead. All rows are written to a new file named
	    'url_fixed_<csv-file>' in the directory of the input file.

	    :param csv_file_name: path of the csv file to read.
	    :return: exit code (int), always 0.
	    :raises ValueError: if the label of a thumbnail row that has to be
	        fixed contains no comma.
	    """
	    NIETZSCHE_SOURCES_URL = 'http://www.nietzschesource.org/DFGAapi/api/page/download/'
	    THUMB_URL_PREFIX = 'http://www.nietzschesource.org/DFGAapi/images/DFGA/'
	    items = []
	    with open(csv_file_name, newline='') as csvfile:
	        reader = csv.DictReader(csvfile)
	        # fieldnames is None for an empty file.
	        fieldnames = reader.fieldnames or []
	        for row in reader:
	            label = row[LABEL]
	            image_id = label.replace('.jpg', '')
	            if '_thumb' not in label and not row[URL].endswith(image_id):
	                row[URL] = NIETZSCHE_SOURCES_URL + image_id
	            elif '_thumb' in label and row[URL].startswith(NIETZSCHE_SOURCES_URL):
	                # The title is the part of the label before its first comma.
	                title = label[:label.index(',')]
	                row[URL] = THUMB_URL_PREFIX + title + '/mini/' + label.replace('_thumb', '')
	            items.append(row)
	    # Prefix the file name only, so that a path with directories stays valid.
	    target_csv_file = os.path.join(os.path.dirname(csv_file_name), 'url_fixed_' + os.path.basename(csv_file_name))
	    with open(target_csv_file, 'w', newline='') as csvfile:
	        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
	        writer.writeheader()
	        writer.writerows(items)
	    return 0



	def usage():
	    """Print the docstring of main, which describes how to use the script.
	    """
	    help_text = main.__doc__
	    print(help_text)

	def main(argv):
	    """This program can be used to add a name for a folio.

	    fixes/create_folio_name.py [OPTIONS] <csv-file>

	        <csv-file>      a csv-file containing folio descriptions.

	    OPTIONS:
	        -h|--help       show help
	        -f|--fix-url    fix image url

	    :return: exit code (int)
	    """
	    try:
	        opts, args = getopt.getopt(argv, "hf", ["help", "fix-url"])
	    except getopt.GetoptError:
	        usage()
	        return 2
	    fix_url = False
	    for opt, _ in opts:
	        if opt in ('-h', '--help'):
	            usage()
	            return 0
	        if opt in ('-f', '--fix-url'):
	            fix_url = True
	    if len(args) < 1:
	        usage()
	        return 2
	    csv_file = args[0]
	    if not isfile(csv_file):
	        # The f-prefix is required, otherwise the placeholder is printed literally.
	        raise FileNotFoundError(f'File {csv_file} does not exist!')
	    # Hand the exit code of the processing function on to the caller.
	    if fix_url:
	        return process_fix_url(csv_file)
	    return process_csv_file(csv_file)

	if __name__ == "__main__":
	    # Run the script and use the return value of main as exit status.
	    raise SystemExit(main(sys.argv[1:]))

create_folio_name.pyNo OneTemporaryActions

File Metadata

create_folio_name.pyView Options

Event Timeline

create_folio_name.py
No OneTemporary
Actions

create_folio_name.py
View Options