Page MenuHomec4science

create_folio_name.py
No OneTemporary

File Metadata

Created
Tue, May 7, 11:21

create_folio_name.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""This program can be used to add a name for a folio.
"""
# Copyright (C) University of Basel 2021 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
from colorama import Fore, Style
import csv
import getopt
import json
import lxml.etree as ET
import shutil
import subprocess
import sys
import os
import wget
from os import listdir, sep, path, setpgrp, devnull, mkdir, remove
from os.path import exists, isfile, isdir, dirname, basename
from progress.bar import Bar
import re
import warnings
from fix_old_data import save_page
from get_text_field import get_text_field_on_image
sys.path.append('svgscripts')
from datatypes.faksimile_image import FaksimileImage
from datatypes.faksimile import FaksimilePage
from datatypes.matrix import Matrix
from datatypes.archival_manuscript import ArchivalManuscriptUnity
from datatypes.page import Page, STATUS_MERGED_OK, STATUS_POSTMERGED_OK
from util import back_up, back_up_svg_file, copy_faksimile_update_image_location, copy_faksimile_svg_file
from process_files import update_svgposfile_status
from process_words_post_merging import update_faksimile_line_positions, MERGED_DIR
sys.path.append('shared_util')
from myxmlwriter import copy_to_bak_dir, write_pretty, xml_has_type, FILE_TYPE_SVG_WORD_POSITION, FILE_TYPE_XML_MANUSCRIPT
from main_util import create_function_dictionary
# Module metadata.
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
# Not referenced by any function in this module.
# NOTE(review): presumably toggled by unit tests -- confirm.
UNITTESTING = False
# Keys of the csv columns this script works with.
# Column that _update_old_name fills with the generated folio name.
NAME = 'folio-name'
# Column read by process_csv_file to group rows; the csv header must
# contain a field name that includes this key.
DESCRIPTION_KEY = 'description'
# Column whose text before the first comma is used as the title.
ALIAS = 'alias'
# Column checked for 'thumb'/'_thumb' and used (without '.jpg') to build urls.
LABEL = 'label'
# Column rewritten by process_fix_url.
URL = 'nietzschesource-url'
def _create_name(current_title, tmp_items, includeTitle=True) ->str:
"""Return a name.
"""
if not includeTitle:
return '_'.join([ tmp_row[ALIAS].replace(current_title, '').replace(',','').replace(' ', '') for tmp_row in tmp_items if 'thumb' not in tmp_row[LABEL] ])
return current_title.replace(' ','-') + ',' + '_'.join([ tmp_row[ALIAS].replace(current_title, '').replace(',','').replace(' ', '') for tmp_row in tmp_items if 'thumb' not in tmp_row[LABEL] ])
def _update_old_name(current_title, name_dictionary, last_description, tmp_items, items):
    """Name the rows of tmp_items and append them to items.

    The name is created from current_title and tmp_items. If name_dictionary
    already holds a name for current_title + last_description, the new name
    extends that earlier name, and all rows in items carrying the earlier
    name are renamed as well. The resulting name is stored in
    name_dictionary under current_title + last_description.
    """
    folio_name = _create_name(current_title, tmp_items)
    lookup_key = current_title + last_description
    earlier_name = name_dictionary.get(lookup_key)
    if earlier_name:
        # Same title and description seen before: extend the earlier name
        # and bring the rows stored under it up to date.
        suffix = _create_name(current_title, tmp_items, includeTitle=False)
        folio_name = earlier_name + '_' + suffix
        outdated_rows = [row for row in items if row[NAME] == earlier_name]
        for row in outdated_rows:
            row[NAME] = folio_name
    name_dictionary[lookup_key] = folio_name
    for row in tmp_items:
        row[NAME] = folio_name
        items.append(row)
def process_csv_file(csv_file_name):
    """Process the csv_file: create a name for folios.

    Consecutive rows are grouped by title (the part of the alias before the
    first comma) and by description. Each group is handed to
    _update_old_name, which stores a common name in the NAME column. The
    rows are then written to 'updated_<file name>' in the directory of
    csv_file_name.

    :param csv_file_name: path of the csv file to read.
    :return: 0 on success, 2 if no field name of the csv file contains
             DESCRIPTION_KEY (nothing is written in that case).
    :raises ValueError: if an alias does not contain a comma.
    :raises KeyError: if a row lacks the ALIAS or DESCRIPTION_KEY column.
    """
    items = []
    tmp_items = []
    name_dictionary = {}
    current_title = None
    last_description = None
    with open(csv_file_name, newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        # fieldnames is None for an empty file; copy it so that the reader's
        # own list is not modified below.
        fieldnames = list(reader.fieldnames or [])
        if not any(DESCRIPTION_KEY in key for key in fieldnames):
            return 2
        for row in reader:
            alias = row[ALIAS]
            title = alias[:alias.index(',')]
            # DictReader yields None for fields missing in a short row;
            # treat that like an empty description so that the title and
            # description can always be concatenated later on.
            description = row[DESCRIPTION_KEY] or ''
            if title != current_title:
                # A new title always starts a new group.
                if tmp_items:
                    _update_old_name(current_title, name_dictionary, last_description, tmp_items, items)
                current_title = title
                last_description = description
                tmp_items = [row]
            elif description:
                if description != last_description:
                    # Same title, new description: close the current group.
                    if tmp_items:
                        _update_old_name(current_title, name_dictionary, last_description, tmp_items, items)
                    last_description = description
                    tmp_items = [row]
                else:
                    tmp_items.append(row)
            # NOTE(review): the nesting of the branches above had to be
            # reconstructed; as written, rows of the current title that
            # have no description are skipped -- confirm.
    if tmp_items:
        _update_old_name(current_title, name_dictionary, last_description, tmp_items, items)
    # The rows carry the NAME key, so the writer must know that column.
    if NAME not in fieldnames:
        fieldnames.append(NAME)
    # Prefix only the file name, so that a csv_file_name containing a
    # directory still yields a valid path next to the input file.
    target_csv_file = os.path.join(dirname(csv_file_name), 'updated_' + basename(csv_file_name))
    with open(target_csv_file, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(items)
    return 0
def process_fix_url(csv_file_name):
    """Process the csv_file: fix the image urls.

    For rows whose label does not contain '_thumb', the url is replaced by
    the download url of the label (without '.jpg') unless it already ends
    with that label. For '_thumb' rows whose url starts with the download
    url, the url is replaced by the thumbnail url built from the part of the
    label before the first comma. All rows are written to
    'url_fixed_<file name>' in the directory of csv_file_name.

    :param csv_file_name: path of the csv file to read.
    :return: 0
    :raises ValueError: if the label of a '_thumb' row that has to be fixed
                        does not contain a comma.
    :raises KeyError: if a row lacks the LABEL or URL column.
    """
    NIETZSCHE_SOURCES_URL = 'http://www.nietzschesource.org/DFGAapi/api/page/download/'
    THUMB_URL_PREFIX = 'http://www.nietzschesource.org/DFGAapi/images/DFGA/'
    items = []
    with open(csv_file_name, newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        # fieldnames is None for an empty file, which DictWriter cannot use.
        fieldnames = reader.fieldnames or []
        for row in reader:
            label = row[LABEL]
            if '_thumb' not in label:
                page_id = label.replace('.jpg', '')
                if not row[URL].endswith(page_id):
                    row[URL] = NIETZSCHE_SOURCES_URL + page_id
            elif row[URL].startswith(NIETZSCHE_SOURCES_URL):
                title = label[:label.index(',')]
                row[URL] = THUMB_URL_PREFIX + title + '/mini/' + label.replace('_thumb', '')
            items.append(row)
    # Prefix only the file name, so that a csv_file_name containing a
    # directory still yields a valid path next to the input file.
    target_csv_file = os.path.join(dirname(csv_file_name), 'url_fixed_' + basename(csv_file_name))
    with open(target_csv_file, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(items)
    return 0
def usage():
    """Print the help text of this script, i.e. the docstring of main.
    """
    help_text = main.__doc__
    print(help_text)
def main(argv):
    """This program can be used to add a name for a folio.

    fixes/create_folio_name.py [OPTIONS] <csv-file>

        <csv-file>      a csv-file containing folio descriptions.

        OPTIONS:
        -h|--help       show help
        -f|--fix-url    fix image url

        :return: exit code (int)
    """
    # NOTE: the docstring above is printed by usage() as the help text.
    # argv holds the command line arguments without the script name.
    # FileNotFoundError is raised if <csv-file> is not an existing file.
    fix_url = False
    try:
        opts, args = getopt.getopt(argv, "hf", ["help", "fix-url"])
    except getopt.GetoptError:
        usage()
        return 2
    for opt, _ in opts:
        if opt in ('-h', '--help'):
            usage()
            return 0
        if opt in ('-f', '--fix-url'):
            fix_url = True
    if len(args) < 1:
        usage()
        return 2
    csv_file = args[0]
    if not isfile(csv_file):
        raise FileNotFoundError(f'File {csv_file} does not exist!')
    # Hand the status of the processing function on to the caller instead
    # of always reporting success.
    if fix_url:
        exit_status = process_fix_url(csv_file)
    else:
        exit_status = process_csv_file(csv_file)
    return exit_status
if __name__ == "__main__":
    # Pass the command line arguments (without the script name) to main and
    # use its return value as the exit status of the process.
    exit_code = main(sys.argv[1:])
    sys.exit(exit_code)

Event Timeline