Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F61561777
create_folio_name.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Tue, May 7, 11:21
Size
7 KB
Mime Type
text/x-python
Expires
Thu, May 9, 11:21 (2 d)
Engine
blob
Format
Raw Data
Handle
17528517
Attached To
rNIETZSCHEPYTHON nietzsche-python
create_folio_name.py
View Options
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""This program can be used to add a name for a folio.
"""
# Copyright (C) University of Basel 2021 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
from
colorama
import
Fore
,
Style
import
csv
import
getopt
import
json
import
lxml.etree
as
ET
import
shutil
import
subprocess
import
sys
import
os
import
wget
from
os
import
listdir
,
sep
,
path
,
setpgrp
,
devnull
,
mkdir
,
remove
from
os.path
import
exists
,
isfile
,
isdir
,
dirname
,
basename
from
progress.bar
import
Bar
import
re
import
warnings
from
fix_old_data
import
save_page
from
get_text_field
import
get_text_field_on_image
sys
.
path
.
append
(
'svgscripts'
)
from
datatypes.faksimile_image
import
FaksimileImage
from
datatypes.faksimile
import
FaksimilePage
from
datatypes.matrix
import
Matrix
from
datatypes.archival_manuscript
import
ArchivalManuscriptUnity
from
datatypes.page
import
Page
,
STATUS_MERGED_OK
,
STATUS_POSTMERGED_OK
from
util
import
back_up
,
back_up_svg_file
,
copy_faksimile_update_image_location
,
copy_faksimile_svg_file
from
process_files
import
update_svgposfile_status
from
process_words_post_merging
import
update_faksimile_line_positions
,
MERGED_DIR
sys
.
path
.
append
(
'shared_util'
)
from
myxmlwriter
import
copy_to_bak_dir
,
write_pretty
,
xml_has_type
,
FILE_TYPE_SVG_WORD_POSITION
,
FILE_TYPE_XML_MANUSCRIPT
from
main_util
import
create_function_dictionary
# Module metadata.
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"

# Not referenced by the functions in this file.
UNITTESTING = False

# Keys of the csv rows (i.e. column names of the csv header) used by this script.
# Row key under which the generated folio name is stored.
NAME = 'folio-name'
# Row key of the folio description; process_csv_file requires this column.
DESCRIPTION_KEY = 'description'
# Row key of the alias; the part before its first comma is used as the title.
ALIAS = 'alias'
# Row key of the image label; labels containing 'thumb' mark thumbnail rows.
LABEL = 'label'
# Row key of the image url that process_fix_url rewrites.
URL = 'nietzschesource-url'
def _create_name(current_title, tmp_items, includeTitle=True) -> str:
    """Return a folio name built from the aliases of the rows in tmp_items.

    Rows whose label contains 'thumb' are ignored. For every other row the
    alias is reduced by removing the title, all commas and all blanks; the
    reduced aliases are joined with '_'.

    :param current_title: title that is stripped from each alias.
    :param tmp_items: rows (dictionaries) providing ALIAS and LABEL entries.
    :param includeTitle: if true, prefix the result with the title (blanks
        replaced by '-') followed by a comma.
    :return: the name (str)
    """
    alias_parts = []
    for entry in tmp_items:
        if 'thumb' in entry[LABEL]:
            continue
        reduced_alias = entry[ALIAS].replace(current_title, '')
        reduced_alias = reduced_alias.replace(',', '').replace(' ', '')
        alias_parts.append(reduced_alias)
    joined_aliases = '_'.join(alias_parts)
    if includeTitle:
        return current_title.replace(' ', '-') + ',' + joined_aliases
    return joined_aliases
def _update_old_name(current_title, name_dictionary, last_description, tmp_items, items):
    """Name the rows in tmp_items and append them to items.

    If name_dictionary already holds a name for the combination of title and
    description, the new name is that earlier name extended by '_' and the
    title-less name of tmp_items; rows in items that still carry the earlier
    name are renamed accordingly.

    :param current_title: title shared by the rows in tmp_items.
    :param name_dictionary: maps title + description to the latest folio
        name; updated in place.
    :param last_description: description shared by the rows in tmp_items.
    :param tmp_items: rows to be named; each row gets a NAME entry.
    :param items: list of all processed rows; extended in place.
    """
    new_name = _create_name(current_title, tmp_items)
    group_key = current_title + last_description
    earlier_name = name_dictionary.get(group_key)
    if earlier_name:
        # Same title and description were seen before: extend the earlier name.
        new_name = earlier_name + '_' + _create_name(current_title, tmp_items, includeTitle=False)
        for stored_row in items:
            if stored_row[NAME] == earlier_name:
                stored_row[NAME] = new_name
    name_dictionary[group_key] = new_name
    for pending_row in tmp_items:
        pending_row[NAME] = new_name
        items.append(pending_row)
def process_csv_file(csv_file_name):
    """Process the csv_file: create a name for folios.

    Consecutive rows are grouped by title (the part of the alias before its
    first comma) and description. Each group receives a folio name, see
    _update_old_name. The result is written to a file with the prefix
    'updated_' in the same directory as the input file.

    :param csv_file_name: path of the csv file to process.
    :return: exit code (int): 0 on success, 2 if the csv file has no
        description column.
    :raises ValueError: if an alias does not contain a comma.
    """
    items = []
    tmp_items = []
    name_dictionary = {}
    current_title = None
    last_description = None
    with open(csv_file_name, newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        # An empty file has no header: reader.fieldnames is None in that case.
        fieldnames = list(reader.fieldnames or [])
        # Rows are read with the exact key, hence check for the exact column.
        if DESCRIPTION_KEY not in fieldnames:
            return 2
        for row in reader:
            alias = row[ALIAS]
            row_title = alias[:alias.index(',')]
            description = row[DESCRIPTION_KEY]
            if row_title != current_title:
                # A new title starts: close the pending group first.
                if tmp_items:
                    _update_old_name(current_title, name_dictionary, last_description, tmp_items, items)
                current_title = row_title
                last_description = description
                tmp_items = [row]
            elif description:
                if description != last_description:
                    # Same title, new description: close the pending group.
                    if tmp_items:
                        _update_old_name(current_title, name_dictionary, last_description, tmp_items, items)
                    last_description = description
                    tmp_items = [row]
                else:
                    tmp_items.append(row)
            # NOTE(review): a row that repeats the current title but has an
            # empty description is neither grouped nor written -- confirm
            # that this is intended.
    if tmp_items:
        _update_old_name(current_title, name_dictionary, last_description, tmp_items, items)
    # Every row in items carries a NAME entry; DictWriter refuses keys that
    # are missing in fieldnames, so make sure the column exists.
    if NAME not in fieldnames:
        fieldnames.append(NAME)
    # Prefix the file name only, so that paths with directories keep working.
    target_csv_file = os.path.join(os.path.dirname(csv_file_name),
                                   'updated_' + os.path.basename(csv_file_name))
    with open(target_csv_file, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(items)
    return 0
def process_fix_url(csv_file_name):
    """Process the csv_file: fix the image urls.

    For rows whose label does not contain '_thumb', the url is replaced by
    the download url of the label (without '.jpg') unless it already ends
    with that label. For '_thumb' rows whose url starts with the download
    url, the url is replaced by the url of the mini image. The result is
    written to a file with the prefix 'url_fixed_' in the same directory as
    the input file.

    :param csv_file_name: path of the csv file to process.
    :return: exit code (int)
    :raises ValueError: if the label of a thumb row that gets fixed does not
        contain a comma.
    """
    NIETZSCHE_SOURCES_URL = 'http://www.nietzschesource.org/DFGAapi/api/page/download/'
    THUMB_URL_PREFIX = 'http://www.nietzschesource.org/DFGAapi/images/DFGA/'
    items = []
    with open(csv_file_name, newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        # An empty file has no header: reader.fieldnames is None in that case.
        fieldnames = reader.fieldnames or []
        for row in reader:
            label = row[LABEL]
            if '_thumb' not in label:
                page_id = label.replace('.jpg', '')
                if not row[URL].endswith(page_id):
                    row[URL] = NIETZSCHE_SOURCES_URL + page_id
            elif row[URL].startswith(NIETZSCHE_SOURCES_URL):
                # The part of the label before the first comma is the title.
                title = label[:label.index(',')]
                row[URL] = THUMB_URL_PREFIX + title + '/mini/' + label.replace('_thumb', '')
            items.append(row)
    # Prefix the file name only, so that paths with directories keep working.
    target_csv_file = os.path.join(os.path.dirname(csv_file_name),
                                   'url_fixed_' + os.path.basename(csv_file_name))
    with open(target_csv_file, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(items)
    return 0
def usage():
    """Print the help text of the script, i.e. the docstring of main.
    """
    help_text = main.__doc__
    print(help_text)
def main(argv):
    """This program can be used to add a name for a folio or to fix the image urls of a csv-file.

    fixes/create_folio_name.py [OPTIONS] <csv-file>

        <csv-file>      a csv-file containing folio descriptions.

        OPTIONS:
        -h|--help       show help
        -f|--fix-url    fix image url

        :return: exit code (int)
    """
    fix_url = False
    try:
        opts, args = getopt.getopt(argv, "hf", ["help", "fix-url"])
    except getopt.GetoptError:
        usage()
        return 2
    for opt, _ in opts:
        if opt in ('-h', '--help'):
            usage()
            return 0
        elif opt in ('-f', '--fix-url'):
            fix_url = True
    if not args:
        usage()
        return 2
    csv_file = args[0]
    if not isfile(csv_file):
        # f-string, so that the message names the missing file.
        raise FileNotFoundError(f'File {csv_file} does not exist!')
    # Pass the exit code of the processing function on to the caller.
    if fix_url:
        return process_fix_url(csv_file)
    return process_csv_file(csv_file)
if __name__ == "__main__":
    # Run the command line interface and hand its exit code to the shell.
    exit_code = main(sys.argv[1:])
    sys.exit(exit_code)
Event Timeline
Log In to Comment