Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F91674567
bibconvert.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Wed, Nov 13, 08:31
Size
47 KB
Mime Type
text/x-python
Expires
Fri, Nov 15, 08:31 (2 d)
Engine
blob
Format
Raw Data
Handle
22305295
Attached To
R3600 invenio-infoscience
bibconvert.py
View Options
## $Id$
## This file is part of the CERN Document Server Software (CDSware).
## Copyright (C) 2002, 2003, 2004, 2005 CERN.
##
## The CDSware is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## The CDSware is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDSware; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""BibConvert tool to convert bibliographic records from any format to any format."""
__version__
=
"$Id$"
try
:
import
fileinput
import
string
import
os
import
re
import
sys
import
time
import
getopt
from
time
import
gmtime
,
strftime
,
localtime
import
os.path
except
ImportError
,
e
:
print
"Error:
%s
"
%
e
import
sys
sys
.
exit
(
1
)
try
:
from
config
import
*
from
search_engine
import
perform_request_search
from
oai_repository_config
import
oaiidprefix
except
ImportError
,
e
:
print
"Error:
%s
"
%
e
sys
.
exit
(
1
)
### Matching records with database content
def parse_query_string(query_string):
    """Split a matching query string into one list per queried field.

    Fields are separated by '||'; inside a field the tag and its formatting
    functions are separated by '::', e.g.:
    Input:  245__a::REP(-, )::SHAPE||700__a::MINL(2)::REP(COMMA,)
    Output: [['245__a', 'REP(-, )', 'SHAPE'], ['700__a', 'MINL(2)', 'REP(COMMA,)']]
    """
    return [field_spec.split('::') for field_spec in query_string.split('||')]
def set_conv():
    """
    bibconvert common settings
    =======================
    Returns [minimal, maximal] length of an output line, i.e. [1, 4096].
    """
    min_line_length = 1
    max_line_length = 4096
    return [min_line_length, max_line_length]
def get_pars(fn):
    """Read function and its parameters into list.

    'NAME(a,b)' gives ['NAME', ['a', 'b']] and 'NAME()' gives ['NAME', ['']].
    Only the text between the first pair of parentheses is used as the
    parameter string. Raises IndexError when fn contains no parenthesis.
    """
    # Raw string: '\(' in a plain literal is an invalid escape sequence.
    # The split is done once; the original ran the same regexp twice.
    pieces = re.split(r'\(|\)', fn)
    return [pieces[0], pieces[1].split(',')]
def append_to_output_file(filename, output):
    """Append output to the file filename (opened in 'a' mode).

    On IOError the program is terminated through exit_on_error().
    Returns 1.
    """
    try:
        handle = open(filename, 'a')
        try:
            handle.write(output)
        finally:
            # close even when the write fails so the descriptor is not leaked
            handle.close()
    except IOError:
        exit_on_error("Cannot write into %s" % filename)
    return 1
def sub_keywd(out):
    """bibconvert keywords literal substitution.

    Replaces the symbolic keywords used in templates by the characters they
    stand for and returns the resulting string. Substitutions are applied in
    the order listed below.
    """
    # The original also replaced a backslash by a backslash and "\r" by "\r";
    # both were no-ops and are omitted.
    substitutions = (
        ("EOL", "\n"),
        ("_CR_", "\r"),
        ("_LF_", "\n"),
        ("BSLASH", '\\'),
        ("COMMA", ','),
        ("LEFTB", '['),
        ("RIGHTB", ']'),
        ("LEFTP", '('),
        ("RIGHTP", ')'),
    )
    for keyword, literal in substitutions:
        # str.replace instead of string.replace (removed in Python 3)
        out = out.replace(keyword, literal)
    return out
def check_split_on(data_item_split, sep, tpl_f):
    """
    bibconvert conditional split with following conditions
    ===================================================
    ::NEXT(N,TYPE,SIDE) - next N chars are of the TYPE having the separator on the SIDE
    ::PREV(N,TYPE,SIDE) - prev.N chars are of the TYPE having the separator on the SIDE

    data_item_split is the two-element result of splitting a value once on
    sep. While the condition is not met, the part up to the next occurrence
    of sep is moved from element 1 back onto element 0. The list is modified
    in place and returned.
    """
    fn, par = get_pars(tpl_f)

    while True:
        if ((fn == "NEXT" and par[2] == "R") or
                (fn == "PREV" and par[2] == "L")):
            # the N characters just before the separator
            test_value = data_item_split[0][-(int(par[0])):]
        elif ((fn == "NEXT" and par[2] == "L") or
                (fn == "PREV" and par[2] == "R")):
            # the N characters just after the separator
            test_value = data_item_split[1][:(int(par[0]))]

        # The split position is accepted when removing the characters of TYPE
        # leaves nothing and the tested text is at least N characters long.
        if (FormatField(test_value, "SUP(" + par[1] + ",)") == ""
                and len(test_value) >= int(par[0])):
            break

        remainder = data_item_split[1].split(sep, 1)
        data_item_split[0] = data_item_split[0] + sep + remainder[0]
        if len(remainder) == 1:
            # no further separator: everything ends up in the first part
            data_item_split[1] = ""
            break
        data_item_split[1] = remainder[1]

    return data_item_split
def get_subfields(data, subfield, src_tpl):
    """Get subfield according to the template.

    data     - list of field values
    subfield - subfield code to extract
    src_tpl  - parsed source template: '<:code:>' placeholders alternating
               with separators (a separator may carry a '::' condition)
    Returns one extracted string per element of data.
    """
    results = []
    for value in data:
        subfield_seen = 0
        for tpl_part in src_tpl:
            if tpl_part[:2] == "<:":
                # placeholder: remember once the wanted subfield was passed
                if tpl_part[2:-2] == subfield:
                    subfield_seen = 1
                continue

            sep_spec = tpl_part.split("::")
            sep = sep_spec[0]
            halves = value.split(sep, 1)
            if len(halves) == 1:
                # separator absent: value stays as it is
                value = halves[0]
                continue

            if len(sep_spec) > 1:
                # conditional separator, e.g. ', ::NEXT(1,UPPER,R)'
                halves = check_split_on(value.split(sep, 1),
                                        sep_spec[0], sep_spec[1])
            if subfield_seen == 1:
                value = halves[0]
            else:
                value = sep.join(halves[1:])
        results.append(value)
    return results
def exp_n(word):
    "Remove every newline and carriage return character from a string."
    return "".join([ch for ch in word if ch != '\n' and ch != '\r'])
def exp_e(list):
    """Expunge empty elements from a list.

    Every element is first passed through exp_n(); elements that are then
    empty (or consist only of a line break) are dropped.
    """
    cleaned = [exp_n(element) for element in list]
    return [element for element in cleaned
            if element not in ('\r\n', '\r', '\n', "")]
def sup_e(word):
    "Remove every space character from a string."
    return "".join([ch for ch in word if ch != ' '])
def select_line(field_code, list):
    """Return the data of the entry whose code equals field_code.

    list holds [code, data] entries. Spaces are ignored in the comparison;
    as a side effect the code of every entry is rewritten in place without
    spaces. When several entries match, the last one wins; when none
    matches, [''] is returned.
    """
    selected = ['']
    for entry in list:
        entry[0] = sup_e(entry[0])
        field_code = sup_e(field_code)
        if entry[0] != field_code:
            continue
        selected = entry[1]
    return selected
def parse_field_definition(source_field_definition):
    """Create list of source_field_definition.

    A definition that splits into exactly four parts on '---' is returned as
    those four parts. Otherwise the definition is cut into top-level
    '<:...:>' placeholders and the literal text between them; nested
    placeholders stay inside their enclosing one.
    """
    dash_parts = source_field_definition.split("---")
    if len(dash_parts) == 4:
        return dash_parts

    # Tokenise into text pieces and the '<:' / ':>' markers between them
    tokens = []
    for opened in source_field_definition.split("<:"):
        for closed in opened.split(":>"):
            tokens.extend([closed, ":>"])
        # the trailing ':>' of this chunk is really the next '<:'
        tokens[-1] = "<:"
    # ... except after the very last chunk
    del tokens[-1]

    # Glue tokens back together, emitting a word whenever no placeholder is open
    fields = []
    pending = ""
    depth = 0
    for token in tokens:
        pending = pending + token
        if token == "<:":
            depth = depth + 1
        if token == ":>":
            depth = depth - 1
        if depth == 0:
            fields.append(pending)
            pending = ""
    return fields
def parse_template(template):
    """
    bibconvert parse template
    ======================
    in  - template filename
    out - [ [ field_code , [ field_template_parsed ] ] , ... ]

    Lines starting with '#' and lines without '---' are ignored.
    """
    parsed = []
    for line in read_file(template, 1):
        if line[:1] == "#":
            continue
        pieces = line.split("---", 1)
        if len(pieces) < 2:
            continue
        field_code = pieces[0]
        field_tpl = exp_e(parse_field_definition(pieces[1]))
        parsed.append([field_code, field_tpl])
    return parsed
def parse_common_template(template, part):
    """
    bibconvert parse template
    =========================
    in  - template filename
          part: index of the section to read; a line starting with '==='
          starts the next section (counting begins at 0)
    out - [ [ field_code , [ field_template_parsed ] ] , ... ]

    Within the selected section, lines starting with '#' and lines without
    '---' are ignored.
    """
    parsed = []
    section = 0
    for line in read_file(template, 1):
        if exp_n(line)[:3] == "===":
            section = section + 1
            continue
        if section != part or line[:1] == "#":
            continue
        pieces = line.split("---", 1)
        if len(pieces) > 1:
            field_tpl = exp_e(parse_field_definition(pieces[1]))
            parsed.append([pieces[0], field_tpl])
    return parsed
def parse_input_data_f(source_data_open, source_tpl):
    """
    bibconvert parse input data
    ========================
    in  - source_data_open: open file handle to read one record from
          source_tpl: not used by this function
    out - [ [ source_field_code , [ source_field_data ] ] , [] ]
          or -1 when the end of the file is reached

    source_data_template entry - field_code---[const]<:subfield_code:>[const][<:subfield_code:>][]
    destination_templace entry - [::GFF()]---[const]<:field_code::subfield_code[::FF()]:>[]
    input data file; by line:  - fieldcode value

    Lines are read until one equals the global separator once all of its
    whitespace is removed.
    """
    global separator
    # placeholder result, returned unchanged unless the final check below replaces it
    out = [['',[]]]
    count = 0
    values = []
    while (count < 1):
        line = source_data_open.readline()
        if (line == ""):
            # readline() gives "" at end of file: -1 is returned even when
            # field lines were already collected for this record
            return(-1)
        line_split = line.split(" ",1)
        if (re.sub("\s","",line) == separator):
            count = count + 1
        if (len(line_split) == 2):
            # "<code> <value>": text up to the first space is the field code
            field_code = line_split[0]
            field_value = exp_n(line_split[1])
            values.append([field_code, field_value])
    item_prev = ""
    # NOTE(review): the stack of the first group starts with an empty string
    stack = ['']
    # group the values of consecutive lines that share the same field code
    for item in values:
        if ((item[0]==item_prev)or(item_prev == "")):
            stack.append(item[1])
            item_prev = item[0]
        else:
            out.append([item_prev,stack])
            item_prev = item[0]
            stack = []
            stack.append(item[1])
    try:
        if (stack[0] != ""):
            # NOTE(review): out[0] is still the placeholder here, so this
            # resets out and drops the groups appended in the loop above;
            # only the last group is returned -- confirm this is intended
            if (out[0][0]==""):
                out = []
            # field_code is the code of the last "<code> <value>" line read
            out.append([field_code,stack])
    except IndexError, e:
        out = out
    return out
def parse_input_data_fx(source_tpl):
    """
    bibconvert parse input data
    ========================
    in  - source_tpl: not used by this function; the record is read from
          standard input and the field definitions are taken from the global
          extract_tpl_parsed
    out - [ [ source_field_code , [ source_field_data ] ] , [] ]
          or -1 when the input is exhausted and no record text was read

    Each entry of extract_tpl_parsed is [field_code, [begin, end, split]]
    where end and split are optional. begin, end and split may be written as
    //regexp//; end may also be one of the keywords EOL, MIN or MAX.
    """
    global separator
    count = 0
    record = ""
    field_data_1_in_list = []
    # placeholder result, replaced as soon as the first field yields values
    out = [['',[]]]
    # Collect the text of one record: stop on a separator line (count jumps
    # by 10) or after ten reads that hit the end of the input
    while (count <10):
        line = sys.stdin.readline()
        if (line == ""):
            count = count + 1
        if (record == "" and count):
            return (-1)
        if (re.sub("\s","",line) == separator):
            count = count + 10
        else:
            record = record + line
    for field_defined in extract_tpl_parsed:
        # Expand keywords (EOL, COMMA, ...) in the definition parts; the
        # entries of extract_tpl_parsed are modified in place
        try:
            field_defined[1][0] = sub_keywd(field_defined[1][0])
            field_defined[1][1] = sub_keywd(field_defined[1][1])
        except IndexError, e:
            field_defined = field_defined
        try:
            field_defined[1][2] = sub_keywd(field_defined[1][2])
        except IndexError, e:
            field_defined = field_defined
        # field_data_1 is only ever assigned in this function, never read
        field_data_1 =""
        # Step 1: cut the record at every occurrence of the begin marker
        if ((field_defined[1][0][0:2] == '//') and (field_defined[1][0][-2:] == '//')):
            field_defined_regexp = field_defined[1][0][2:-2]
            try:
                ####
                if (len(re.split(field_defined_regexp,record)) == 1):
                    field_data_1 = ""
                    field_data_1_in_list = []
                else:
                    field_data_1_tmp = re.split(field_defined_regexp,record,1)[1]
                    # NOTE(review): further occurrences are split with
                    # str.split, i.e. the regexp text is taken literally here
                    field_data_1_in_list = field_data_1_tmp.split(field_defined_regexp)
            except IndexError, e:
                field_data_1 = ""
        else:
            try:
                if (len(record.split(field_defined[1][0])) == 1):
                    field_data_1 = ""
                    field_data_1_in_list = []
                else:
                    field_data_1_tmp = record.split(field_defined[1][0],1)[1]
                    field_data_1_in_list = field_data_1_tmp.split(field_defined[1][0])
            except IndexError, e:
                field_data_1 = ""
        spliton = []
        outvalue = ""
        field_data_2 = ""
        field_data = ""
        # Step 2: work out the end marker(s) at which each piece is cut off
        try:
            if ((field_defined[1][1])=="EOL"):
                spliton = ['\n']
            elif ((field_defined[1][1])=="MIN"):
                spliton = ['\n']
            elif ((field_defined[1][1])=="MAX"):
                # MAX: cut at the begin marker of any defined field
                for item in extract_tpl_parsed:
                    try:
                        spliton.append(item[1][0])
                    except IndexError, e:
                        spliton = spliton
            elif (field_defined[1][1][0:2] == '//') and (field_defined[1][1][-2:] == '//'):
                spliton = [field_defined[1][1][2:-2]]
            else:
                spliton = [field_defined[1][1]]
        except IndexError,e :
            spliton = ""
        outvalues = []
        # Step 3: keep the text in front of the end marker(s) for each piece
        for field_data in field_data_1_in_list:
            outvalue = ""
            for splitstring in spliton:
                field_data_2 = ""
                if (len(field_data.split(splitstring))==1):
                    if (outvalue == ""):
                        field_data_2 = field_data
                    else:
                        field_data_2 = outvalue
                else:
                    field_data_2 = field_data.split(splitstring)[0]
                outvalue = field_data_2
                field_data = field_data_2
            outvalues.append(outvalue)
        outvalues = exp_e(outvalues)
        if (len(outvalues) > 0):
            if (out[0][0]==""):
                out = []
            outstack = []
            # Step 4: an optional third part splits each value further (regexp split)
            if (len(field_defined[1])==3):
                spliton = [field_defined[1][2]]
                if (field_defined[1][2][0:2] == '//') and (field_defined[1][2][-2:] == '//'):
                    spliton = [field_defined[1][2][2:-2]]
                for item in outvalues:
                    stack = re.split(spliton[0],item)
                    for stackitem in stack:
                        outstack.append(stackitem)
            else:
                outstack = outvalues
            out.append([field_defined[0],outstack])
    return out
def parse_input_data_d(source_data, source_tpl):
    """
    bibconvert parse input data
    ========================
    in  - source_data: prefix (directory) of the per-field data files
          source_tpl: filename of the source data template
    out - [ [ source_field_code , [ source_field_data ] ] , [] ]

    source_data_template entry - field_code---[const]<:subfield_code:>[const][<:subfield_code:>][]
    destination_templace entry - [::GFF()]---[const]<:field_code::subfield_code[::FF()]:>[]
    input data dir; by file:   - fieldcode value per line

    For every template line the file named source_data + field_code is read
    (a missing file gives no values) and its non-empty lines become the
    values of that field.
    """
    parsed = []
    for tpl_line in read_file(source_tpl, 1):
        code = tpl_line.split("---")[0]
        values = exp_e(read_file(source_data + code, 0))
        parsed.append([code, values])
    return parsed
def sub_empty_lines(value):
    "Delete every run of two or more consecutive newlines from value."
    blank_run = re.compile('\n\n+')
    return blank_run.sub('', value)
def set_par_defaults(par1, par2):
    """Set default parameter when not defined.

    par1 - list of given parameters
    par2 - comma separated string of defaults
    Returns a list with one entry per default: the given parameter when it
    is present and not empty, the default otherwise. Given parameters beyond
    the number of defaults are dropped.
    """
    merged = []
    for position, default in enumerate(par2.split(",")):
        if position < len(par1) and par1[position] != "":
            merged.append(par1[position])
        else:
            merged.append(default)
    return merged
def generate(keyword):
    """
    bibconvert generated values:
    =========================
    SYSNO()      - value of the global sysno500
    SYSNO330()   - value of the global sysno
    WEEK(N)      - week number ('%V') shifted by N, two digits, "00" if negative
    DATE(format) - current local time in the given strftime FORMAT
    XDATE(format)- like DATE, prefixed with its second parameter
    VALUE(value) - enter value literally
    OAI()        - generate oai_identifier from oaiidprefix and the globals
                   tcounter and oai_identifier_from
    Any other keyword is returned unchanged.
    """
    out = keyword
    fn, par = get_pars(keyword + "()")

    # NOTE(review): a single default keeps only the first parameter, so the
    # length limit of DATE and the prefix/limit of XDATE below always take
    # their defaults -- confirm whether that is intended.
    par = set_par_defaults(par, "")

    if fn == "SYSNO":
        out = sysno500
    elif fn == "SYSNO330":
        out = sysno
    elif fn == "WEEK":
        par = set_par_defaults(par, "0")
        week = int(strftime("%V", localtime())) + int(par[0])
        out = "%02d" % week
        if int(out) < 0:
            out = "00"
    elif fn == "VALUE":
        par = set_par_defaults(par, "")
        out = par[0]
    elif fn == "DATE":
        par = set_par_defaults(par, "%w%H%M%S," + "%d" % set_conv()[1])
        stamp = strftime(par[0], localtime())
        out = stamp[:int(par[1])]
    elif fn == "XDATE":
        par = set_par_defaults(par, "%w%H%M%S," + ",%d" % set_conv()[1])
        stamp = strftime(par[0], localtime())
        out = par[1] + stamp[:int(par[2])]
    elif fn == "OAI":
        out = "%s:%d" % (oaiidprefix, tcounter + oai_identifier_from)
    return out
def read_file(filename, exception):
    """Read file into list.

    Returns the lines of filename with their line endings kept. When
    filename is not an existing regular file, the program is terminated
    through exit_on_error() if exception is true; otherwise an empty list is
    returned.
    """
    if not os.path.isfile(filename):
        if exception:
            exit_on_error("Cannot access file: %s" % filename)
        return []
    handle = open(filename, 'r')
    try:
        return handle.readlines()
    finally:
        # close even when reading fails so the descriptor is not leaked
        handle.close()
def _kb_contains(haystack, needle):
    "True when the non-empty string needle occurs in haystack."
    return needle != "" and needle in haystack


def crawl_KB(filename, value, mode):
    """
    bibconvert look-up value in KB_file in one of following modes:
    ===========================================================
    1 - case sensitive     / match (default)
    2 - not case sensitive / search
    3 - case sensitive     / search
    4 - not case sensitive / match
    5 - case sensitive     / search (in KB)
    6 - not case sensitive / search (in KB)
    7 - case sensitive     / search (reciprocal)
    8 - not case sensitive / search (reciprocal)
    9 - replace by _DEFAULT_ only
    R - not case sensitive / search (reciprocal) (8) replace

    The KB file holds one 'key---replacement' entry per line. The
    replacement of the first matching entry is returned exactly as read,
    i.e. including the line ending. An entry whose key is _DEFAULT_ matches
    any value, so it acts as the default when placed last. In mode R
    every matching entry is applied to the value as a substring replacement
    and the final value is returned. Without a match, or when the KB file
    cannot be found, value is returned unchanged.

    When filename does not exist as given, it is looked up in the directory
    of the global extract_tpl.
    """
    if not os.path.isfile(filename):
        filename = "/".join(extract_tpl.split("/")[:-1]) + "/" + filename
    if not os.path.isfile(filename):
        return value

    kb_file = open(filename, "r")
    try:
        kb_lines = kb_file.readlines()
    finally:
        # the original never closed the knowledge base file
        kb_file.close()

    ignore_case = mode in ("2", "4", "6", "8", "R")

    for line in kb_lines:
        code = line.split("---")
        if len(code) < 2:
            # blank or malformed line: nothing to replace with
            continue
        key = code[0]
        # Tested before lowercasing: the original compared the lowercased
        # key with "_DEFAULT_", so the default never matched in modes 2/4/6/8.
        is_default = (key == "_DEFAULT_")
        wanted = value
        if ignore_case:
            key = key.lower()
            wanted = value.lower()

        if mode == "R":
            # replace mode: apply every matching entry, keep scanning
            if _kb_contains(key, wanted) or _kb_contains(wanted, key):
                value = value.replace(key, code[1])
            continue

        if mode in ("2", "3", "0"):
            matched = _kb_contains(wanted, key)
        elif mode in ("5", "6"):
            matched = _kb_contains(key, wanted)
        elif mode in ("7", "8"):
            matched = _kb_contains(key, wanted) or _kb_contains(wanted, key)
        elif mode == "9":
            matched = False
        else:
            # mode "4" (lowercased above) and mode "1"/anything else: exact match
            matched = (key == wanted)

        if matched or is_default:
            return code[1]
    return value
def _ff_is_regexp(par_item):
    "True when a parameter is written between // delimiters, i.e. is a regexp."
    return par_item[0:2] == "//" and par_item[-2:] == "//"


def FormatField(value, fn):
    """
    bibconvert formatting functions:
    ================================
    ADD(prefix,suffix)      - add prefix/suffix
    KB(kb_file,mode)        - lookup in kb_file and replace value
    ABR(N,suffix)           - abbreviate to N places with suffix
    ABRX()                  - abbreviate exclusively words longer
    ABRW()                  - abbreviate word (limit from right)
    REP(x,y)                - replace
    SUP(type)               - remove characters of certain TYPE
    LIM(n,side)             - limit to n letters from L/R
    LIMW(string,side)       - L/R after split on string
    WORDS(n,side)           - limit to n words from L/R
    IF(value,valueT,valueF) - replace on IF condition
    MINL(n)                 - replace words shorter than n
    MINLW(n)                - replace words shorter than n
    MAXL(n)                 - replace words longer than n
    EXPW(type)              - replace word from value containing TYPE
    EXP(STR,0/1)            - replace word from value containing string
    NUM()                   - take only digits in given string
    SHAPE()                 - remove extra space
    UP()                    - to uppercase
    DOWN()                  - to lowercase
    CAP()                   - make capitals each word
    SPLIT(n,h,str,from)     - only for final Aleph field, i.e. AB , maintain whole words
    SPLITW(sep,h,str,from)  - only for final Aleph field, split on string
    CONF(filed,value,0/1)   - confirm validity of output line (check other field)
    CONFL(substr,0/1)       - confirm validity of output line (check field being processed)
    CUT(prefix,postfix)     - remove substring from side
    RANGE(MIN,MAX)          - select items in repetitive fields
    RE(regexp)              - regular expressions

    bibconvert character TYPES
    ==========================
    ALPHA  - alphabetic
    NALPHA - not alpphabetic
    NUM    - numeric
    NNUM   - not numeric
    ALNUM  - alphanumeric
    NALNUM - non alphanumeric
    LOWER  - lowercase
    UPPER  - uppercase
    PUNCT  - punctual
    NPUNCT - non punctual
    SPACE  - space

    value - string to format
    fn    - formatting function with its parameters, e.g. 'REP(-, )'; a
            parameter written as //...// is treated as a regular expression
            by the functions that support it
    Returns the formatted string. A function name that is not handled here
    returns value as it was passed in.
    """
    global data_parsed

    # Fallback result: the value as passed in, before keyword substitution
    out = value
    fn, par = get_pars(fn + "()")
    value = sub_keywd(value)
    par = [sub_keywd(item) for item in par]

    if fn == "RE":
        par = set_par_defaults(par, ".*,0")
        if re.search(par[0], value) and par[1] == "0":
            out = value
        else:
            out = ""

    elif fn == "KB":
        par = set_par_defaults(par, "KB,0")
        out = crawl_KB(par[0], value, par[1])

    elif fn == "ADD":
        par = set_par_defaults(par, ",")
        out = par[0] + value + par[1]

    elif fn == "ABR":
        par = set_par_defaults(par, "1,.")
        out = value[:int(par[0])] + par[1]

    elif fn == "ABRW":
        out = FormatField(value, "ABR(1,.)").upper()

    elif fn == "ABRX":
        par = set_par_defaults(par, ",")
        toout = []
        for wrd in value.split(" "):
            if len(wrd) > int(par[0]):
                wrd = wrd[:int(par[0])] + par[1]
            toout.append(wrd)
        out = " ".join(toout)

    elif fn == "SUP":
        par = set_par_defaults(par, ",")
        sup_patterns = {
            "NUM": r'\d+',
            "NNUM": r'\D+',
            "ALPHA": r'[a-zA-Z]+',
            "NALPHA": r'[^a-zA-Z]+',
            "ALNUM": r'\w+',
            "NPUNCT": r'\w+',
            "NALNUM": r'\W+',
            "PUNCT": r'\W+',
            "LOWER": r'[a-z]+',
            "UPPER": r'[A-Z]+',
            "SPACE": r'\s+',
        }
        if par[0] in sup_patterns:
            out = re.sub(sup_patterns[par[0]], par[1], value)

    elif fn == "LIM":
        par = set_par_defaults(par, ",")
        if par[1] == "L":
            # clamp at 0: a negative start would take the wrong slice when
            # n exceeds the length of the value
            out = value[max(len(value) - int(par[0]), 0):]
        if par[1] == "R":
            out = value[:int(par[0])]

    elif fn == "LIMW":
        par = set_par_defaults(par, ",")
        if par[0] != "":
            if _ff_is_regexp(par[0]):
                # NOTE(review): raises AttributeError when the regexp does
                # not match value (behaviour unchanged)
                par[0] = re.search(par[0][2:-2], value).group()
            tmp = value.split(par[0])
            if par[1] == "L":
                # NOTE(review): IndexError when the string is not in value
                out = par[0] + tmp[1]
            if par[1] == "R":
                out = tmp[0] + par[0]

    elif fn == "WORDS":
        par = set_par_defaults(par, ",")
        words = [value]
        if par[1] == "R" or par[1] == "L":
            wanted = max(int(par[0]), 0)
            tmp = value.split(" ")
            # Slicing instead of indexing: a value with fewer than n words
            # is returned whole instead of raising IndexError
            if par[1] == "R":
                words = tmp[:wanted]
            else:
                words = tmp[max(len(tmp) - wanted, 0):]
        out = " ".join(words)

    elif fn == "MINL":
        par = set_par_defaults(par, "1")
        kept = [wrd for wrd in value.split(" ") if len(wrd) >= int(par[0])]
        out = " ".join(kept)

    elif fn == "MINLW":
        par = set_par_defaults(par, "1")
        if len(value) >= int(par[0]):
            out = value
        else:
            out = ""

    elif fn == "MAXL":
        par = set_par_defaults(par, "4096")
        kept = [wrd for wrd in value.split(" ") if len(wrd) <= int(par[0])]
        out = " ".join(kept)

    elif fn == "REP":
        # The result of set_par_defaults was discarded originally, so a
        # missing second parameter raised IndexError
        par = set_par_defaults(par, ",")
        if par[0] != "":
            if _ff_is_regexp(par[0]):
                # re.sub needs the replacement as its second argument; the
                # original call omitted it and always raised TypeError
                out = re.sub(par[0][2:-2], par[1], value)
            else:
                out = value.replace(par[0], par[1])

    elif fn == "SHAPE":
        if value != "":
            out = value.strip()

    elif fn == "UP":
        out = value.upper()

    elif fn == "DOWN":
        out = value.lower()

    elif fn == "CAP":
        out = " ".join([wrd.capitalize() for wrd in value.split(" ")])

    elif fn == "IF":
        par = set_par_defaults(par, ",,")
        for index in range(3):
            if _ff_is_regexp(par[index]):
                # NOTE(review): raises AttributeError when the regexp does
                # not match value (behaviour unchanged)
                par[index] = re.search(par[index][2:-2], value).group()
        if value == par[0]:
            out = par[1]
        else:
            out = par[2]
        if out == "ORIG":
            out = value

    elif fn == "EXP":
        par = set_par_defaults(par, ",0")
        if _ff_is_regexp(par[0]):
            # NOTE(review): AttributeError when the regexp does not match
            par[0] = re.search(par[0][2:-2], value).group()
        out2 = []
        for wrd in value.split(" "):
            if _ff_is_regexp(par[0]):
                par[0] = par[0][2:-2]
                if (re.search(par[0], wrd).group() == wrd) and (par[1] == "1"):
                    out2.append(wrd)
                if (re.search(par[0], wrd).group() != wrd) and (par[1] == "0"):
                    out2.append(wrd)
            else:
                # NOTE(review): an empty STR makes split() raise ValueError
                if (len(wrd.split(par[0])) == 1) and (par[1] == "1"):
                    out2.append(wrd)
                if (len(wrd.split(par[0])) != 1) and (par[1] == "0"):
                    out2.append(wrd)
        out = " ".join(out2)

    elif fn == "EXPW":
        par = set_par_defaults(par, ",0")
        out2 = []
        for wrd in value.split(" "):
            if (FormatField(wrd, "SUP(" + par[0] + ")") == wrd) and (par[1] == "1"):
                out2.append(wrd)
            if (FormatField(wrd, "SUP(" + par[0] + ")") != wrd) and (par[1] == "0"):
                out2.append(wrd)
        out = " ".join(out2)

    elif fn == "SPLIT":
        # NOTE(review): conv_setting is not defined in this function; it is
        # presumably a module-level global -- confirm
        par = set_par_defaults(par, "%d,0,,1" % conv_setting[1])
        length = int(par[0]) + (int(par[1]))
        header = int(par[1])
        headerplus = par[2]
        starting = int(par[3])
        line = ""
        tmp2 = []
        tmp3 = []
        linenumber = 1
        if linenumber >= starting:
            tmp2.append(headerplus)
            line = line + headerplus
        for wrd in value.split(" "):
            line = line + " " + wrd
            tmp2.append(wrd)
            if len(line) > length:
                # line is full: the last word opens the next line, which
                # starts with the first `header` characters of the value
                linenumber = linenumber + 1
                line = tmp2.pop()
                tmp3.append(" ".join(tmp2))
                tmp2 = []
                line2 = value[:header]
                if linenumber >= starting:
                    line3 = line2 + headerplus + line
                else:
                    line3 = line2 + line
                line = line3
                tmp2.append(line)
        tmp3.append(line)
        out = "\n".join(tmp3)
        out = FormatField(out, "SHAPE()")

    elif fn == "SPLITW":
        par = set_par_defaults(par, ",0,,1")
        if _ff_is_regexp(par[0]):
            par[0] = par[0][2:-2]
        # The original appended the match object (or the builtin `str`) to
        # each piece, which raised TypeError. The text of the first match is
        # used instead -- NOTE(review): confirm that re-appending the
        # separator text is the intended result.
        sep_match = re.search(par[0], value)
        if sep_match:
            sep_text = sep_match.group()
        else:
            sep_text = ""
        header = int(par[1])
        headerplus = par[2]
        starting = int(par[3])
        counter = 1
        tmp2 = []
        tmp = re.split(par[0], value)
        last = tmp.pop()
        for wrd in tmp:
            counter = counter + 1
            if counter >= starting:
                tmp2.append(value[:header] + headerplus + wrd + sep_text)
            else:
                tmp2.append(value[:header] + wrd + sep_text)
        if last != "":
            counter = counter + 1
            if counter >= starting:
                tmp2.append(value[:header] + headerplus + last)
            else:
                tmp2.append(value[:header] + last)
        out = "\n".join(tmp2)

    elif fn == "CONF":
        par = set_par_defaults(par, ",,1")
        found = 0
        if _ff_is_regexp(par[1]):
            par1 = par[1][2:-2]
        else:
            par1 = par[1]
        for line in select_line(par[0], data_parsed):
            if par1 == "":
                if line == "":
                    found = 1
            elif len(re.split(par1, line)) > 1:
                found = 1
        # par[2] == 1: keep the value only if the other field matched;
        # par[2] == 0: keep it only if it did not
        if (found == 1) and (int(par[2]) == 1):
            out = value
        if (found == 1) and (int(par[2]) == 0):
            out = ""
        if (found == 0) and (int(par[2]) == 1):
            out = ""
        if (found == 0) and (int(par[2]) == 0):
            out = value

    elif fn == "CONFL":
        # The result of set_par_defaults was discarded originally, so
        # CONFL(substr) without the flag raised IndexError
        par = set_par_defaults(par, ",1")
        if _ff_is_regexp(par[0]):
            par[0] = par[0][2:-2]
        if re.search(par[0], value):
            if int(par[1]) == 1:
                out = value
            else:
                out = ""
        else:
            if int(par[1]) == 1:
                out = ""
            else:
                out = value

    elif fn == "CUT":
        par = set_par_defaults(par, ",")
        left = value[:len(par[0])]
        right = value[-(len(par[1])):]
        if left == par[0]:
            out = out[len(par[0]):]
        if right == par[1]:
            out = out[:-(len(par[1]))]

    elif fn == "NUM":
        out = "".join(re.findall(r'\d', value))

    return out
def printInfo():
    "print out when not enough parameters given"
    # Writes the usage/help text to standard output (Python 2 print statement)
    print """
BibConvert data convertor
Usage: bibconvert [options] -ctemplate.cfg < input.dat
Options:
-c'config' configuration templates file
-d'directory' source_data fields are located in separated files in 'directory'one record)
-h print this help
-V print version number
-l'length' minimum line length (default = 1)
-o'value' OAI identifier starts with specified value (default = 1)
-b'file header' insert file header
-e'file footer' insert file footer
-B'record header' insert record header
-E'record footer' insert record footer
-s'record separator' record separator, default empty line (EOLEOL)
-m0'query_string' match records using query string, output unmatched
-m1'query_string' match records using query string, output matched
-m2'query_string' match records using query string, output ambiguous
-Cx'field extraction template' alternative to -c when configuration is split to several files
-Cs'source data template' alternative to -c when configuration is split to several files
-Ct'target data template' alternative to -c when configuration is split to several files
"""
## Match records with the database content
##
def match_in_database(record, query_string):
    """Search the database for records matching the given record.

    record       - record as MARCXML text
    query_string - fields to match on with their formatting functions, in
                   the syntax understood by parse_query_string()

    For every queried field found in record, the text of the first matching
    subfield is run through the field's formatting functions and used as a
    search pattern. Only the first three patterns are passed on. Returns the
    result of perform_request_search().
    """
    search_pattern = []
    search_field = []

    for query_field in parse_query_string(query_string):
        # tag specification has the form TTTIIS: tag, two indicators, subfield
        tag_spec = query_field[0]
        ind1 = tag_spec[3:4]
        if ind1 == "_":
            ind1 = ""
        ind2 = tag_spec[4:5]
        if ind2 == "_":
            ind2 = ""
        marker = "<datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\"><subfield code=\"%s\">" % (tag_spec[0:3], ind1, ind2, tag_spec[5:6])

        pieces = record.split(marker)
        if len(pieces) <= 1:
            # field not present in the record
            continue

        matching_value = pieces[1].split("<")[0]
        for fn in query_field[1:]:
            matching_value = FormatField(matching_value, fn)
        search_pattern.append(matching_value)
        search_field.append(tag_spec)

    # pad so that the three pattern/field slots below always exist
    search_field.extend(["", "", ""])
    search_pattern.extend(["", "", ""])

    return perform_request_search(p1=search_pattern[0],
                                  f1=search_field[0],
                                  p2=search_pattern[1],
                                  f2=search_field[1],
                                  p3=search_pattern[2],
                                  f3=search_field[2])
def parse_query_string(query_string):
    """Break a matching query string into fields and formatting functions.

    Fields are separated by '||'; inside a field the MARC tag and its
    formatting functions are separated by '::'.  Nothing is stripped or
    normalised, e.g.:

    Input:  245__a::REP(-, )::SHAPE::SUP(SPACE, )||700__a::MINL(2)::REP(COMMA,)
    Output: [['245__a', 'REP(-, )', 'SHAPE', 'SUP(SPACE, )'], ['700__a', 'MINL(2)', 'REP(COMMA,)']]
    """
    return [field_spec.split('::') for field_spec in query_string.split('||')]
def exit_on_error(error_message):
    """Report a fatal error on stderr and terminate the program.

    Writes a short banner and `error_message` to sys.stderr, then raises
    SystemExit with exit status 1 so that calling shells and scripts can
    detect the failure (a bare sys.exit() would report success, status 0).

    This function never returns.
    """
    sys.stderr.write("\n bibconvert data convertor\n")
    # wrap in a 1-tuple so that a tuple-valued message cannot break the
    # %-formatting
    sys.stderr.write(" Error: %s\n" % (error_message,))
    sys.exit(1)
def create_record(begin_record_header,
                  ending_record_footer,
                  query_string,
                  match_mode,
                  Xcount):
    """Create the output text for the record currently held in data_parsed.

    The record is built line by line from the module-level
    target_tpl_parsed; values are taken from data_parsed and split into
    subfields according to source_tpl_parsed.

    Parameters:
      begin_record_header  - if non-empty, prepended (plus a newline) to
                             the record that is output
      ending_record_footer - if non-empty, appended (after a newline) to
                             the record that is output
      query_string         - if true, the record is looked up with
                             match_in_database() using this query
      match_mode           - -1: always output the record;
                              0: output only if the lookup found nothing;
                              1: output only if exactly one record was
                                 found (a controlfield 001 carrying its
                                 ID is prepended);
                              2: output only if more than one was found
      Xcount               - only used to build the local sysno500 value

    Returns the record as a string, or "" when the record is not to be
    output (including the case of match_mode != -1 with no query_string).
    """

    global data_parsed

    out_to_print = ""
    # NOTE(review): `out` and `field_data_item_LIST` are not used below
    out = []
    field_data_item_LIST = []

    ssn5cnt = "%3d" % Xcount
    # NOTE(review): sysno / sysno500 are plain locals here (not declared
    # global) and are not read again in this function
    sysno = generate("DATE(%w%H%M%S)")
    sysno500 = generate("XDATE(%w%H%M%S)," + ssn5cnt)

    for T_tpl_item_LIST in target_tpl_parsed:
        # T_tpl_item_LIST[0] is the line specification (its "::" parts
        # after the first are line-level formatting functions, see below);
        # T_tpl_item_LIST[1] is the list of segments making up the line.

        # the line is printed only if the variables inside are not empty
        print_line = 0
        to_output = []
        # number of output lines produced from this template line
        rows = 1

        for field_tpl_item_STRING in T_tpl_item_LIST[1]:
            DATA = []
            if (field_tpl_item_STRING[:2] == "<:"):
                # variable segment: drop the two-character delimiters
                field_tpl_item_STRING = field_tpl_item_STRING[2:-2]
                field = field_tpl_item_STRING.split("::")[0]
                if (len(field_tpl_item_STRING.split("::")) == 1):
                    # no subfield given: the value comes from generate()
                    value = generate(field)
                    to_output.append([value])
                else:
                    subfield = field_tpl_item_STRING.split("::")[1]
                    # a trailing "*" marks the field as repetitive
                    if (field[-1] == "*"):
                        repetitive = 1
                        field = field[:-1]
                    else:
                        repetitive = 0
                    # NOTE(review): both branches are identical
                    if dirmode:
                        DATA = select_line(field, data_parsed)
                    else:
                        DATA = select_line(field, data_parsed)
                    # non-repetitive fields are collapsed into one value
                    if (repetitive == 0):
                        DATA = [string.join(DATA, " ")]
                    SRC_TPL = select_line(field, source_tpl_parsed)
                    try:
                        if (DATA[0] != ""):
                            DATA = get_subfields(DATA, subfield, SRC_TPL)
                            # everything after field and subfield is a
                            # formatting function
                            FF = field_tpl_item_STRING.split("::")
                            if (len(FF) > 2):
                                FF = FF[2:]
                                for fn in FF:
                                    # DATAFORMATTED = []
                                    if (len(DATA) != 0 and DATA[0] != ""):
                                        # NOTE(review): get_subfields() is
                                        # applied again to already extracted
                                        # data, and the inner loop variable
                                        # fn2 is never used (`fn` is applied
                                        # instead) - looks unintended,
                                        # confirm before changing
                                        DATA = get_subfields(DATA, subfield, SRC_TPL)
                                        FF = field_tpl_item_STRING.split("::")
                                        if (len(FF) > 2):
                                            FF = FF[2:]
                                            for fn2 in FF:
                                                DATAFORMATTED = []
                                                for item in DATA:
                                                    item = FormatField(item, fn)
                                                    DATAFORMATTED.append(item)
                                                DATA = DATAFORMATTED
                            if (len(DATA) > rows):
                                rows = len(DATA)
                            # NOTE(review): DATA appears to be a list at
                            # this point, so this test is presumably always
                            # true - confirm
                            if DATA != "":
                                print_line = 1
                            to_output.append(DATA)
                    except IndexError, e:
                        # e.g. DATA is empty: the segment contributes
                        # nothing to the line
                        pass
            else:
                # literal segment, copied to the output as is
                to_output.append([field_tpl_item_STRING])

        current = 0
        default_print = 0
        while (current < rows):
            line_to_print = []
            for item in to_output:
                if (item == []):
                    item = ['']
                # segments with fewer values than `rows` repeat their
                # first value on the remaining lines
                if (len(item) <= current):
                    printout = item[0]
                else:
                    printout = item[current]
                line_to_print.append(printout)
            output = exp_n(string.join(line_to_print, ""))
            global_formatting_functions = T_tpl_item_LIST[0].split("::")[1:]
            for GFF in global_formatting_functions:
                if (GFF[:5] == "RANGE"):
                    # RANGE(min,max): blank the line when its 1-based
                    # number lies outside the given bounds
                    parR = get_pars(GFF)[1]
                    parR = set_par_defaults(parR, "MIN,MAX")
                    if (parR[0] != "MIN"):
                        if (string.atoi(parR[0]) > (current + 1)):
                            output = ""
                    if (parR[1] != "MAX"):
                        if (string.atoi(parR[1]) < (current + 1)):
                            output = ""
                elif (GFF[:4] == "DEFP"):
                    # DEFP: emit the line even if no variable was filled
                    default_print = 1
                else:
                    output = FormatField(output, GFF)
            # emit the line if it is longer than set_conv()[0] and had
            # data, or if DEFP forces it
            if ((len(output) > set_conv()[0] and print_line == 1) or default_print):
                out_to_print = out_to_print + output + "\n"
            current = current + 1

    ###
    # decide from the database lookup whether the record is output
    out_flag = 0
    if query_string:
        recID = match_in_database(out_to_print, query_string)
        if len(recID) == 1 and match_mode == 1:
            ctrlfield = "<controlfield tag=\"001\">%d</controlfield>" % (recID[0])
            out_to_print = ctrlfield + "\n" + out_to_print
            out_flag = 1
        if len(recID) == 0 and match_mode == 0:
            out_flag = 1
        if len(recID) > 1 and match_mode == 2:
            out_flag = 1

    if out_flag or match_mode == -1:
        if begin_record_header != "":
            out_to_print = begin_record_header + "\n" + out_to_print
        if ending_record_footer != "":
            out_to_print = out_to_print + "\n" + ending_record_footer
    else:
        out_to_print = ""

    return out_to_print
def convert(ar_):
    """Run the conversion described by the settings packed in `ar_`.

    `ar_` must unpack into exactly 22 values, which are stored, in order,
    in the module-level globals listed below.

    Directory mode (dirmode true): source_data has to be a directory; it
    is parsed with parse_input_data_d() and a single record is created.
    If it is not a directory, exit_on_error() is called.

    Stream mode: begin_header is printed, then records are read with
    parse_input_data_fx() until it returns -1; input whose first value is
    empty is skipped, and Xcount is incremented for every record created.
    Finally ending_footer is printed.

    In both modes a non-empty record is printed, tcounter is incremented
    and output_rec_sep is printed when it is non-empty.  Returns None.
    """
    global dirmode, Xcount, conv_setting, sysno, sysno500, separator
    global tcounter, source_data, query_string, match_mode
    global begin_record_header, ending_record_footer, output_rec_sep
    global begin_header, ending_footer, oai_identifier_from
    global source_tpl, source_tpl_parsed, target_tpl, target_tpl_parsed
    global extract_tpl, extract_tpl_parsed, data_parsed

    (dirmode, Xcount, conv_setting, sysno, sysno500, separator,
     tcounter, source_data, query_string, match_mode,
     begin_record_header, ending_record_footer, output_rec_sep,
     begin_header, ending_footer, oai_identifier_from,
     source_tpl, source_tpl_parsed, target_tpl, target_tpl_parsed,
     extract_tpl, extract_tpl_parsed) = ar_

    if dirmode:
        # one record, with its fields stored as files in a directory
        if not os.path.isdir(source_data):
            exit_on_error("Cannot access directory: %s" % source_data)
            return

        data_parsed = parse_input_data_d(source_data, source_tpl)
        record = create_record(begin_record_header, ending_record_footer,
                               query_string, match_mode, Xcount)
        if record != "":
            print(record)
            tcounter += 1
            if output_rec_sep != "":
                print(output_rec_sep)
        return

    # stream mode: records are read one after another from the input
    print(begin_header)
    while True:
        data_parsed = parse_input_data_fx(source_tpl)
        if data_parsed == -1:
            # end of input
            break
        if data_parsed[0][0] == '':
            continue

        record = create_record(begin_record_header, ending_record_footer,
                               query_string, match_mode, Xcount)
        Xcount += 1
        if record != "":
            print(record)
            tcounter += 1
            if output_rec_sep != "":
                print(output_rec_sep)
    print(ending_footer)
    return
Event Timeline
Log In to Comment