bibconvert.in

## $Id$
## This file is part of the CERN Document Server Software (CDSware).
## Copyright (C) 2002 CERN.
##
## The CDSware is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## The CDSware is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDSware; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
## read config variables:
#include "config.wml"
#include "configbis.wml"
## start Python:
<protect>#!</protect><PYTHON>
<protect>## $Id$</protect>
<protect>## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES.</protect>
## okay, rest of the Python code goes below
#######
<protect>
def set_conv():
"""
bibconvert common settings
=======================
minimal length of output line = 1
maximal length of output line = 4096
"""
conv_setting = [
1,
4096
]
return conv_setting
def get_options():
"Read command line options into list"
out = []
for arg in sys.argv:
if (arg[:1] == "-"):
out.append(arg)
return out
def get_arguments():
"Read command line arguments into list"
out = []
for arg in sys.argv:
if (arg[:1] != "-"):
out.append(arg)
return out
def get_pars(fn):
"Read function and its parameters into list"
out = []
out.append(re.split('\(|\)',fn)[0])
out.append(re.split(',',re.split('\(|\)',fn)[1]))
return out
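## Example: get_pars("ADD(x,y)") returns ['ADD', ['x', 'y']];
## a call without parameters, e.g. get_pars("UP()"), returns ['UP', ['']].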
def is_opt(seek,opt_list):
"Return entire argument if given in the list of options"
out = ""
for arg in opt_list:
if (seek == arg[:2]):
out = arg
if (seek == arg[:3]):
out = arg
return out
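## Example: is_opt("-c", ["-o1", "-csample.cfg"]) returns "-csample.cfg";
## a three-character seek such as "-Cx" is matched on the first three characters.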
def append_to_output_file(filename, output):
"bibconvert output file creation by output line"
try:
file = open(filename,'a')
file.write(output)
file.close()
except IOError, e:
exit_on_error("Cannot write into %s" % filename)
return 1
def sub_keywd(out):
"bibconvert keywords literal substitution"
out = string.replace(out,"EOL","\n")
out = string.replace(out,"_CR_","\r")
out = string.replace(out,"_LF_","\n")
out = string.replace(out,"\\",'\\')
out = string.replace(out,"\r",'\r')
out = string.replace(out,"BSLASH",'\\')
out = string.replace(out,"COMMA",',')
out = string.replace(out,"LEFTB",'[')
out = string.replace(out,"RIGHTB",']')
out = string.replace(out,"LEFTP",'(')
out = string.replace(out,"RIGHTP",')')
return out
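## The literal keywords above (EOL, BSLASH, COMMA, LEFTB, RIGHTB, LEFTP, RIGHTP, ...)
## let template authors write separator characters inside template values and
## function parameters without breaking the comma/parenthesis splitting in get_pars().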
def check_split_on(data_item_split, sep, tpl_f):
"""
bibconvert conditional split with following conditions
===================================================
::NEXT(N,TYPE,SIDE) - next N chars are of the TYPE having the separator on the SIDE
::PREV(N,TYPE,SIDE) - previous N chars are of the TYPE having the separator on the SIDE
"""
fn = get_pars(tpl_f)[0]
par = get_pars(tpl_f)[1]
done = 0
while (done == 0):
if ( (( fn == "NEXT" ) and ( par[2]=="R" )) or
(( fn == "PREV" ) and ( par[2]=="L" )) ):
test_value = data_item_split[0][-(string.atoi(par[0])):]
elif ( ((fn == "NEXT") and ( par[2]=="L")) or
((fn == "PREV") and ( par[2]=="R")) ):
test_value = data_item_split[1][:(string.atoi(par[0]))]
data_item_split_tmp = []
if ((FormatField(test_value,"SUP(" + par[1] + ",)") != "")or(len(test_value) < string.atoi(par[0]))):
data_item_split_tmp = data_item_split[1].split(sep,1)
if(len(data_item_split_tmp)==1):
done = 1
data_item_split[0] = data_item_split[0] + sep + data_item_split_tmp[0]
data_item_split[1] = ""
else:
data_item_split[0] = data_item_split[0] + sep + data_item_split_tmp[0]
data_item_split[1] = data_item_split_tmp[1]
else:
done = 1
return data_item_split
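## check_split_on() is called from get_subfields() whenever a separator in the
## source template carries a ::NEXT()/::PREV() condition; it keeps re-splitting
## the data item and re-joining the pieces until the condition no longer holds.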
def get_subfields(data,subfield,src_tpl):
"Get subfield according to the template"
out = []
for data_item in data:
found = 0
for src_tpl_item in src_tpl:
if (src_tpl_item[:2] == "<:"):
if (src_tpl_item[2:-2] == subfield):
found = 1
else:
sep_in_list = src_tpl_item.split("::")
sep = sep_in_list[0]
data_item_split = data_item.split(sep,1)
if (len(data_item_split)==1):
data_item = data_item_split[0]
else:
if (len(sep_in_list) > 1):
data_item_split = check_split_on(data_item.split(sep,1), sep_in_list[0],sep_in_list[1])
if(found == 1):
data_item = data_item_split[0]
else:
data_item = string.join(data_item_split[1:],sep)
out.append(data_item)
return out
def exp_n(word):
"Replace newlines and carriage return's from string."
out = ""
for ch in word:
if ((ch != '\n') and (ch != '\r')):
out = out + ch
return out
def exp_e(list):
"Expunge empty elements from a list"
out = []
for item in list:
item = exp_n(item)
if ((item != '\r\n' and item != '\r' and item != '\n' and item !="" and len(item)!=0)):
out.append(item)
return out
def sup_e(word):
"Replace spaces"
out = ""
for ch in word:
if (ch != ' '):
out = out + ch
return out
def select_line(field_code, list):
"Return appropriate item from a list"
out = ['']
for field in list:
field[0] = sup_e(field[0])
field_code = sup_e(field_code)
if (field[0] == field_code):
out = field[1]
return out
def parse_field_definition(source_field_definition):
"Create list of source_field_definition"
word_list = []
out = []
word = ""
counter = 0
if (len(source_field_definition.split("---"))==4):
out = source_field_definition.split("---")
else:
element_list_high = source_field_definition.split("<:")
for word_high in element_list_high:
element_list_low = word_high.split(':>')
for word_low in element_list_low:
word_list.append(word_low)
word_list.append(":>")
word_list.pop()
word_list.append("<:")
word_list.pop()
for item in word_list:
word = word + item
if (item == "<:"):
counter = counter + 1
if (item == ":>"):
counter = counter - 1
if counter == 0:
out.append(word)
word = ""
return out
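## Splits a field definition into top-level chunks, keeping each <: ... :> group
## (including nested <: :> groups) together as a single list item.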
def parse_template(template):
"""
bibconvert parse template
=========================
in  - template filename
out - [ [ field_code , [ field_template_parsed ] ] , [] ]
"""
out = []
for field_def in read_file(template,1):
field_tpl_new = []
if ((len(field_def.split("---",1)) > 1) and (field_def[:1]!="#")):
field_code = field_def.split("---",1)[0]
field_tpl = parse_field_definition(field_def.split("---",1)[1])
field_tpl_new = field_tpl
field_tpl = exp_e(field_tpl_new)
out_data = [field_code, field_tpl]
out.append(out_data)
return out
def parse_common_template(template,part):
"""
bibconvert parse template
=========================
in - template filename
out - [ [ field_code , [ field_template_parsed ] ] , [] ]
"""
out = []
counter = 0
for field_def in read_file(template,1):
if (exp_n(field_def)[:3] == "==="):
counter = counter + 1
elif (counter == part):
field_tpl_new = []
if ((len(field_def.split("---",1)) > 1) and (field_def[:1]!="#")):
field_code = field_def.split("---",1)[0]
field_tpl = parse_field_definition(field_def.split("---",1)[1])
field_tpl_new = field_tpl
field_tpl = exp_e(field_tpl_new)
out_data = [field_code, field_tpl]
out.append(out_data)
return out
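## With a single configuration file (-c), lines starting with "===" separate the
## file into parts: part 1 is the field extraction template, part 2 the source
## data template, and part 3 the target data template (see the MAIN section below).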
def parse_input_data_f(source_data_open, source_tpl):
"""
bibconvert parse input data
===========================
in  - input source data location (filehandle)
      source data template
      source_field_code  list of source field codes
      source_field_data  list of source field data values (repetitive fields: one occurrence per line)
out - [ [ source_field_code , [ source_field_data ] ] , [] ]
source_data_template entry - field_code---[const]<:subfield_code:>[const][<:subfield_code:>][]
destination_template entry - [::GFF()]---[const]<:field_code::subfield_code[::FF()]:>[]
input data file; by line: - fieldcode value
"""
out = [['',[]]]
count = 0
values = []
while (count < 1):
line = source_data_open.readline()
if (line == ""):
return(-1)
line_split = line.split(" ",1)
if (re.sub("\s","",line) == separator):
count = count + 1
if (len(line_split) == 2):
field_code = line_split[0]
field_value = exp_n(line_split[1])
values.append([field_code,field_value])
item_prev = ""
stack = ['']
for item in values:
if ((item[0]==item_prev)or(item_prev == "")):
stack.append(item[1])
item_prev = item[0]
else:
out.append([item_prev,stack])
item_prev = item[0]
stack = []
stack.append(item[1])
try:
if (stack[0] != ""):
if (out[0][0]==""):
out = []
out.append([field_code,stack])
except IndexError, e:
out = out
return out
def parse_input_data_fx(source_tpl):
"""
bibconvert parse input data
===========================
in  - input source data location (stdin)
      source data template
      source_field_code  list of source field codes
      source_field_data  list of source field data values (repetitive fields: one occurrence per line)
out - [ [ source_field_code , [ source_field_data ] ] , [] ]
extraction_template_entry -
input data file - specified by extract_tpl
"""
count = 0
record = ""
field_data_1_in_list = []
out = [['',[]]]
while (count <1):
line = sys.stdin.readline()
if (line == ""):
if (record == ""):
return (-1)
if (re.sub("\s","",line) == separator):
count = count + 1
else:
record = record + line
for field_defined in extract_tpl_parsed:
try:
field_defined[1][0] = sub_keywd(field_defined[1][0])
field_defined[1][1] = sub_keywd(field_defined[1][1])
except IndexError, e:
field_defined = field_defined
try:
field_defined[1][2] = sub_keywd(field_defined[1][2])
except IndexError, e:
field_defined = field_defined
field_data_1 =""
try:
if (len(record.split(field_defined[1][0])) == 1):
field_data_1 = ""
field_data_1_in_list = []
else:
field_data_1_tmp = record.split(field_defined[1][0],1)[1]
field_data_1_in_list = field_data_1_tmp.split(field_defined[1][0])
except IndexError, e:
field_data_1 = ""
spliton = []
outvalue = ""
field_data_2 = ""
field_data = ""
try:
if ((field_defined[1][1])=="EOL"):
spliton = ['\n']
elif ((field_defined[1][1])=="MIN"):
spliton = ['\n']
elif ((field_defined[1][1])=="MAX"):
for item in extract_tpl_parsed:
try:
spliton.append(item[1][0])
except IndexError, e:
spliton = spliton
else:
spliton = [field_defined[1][1]]
except IndexError,e :
spliton = ""
outvalues = []
for field_data in field_data_1_in_list:
outvalue = ""
for splitstring in spliton:
field_data_2 = ""
if (len(field_data.split(splitstring))==1):
if (outvalue == ""):
field_data_2 = field_data
else:
field_data_2 = outvalue
else:
field_data_2 = field_data.split(splitstring)[0]
outvalue = field_data_2
field_data = field_data_2
outvalues.append(outvalue)
outvalues = exp_e(outvalues)
if (len(outvalues) > 0):
if (out[0][0]==""):
out = []
outstack = []
if (len(field_defined[1])==3):
for item in outvalues:
stack = item.split(field_defined[1][2])
for stackitem in stack:
outstack.append(stackitem)
else:
outstack = outvalues
out.append([field_defined[0],outstack])
return out
def parse_input_data_d(source_data, source_tpl):
"""
bibconvert parse input data
===========================
in  - input source data location (directory)
      source data template
      source_field_code  list of source field codes
      source_field_data  list of source field data values (repetitive fields: one occurrence per line)
out - [ [ source_field_code , [ source_field_data ] ] , [] ]
source_data_template entry - field_code---[const]<:subfield_code:>[const][<:subfield_code:>][]
destination_template entry - [::GFF()]---[const]<:field_code::subfield_code[::FF()]:>[]
input data dir; by file: - fieldcode value per line
"""
out = []
for source_field_tpl in read_file(source_tpl,1):
source_field_code = source_field_tpl.split("---")[0]
source_field_data = read_file(source_data + source_field_code,0)
source_field_data = exp_e(source_field_data)
out_data = [source_field_code, source_field_data]
out.append(out_data)
return out
def sub_empty_lines(value):
out = re.sub('\n\n+','',value)
return out
def set_par_defaults(par1,par2):
"Set default parameter when not defined"
par_new_in_list = par2.split(",")
i = 0
out = []
for par in par_new_in_list:
if (len(par1)>i):
if (par1[i] == ""):
out.append(par)
else:
out.append(par1[i])
else:
out.append(par)
i = i + 1
return out
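## Example: set_par_defaults(["", "5"], "1,.") returns ['1', '5'] -- empty or
## missing positional parameters are filled in from the comma-separated defaults.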
def generate(keyword):
"""
bibconvert generated values:
============================
SYSNO() - generate date as '%w%H%M%S'
WEEK(N) - generate date as '%V' with shift (N)
DATE(format) - generate date in specified FORMAT
VALUE(value) - enter value literally
OAI() - generate oai_identifier, starting value given at command line as -o<value>
"""
out = keyword
fn = keyword + "()"
par = get_pars(fn)[1]
fn = get_pars(fn)[0]
par = set_par_defaults(par,"")
if (fn == "SYSNO"):
out = sysno
if (fn == "WEEK"):
par = set_par_defaults(par,"0")
out = "%02d" % (string.atoi(strftime("%V",localtime())) + string.atoi(par[0]))
if (string.atoi(out)<0):
out = "00"
if (fn == "VALUE"):
par = set_par_defaults(par,"")
out = par[0]
if (fn == "DATE"):
par = set_par_defaults(par,"%w%H%M%S," + "%d" % conv_setting[1])
out = strftime(par[0],localtime())
out = out[:string.atoi(par[1])]
if (fn == "OAI"):
oai_prefix = "</protect><OAIIDPREFIX><protect>"
out = "%s:%d" % (oai_prefix,tcounter + oai_identifier_from)
return out
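## Example: generate("WEEK(2)") returns the current ISO week number ('%V') plus 2,
## zero-padded to two digits; generate("DATE(%Y-%m-%d)") returns today's date in
## that format, truncated to at most the configured maximum line length.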
def read_file(filename,exception):
"Read file into list"
out = []
if (os.path.isfile(filename)):
file = open(filename,'r')
out = file.readlines()
file.close()
else:
if exception:
exit_on_error("Cannot access file: %s" % filename)
return out
def crawl_KB(filename,value,mode):
"""
bibconvert look-up value in KB_file in one of following modes:
===========================================================
1 - case sensitive / match (default)
2 - not case sensitive / search
3 - case sensitive / search
4 - not case sensitive / match
5 - case sensitive / search (in KB)
6 - not case sensitive / search (in KB)
7 - case sensitive / search (reciprocal)
8 - not case sensitive / search (reciprocal)
9 - replace by _DEFAULT_ only
"""
if (os.path.isfile(filename) != 1):
pathtmp = string.split(extract_tpl,"/")
pathtmp.pop()
path = string.join(pathtmp,"/")
filename = path + "/" + filename
if (os.path.isfile(filename)):
file_to_read = open(filename,"r")
file_read = file_to_read.readlines()
for line in file_read:
code = string.split(line,"---")
if (mode == "2"):
value_to_cmp = string.lower(value)
code[0] = string.lower(code[0])
if ((len(string.split(value_to_cmp,code[0])) > 1)or(code[0]=="_DEFAULT_")):
value = code[1]
return value
elif ((mode == "3") or (mode == "0")):
if ((len(string.split(value,code[0])) > 1)or(code[0]=="_DEFAULT_")):
value = code[1]
return value
elif (mode == "4"):
value_to_cmp = string.lower(value)
code[0] = string.lower(code[0])
if ((code[0] == value_to_cmp)or(code[0]=="_DEFAULT_")):
value = code[1]
return value
elif (mode == "5"):
if ((len(string.split(code[0],value)) > 1)or(code[0]=="_DEFAULT_")):
value = code[1]
return value
elif (mode == "6"):
value_to_cmp = string.lower(value)
code[0] = string.lower(code[0])
if ((len(string.split(code[0],value_to_cmp)) > 1)or(code[0]=="_DEFAULT_")):
value = code[1]
return value
elif (mode == "7"):
if ((len(string.split(code[0],value)) > 1)or(len(string.split(value,code[0])) > 1)or(code[0]=="_DEFAULT_")):
value = code[1]
return value
elif (mode == "8"):
value_to_cmp = string.lower(value)
code[0] = string.lower(code[0])
if ((len(string.split(code[0],value_to_cmp)) > 1)or(len(string.split(value_to_cmp,code[0]))>1)or(code[0]=="_DEFAULT_")):
value = code[1]
return value
elif (mode == "9"):
if (code[0]=="_DEFAULT_"):
value = code[1]
return value
else:
if ((code[0] == value)or(code[0]=="_DEFAULT_")):
value = code[1]
return value
return value
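## KB files are plain text with one "key---value" mapping per line; a key of
## _DEFAULT_ always matches.  When the file is not found under the given name,
## it is looked up in the directory of the extraction template.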
def FormatField(value,fn):
"""
bibconvert formatting functions:
=============================
ADD(prefix,suffix) - add prefix/suffix
KB(kb_file) - lookup in kb_file and replace value
ABR(N,suffix) - abbreviate to N places with suffix
ABRX(N,suffix) - abbreviate only words longer than N
ABRW() - abbreviate word (limit from right)
REP(x,y) - replace
SUP(type) - remove characters of certain TYPE
LIM(n,side) - limit to n letters from L/R
LIMW(string,side) - L/R after split on string
WORDS(n,side) - limit to n words from L/R
IF(value,valueT,valueF) - replace on IF condition
MINL(n) - replace words shorter than n
MINLW(n) - replace words shorter than n
MAXL(n) - replace words longer than n
EXPW(type) - replace word from value containing TYPE
EXP(STR,0/1) - replace word from value containing string
NUM() - take only digits in given string
SHAPE() - remove extra space
UP() - to uppercase
DOWN() - to lowercase
CAP() - capitalize each word
SPLIT(n,h,str,from) - only for final Aleph field, i.e. AB , maintain whole words
SPLITW(sep,h,str,from) - only for final Aleph field, split on string
CONF(field,value,0/1) - confirm validity of output line (check other field)
CONFL(substr,0/1) - confirm validity of output line (check field being processed)
CUT(prefix,postfix) - remove substring from side
RANGE(MIN,MAX) - select items in repetitive fields
bibconvert character TYPES
=======================
ALPHA - alphabetic
NALPHA - not alphabetic
NUM - numeric
NNUM - not numeric
ALNUM - alphanumeric
NALNUM - non alphanumeric
LOWER - lowercase
UPPER - uppercase
PUNCT - punctuation
NPUNCT - non-punctuation
SPACE - space
"""
out = value
fn = fn + "()"
par = get_pars(fn)[1]
fn = get_pars(fn)[0]
value = sub_keywd(value)
par_tmp =[]
for item in par:
item = sub_keywd(item)
par_tmp.append(item)
par = par_tmp
if (fn == "KB"):
new_value = ""
par = set_par_defaults(par,"KB,0")
new_value = crawl_KB(par[0],value,par[1])
out = new_value
elif (fn == "ADD"):
par = set_par_defaults(par,",")
out = par[0] + value + par[1]
elif (fn == "ABR"):
par = set_par_defaults(par,"1,.")
out = value[:string.atoi(par[0])] + par[1]
elif (fn == "ABRW"):
tmp = FormatField(value,"ABR(1,.)")
tmp = tmp.upper()
out = tmp
elif (fn == "ABRX"):
par = set_par_defaults(par,",")
toout = []
tmp = value.split(" ")
for wrd in tmp:
if (len(wrd) > string.atoi(par[0])):
wrd = wrd[:string.atoi(par[0])] + par[1]
toout.append(wrd)
out = string.join(toout," ")
elif (fn == "SUP"):
par = set_par_defaults(par,",")
if(par[0]=="NUM"):
out = re.sub('\d+',par[1],value)
if(par[0]=="NNUM"):
out = re.sub('\D+',par[1],value)
if(par[0]=="ALPHA"):
out = re.sub('[a-zA-Z]+',par[1],value)
if(par[0]=="NALPHA"):
out = re.sub('[^a-zA-Z]+',par[1],value)
if((par[0]=="ALNUM")or(par[0]=="NPUNCT")):
out = re.sub('\w+',par[1],value)
if(par[0]=="NALNUM"):
out = re.sub('\W+',par[1],value)
if(par[0]=="PUNCT"):
out = re.sub('\W+',par[1],value)
if(par[0]=="LOWER"):
out = re.sub('[a-z]+',par[1],value)
if(par[0]=="UPPER"):
out = re.sub('[A-Z]+',par[1],value)
if(par[0]=="SPACE"):
out = re.sub('\s+',par[1],value)
elif (fn == "LIM"):
par = set_par_defaults(par,",")
if (par[1] == "L"):
out = value[(len(value) - string.atoi(par[0])):]
if (par[1] == "R"):
out = value[:string.atoi(par[0])]
elif (fn == "LIMW"):
par = set_par_defaults(par,",")
tmp = value.split(par[0])
if (par[1] == "L"):
out = par[0] + tmp[1]
if (par[1] == "R"):
out = tmp[0] + par[0]
elif (fn == "WORDS"):
tmp2 = [value]
par = set_par_defaults(par,",")
if (par[1] == "R"):
tmp = value.split(" ")
tmp2 = []
i = 0
while (i < string.atoi(par[0])):
tmp2.append(tmp[i])
i = i + 1
if (par[1] == "L"):
tmp = value.split(" ")
tmp.reverse()
tmp2 = []
i = 0
while (i < string.atoi(par[0])):
tmp2.append(tmp[i])
i = i + 1
tmp2.reverse()
out = string.join(tmp2, " ")
elif (fn == "MINL"):
par = set_par_defaults(par,"1")
tmp = value.split(" ")
tmp2 = []
i = 0
for wrd in tmp:
if (len(wrd) >= string.atoi(par[0])):
tmp2.append(wrd)
out = string.join(tmp2, " ")
elif (fn == "MINLW"):
par = set_par_defaults(par,"1")
if (len(value) >= string.atoi(par[0])):
out = value
else:
out = ""
elif (fn == "MAXL"):
par = set_par_defaults(par,"4096")
tmp = value.split(" ")
tmp2 = []
i = 0
for wrd in tmp:
if (len(wrd) <= string.atoi(par[0])):
tmp2.append(wrd)
out = string.join(tmp2, " ")
elif (fn == "REP"):
        par = set_par_defaults(par,",")
if (par[0]!= ""):
out = value.replace(par[0],par[1])
elif (fn == "SHAPE"):
if (value != ""):
out = value.strip()
elif (fn == "UP"):
out = value.upper()
elif (fn == "DOWN"):
out = value.lower()
elif (fn == "CAP"):
tmp = value.split(" ")
out2 = []
for wrd in tmp:
wrd2 = wrd.capitalize()
out2.append(wrd2)
out = string.join(out2," ")
elif (fn == "IF"):
par = set_par_defaults(par,",,")
if (value == par[0]):
out = par[1]
else:
out = par[2]
if (out == "ORIG"):
out = value
elif (fn == "EXP"):
par = set_par_defaults(par,",0")
tmp = value.split(" ")
out2 = []
for wrd in tmp:
if ((len(wrd.split(par[0])) == 1)and(par[1]=="1")):
out2.append(wrd)
if ((len(wrd.split(par[0])) != 1)and(par[1]=="0")):
out2.append(wrd)
out = string.join(out2," ")
elif (fn == "EXPW"):
par = set_par_defaults(par,",0")
tmp = value.split(" ")
out2 = []
for wrd in tmp:
if ((FormatField(wrd,"SUP(" + par[0] + ")") == wrd)and(par[1]=="1")):
out2.append(wrd)
if ((FormatField(wrd,"SUP(" + par[0] + ")") != wrd)and(par[1]=="0")):
out2.append(wrd)
out = string.join(out2," ")
elif (fn == "SPLIT"):
par = set_par_defaults(par,"%d,0,,1" % conv_setting[1])
length = string.atoi(par[0]) + (string.atoi(par[1]))
header = string.atoi(par[1])
headerplus = par[2]
starting = string.atoi(par[3])
line = ""
tmp2 = []
tmp3 = []
tmp = value.split(" ")
linenumber = 1
if (linenumber >= starting):
tmp2.append(headerplus)
line = line + headerplus
for wrd in tmp:
line = line + " " + wrd
tmp2.append(wrd)
if (len(line) > length):
linenumber = linenumber + 1
line = tmp2.pop()
toout = string.join(tmp2)
tmp3.append(toout)
tmp2 = []
line2 = value[:header]
if (linenumber >= starting):
line3 = line2 + headerplus + line
else:
line3 = line2 + line
line = line3
tmp2.append(line)
tmp3.append(line)
out = string.join(tmp3,"\n")
out = FormatField(out,"SHAPE()")
elif (fn == "SPLITW"):
par = set_par_defaults(par,",0,,1")
str = par[0]
header = string.atoi(par[1])
headerplus = par[2]
starting = string.atoi(par[3])
counter = 1
tmp2 = []
tmp = value.split(par[0])
last = tmp.pop()
for wrd in tmp:
counter = counter + 1
if (counter >= starting):
tmp2.append(value[:header] + headerplus + wrd + par[0])
else:
tmp2.append(value[:header] + wrd + par[0])
if (last != ""):
counter = counter + 1
if (counter >= starting):
tmp2.append(value[:header] + headerplus + last)
else:
tmp2.append(value[:header] + last)
out = string.join(tmp2,"\n")
elif (fn == "CONF"):
par = set_par_defaults(par,",,1")
found = 0
data = select_line(par[0],data_parsed)
for line in data:
if (par[1] == ""):
if (line == ""):
found = 1
elif (len(re.split(par[1],line)) > 1):
found = 1
if ((found == 1)and(string.atoi(par[2]) == 1)):
out = value
if ((found == 1)and(string.atoi(par[2]) == 0)):
out = ""
if ((found == 0)and(string.atoi(par[2]) == 1)):
out = ""
if ((found == 0)and(string.atoi(par[2]) == 0)):
out = value
return out
elif (fn == "CONFL"):
        par = set_par_defaults(par,",1")
if (re.search(par[0],value)):
if (string.atoi(par[1]) == 1):
out = value
else:
out = ""
else:
if (string.atoi(par[1]) == 1):
out = ""
else:
out = value
return out
elif (fn == "CUT"):
        par = set_par_defaults(par,",")
left = value[:len(par[0])]
right = value[-(len(par[1])):]
if (left == par[0]):
out = out[len(par[0]):]
if (right == par[1]):
out = out[:-(len(par[1]))]
return out
elif (fn == "NUM"):
tmp = re.findall('\d',value)
out = string.join(tmp,"")
return out
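## Example: FormatField("the cern document server", "CAP()") returns
## "The Cern Document Server".  In a target template several formatting functions
## can be chained with "::" after the subfield code, e.g.
## <:field::subfield::SHAPE()::CAP():> (field and subfield names here are placeholders).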
def printInfo():
"print out when not enough parmeters given"
print """
BibConvert data convertor
Usage: bibconvert [options] -ctemplate.cfg < input.dat
Options:
-c'config' configuration templates file
-d'directory' source_data fields are located in separate files in 'directory' (one record)
-h help
-l'length' minimum line length (default = 1)
-o'value' OAI identifier starts with specified value (default = 1)
-b'file header' insert file header
-e'file footer' insert file footer
-s'record separator' record separator, default empty line (EOLEOL)
-Cx'field extraction template' alternative to -c when configuration is split to several files
-Cs'source data template' alternative to -c when configuration is split to several files
-Ct'target data template' alternative to -c when configuration is split to several files
"""
def printHelp():
"print out help"
print """
BibConvert data convertor
Usage: bibconvert [options] -ctemplate.cfg < input.dat
Options:
-c'config' configuration templates file
-d'directory' source_data fields are located in separate files in 'directory' (one record)
-h help
-l'length' minimum line length (default = 1)
-o'value' OAI identifier starts with specified value (default = 1)
-b'file header' insert file header
-e'file footer' insert file footer
-s'record separator' record separator, default empty line (EOLEOL)
-Cx'field extraction template' alternative to -c when configuration is split to several files
-Cs'source data template' alternative to -c when configuration is split to several files
-Ct'target data template' alternative to -c when configuration is split to several files
Example:
--------
Creation of an XML metadata container in file output.xml from a text input file:
bibconvert -o1 -l1 -csample.cfg < sample.dat > output.xml
-l1 print out all output lines
-o1 create OAI identifiers starting with value 1
-c* data conversion configuration templates
"""
def exit_on_error(error_message):
"exit when error occured"
sys.stderr.write("\n bibconvert data convertor\n")
sys.stderr.write(" Error: %s\n" % error_message)
sys.exit()
return 0
def create_record():
"Create output record"
out = []
field_data_item_LIST = []
for T_tpl_item_LIST in target_tpl_parsed:
to_output = []
rows = 1
for field_tpl_item_STRING in T_tpl_item_LIST[1]:
DATA = []
if (field_tpl_item_STRING[:2]=="<:"):
field_tpl_item_STRING = field_tpl_item_STRING[2:-2]
field = field_tpl_item_STRING.split("::")[0]
if (len(field_tpl_item_STRING.split("::")) == 1):
value = generate(field)
to_output.append([value])
else:
subfield = field_tpl_item_STRING.split("::")[1]
if (field[-1] == "*"):
repetitive = 1
field = field[:-1]
else:
repetitive = 0
if (is_opt("-d",opt_list)[:2]=="-d"):
DATA = select_line(field,data_parsed)
else:
DATA = select_line(field,data_parsed)
if (repetitive == 0):
DATA = [string.join(DATA," ")]
SRC_TPL = select_line(field,source_tpl_parsed)
try:
if (DATA[0] != ""):
DATA = get_subfields(DATA,subfield,SRC_TPL)
FF = field_tpl_item_STRING.split("::")
if (len(FF) > 2):
FF = FF[2:]
for fn in FF:
# DATAFORMATTED = []
if (len(DATA) != 0 and DATA[0] != ""):
DATA = get_subfields(DATA,subfield,SRC_TPL)
FF = field_tpl_item_STRING.split("::")
if (len(FF) > 2):
FF = FF[2:]
for fn2 in FF:
DATAFORMATTED = []
for item in DATA:
item = FormatField(item,fn)
DATAFORMATTED.append(item)
DATA = DATAFORMATTED
if (len(DATA) > rows):
rows = len(DATA)
to_output.append(DATA)
except IndexError, e:
pass
else:
to_output.append([field_tpl_item_STRING])
current = 0
while (current < rows):
line_to_print = []
for item in to_output:
if (item==[]):
item =['']
if (len(item) <= current):
printout = item[0]
else:
printout = item[current]
line_to_print.append(printout)
output = exp_n(string.join(line_to_print,""))
global_formatting_functions = T_tpl_item_LIST[0].split("::")[1:]
for GFF in global_formatting_functions:
if (GFF[:5] == "RANGE"):
parR = get_pars(GFF)[1]
parR = set_par_defaults(parR,"MIN,MAX")
if (parR[0]!="MIN"):
if (string.atoi(parR[0]) > (current+1)):
output = ""
if (parR[1]!="MAX"):
if (string.atoi(parR[1]) < (current+1)):
output = ""
else:
output = FormatField(output,GFF)
if (len(output) > conv_setting[0]):
print output
current = current + 1
return
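## For every line of the target template, create_record() collects the values of
## all referenced subfields; repetitive fields (marked with a trailing "*") make
## the line repeat once per occurrence.  The global formatting functions given
## after "---" (including RANGE) are applied to each assembled output line, and
## the line is printed only if it is longer than the configured minimum length.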
### MAIN ###
try:
import fileinput
import string
import os
import re
import sys
import time
except ImportError, e:
print "Error: %s" % e
import sys
sys.exit(1)
from time import gmtime, strftime, localtime
import os.path
tcounter = 0
conv_setting = set_conv()
sysno = generate("DATE(%w%H%M%S)")
if(len(sys.argv) < 2):
printInfo()
sys.exit(0)
opt_list = get_options()
arg_list = get_arguments()
if(len(opt_list) == 0):
printInfo()
sys.exit(0)
elif (is_opt("-h",opt_list)[:2] == "-h"):
printHelp()
sys.exit(0)
else:
source_data = ""
separator = ""
if (is_opt("-s",opt_list)[:2] == "-s"):
separator = is_opt("-s",opt_list)[2:]
if (is_opt("-d",opt_list)[:2] == "-d"):
source_data = is_opt("-d",opt_list)[2:]
source_data = source_data + "/"
extract_tpl = "/"
else:
if (is_opt("-Cx",opt_list)[:3] == "-Cx"):
extract_tpl = is_opt("-Cx",opt_list)[3:]
extract_tpl_parsed = parse_template(extract_tpl)
elif (is_opt("-c",opt_list)[:2] == "-c"):
extract_tpl = is_opt("-c",opt_list)[2:]
extract_tpl_parsed = parse_common_template(extract_tpl,1)
else:
printInfo()
sys.exit(0)
if (is_opt("-Cs",opt_list)[:3] == "-Cs"):
source_tpl = is_opt("-Cs",opt_list)[3:]
source_tpl_parsed = parse_template(source_tpl)
elif (is_opt("-c",opt_list)[:2] == "-c"):
source_tpl = is_opt("-c",opt_list)[2:]
source_tpl_parsed = parse_common_template(source_tpl,2)
else:
printInfo()
sys.exit(0)
if (is_opt("-Ct",opt_list)[:3] == "-Ct"):
target_tpl = is_opt("-Ct",opt_list)[3:]
target_tpl_parsed = parse_template(target_tpl)
elif (is_opt("-c",opt_list)[:2] == "-c"):
target_tpl = is_opt("-c",opt_list)[2:]
target_tpl_parsed = parse_common_template(target_tpl,3)
else:
printInfo()
sys.exit(0)
if (is_opt("-t",opt_list)[:2] == "-t"):
output_rec_sep = is_opt("-t",opt_list)[2:]
else:
output_rec_sep = ""
if (is_opt("-b",opt_list)[:2] == "-b"):
begin_header = is_opt("-b",opt_list)[2:]
else:
begin_header = ""
if (is_opt("-e",opt_list)[:2] == "-e"):
ending_footer = is_opt("-e",opt_list)[2:]
else:
ending_footer = ""
if (is_opt("-l",opt_list)[:2] == "-l"):
try:
conv_setting[0] = string.atoi(is_opt("-l",opt_list)[2:])
except ValueError, e:
conv_setting[0] = 1
if (is_opt("-o",opt_list)[:2] == "-o"):
try:
oai_identifier_from = string.atoi(is_opt("-o",opt_list)[2:])
except ValueError, e:
oai_identifier_from = 1
else:
oai_identifier_from = 1
if (is_opt("-d",opt_list)[:2] == "-d"):
if (os.path.isdir(source_data)):
data_parsed = parse_input_data_d(source_data,source_tpl)
create_record()
tcounter = tcounter + 1
else:
exit_on_error("Cannot access directory: %s" % source_data)
if (is_opt("-d",opt_list)[:2] == ""):
done = 0
print begin_header
while (done == 0):
data_parsed = parse_input_data_fx(source_tpl)
if (data_parsed == -1):
done = 1
else:
if (data_parsed[0][0]!= ''):
create_record()
tcounter = tcounter + 1
print output_rec_sep
print ending_footer
</protect>
