bibconvert.in

## $Id$
## This file is part of the CERN Document Server Software (CDSware).
## Copyright (C) 2002 CERN.
##
## The CDSware is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## The CDSware is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDSware; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
## read config variables:
#include "config.wml"
#include "configbis.wml"
## start Python:
<protect>#!</protect><PYTHON>
<protect>## $Id$</protect>
<protect>## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES.</protect>
## okay, rest of the Python code goes below
#######
<protect>
def set_conv():
"""
bibconvert common settings
=======================
minimal length of output line = 1
maximal length of output line = 4096
"""
conv_setting = [
1,
4096
]
return conv_setting
def get_options():
"Read command line options into list"
out = []
for arg in sys.argv:
if (arg[:1] == "-"):
out.append(arg)
return out
def get_arguments():
"Read command line arguments into list"
out = []
for arg in sys.argv:
if (arg[:1] != "-"):
out.append(arg)
return out
def get_pars(fn):
"Read function and its parameters into list"
out = []
out.append(re.split('\(|\)',fn)[0])
out.append(re.split(',',re.split('\(|\)',fn)[1]))
return out
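## Example: get_pars("ADD(x,y)") returns ['ADD', ['x', 'y']];
## a call without parameters, e.g. get_pars("UP()"), returns ['UP', ['']].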
def is_opt(seek,opt_list):
"Return entire argument if given in the list of options"
out = ""
for arg in opt_list:
if (seek == arg[:2]):
out = arg
if (seek == arg[:3]):
out = arg
return out
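## Example: is_opt("-c", ["-o1", "-csample.cfg"]) returns "-csample.cfg";
## a three-character seek such as "-Cx" is matched on the first three characters.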
def append_to_output_file(filename, output):
"bibconvert output file creation by output line"
try:
file = open(filename,'a')
file.write(output)
file.close()
except IOError, e:
exit_on_error("Cannot write into %s" % filename)
return 1
def sub_keywd(out):
"bibconvert keywords literal substitution"
out = string.replace(out,"EOL","\n")
out = string.replace(out,"_CR_","\r")
out = string.replace(out,"_LF_","\n")
out = string.replace(out,"\\",'\\')
out = string.replace(out,"\r",'\r')
out = string.replace(out,"BSLASH",'\\')
out = string.replace(out,"COMMA",',')
out = string.replace(out,"LEFTB",'[')
out = string.replace(out,"RIGHTB",']')
out = string.replace(out,"LEFTP",'(')
out = string.replace(out,"RIGHTP",')')
return out
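## The literal keywords above (EOL, BSLASH, COMMA, LEFTB, RIGHTB, LEFTP, RIGHTP, ...)
## let template authors write separator characters inside template values and
## function parameters without breaking the comma/parenthesis splitting in get_pars().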
def check_split_on(data_item_split, sep, tpl_f):
"""
bibconvert conditional split with following conditions
===================================================
::NEXT(N,TYPE,SIDE) - next N chars are of the TYPE having the separator on the SIDE
::PREV(N,TYPE,SIDE) - previous N chars are of the TYPE having the separator on the SIDE
"""
fn = get_pars(tpl_f)[0]
par = get_pars(tpl_f)[1]
done = 0
while (done == 0):
if ( (( fn == "NEXT" ) and ( par[2]=="R" )) or
(( fn == "PREV" ) and ( par[2]=="L" )) ):
test_value = data_item_split[0][-(string.atoi(par[0])):]
elif ( ((fn == "NEXT") and ( par[2]=="L")) or
((fn == "PREV") and ( par[2]=="R")) ):
test_value = data_item_split[1][:(string.atoi(par[0]))]
data_item_split_tmp = []
if ((FormatField(test_value,"SUP(" + par[1] + ",)") != "")or(len(test_value) < string.atoi(par[0]))):
data_item_split_tmp = data_item_split[1].split(sep,1)
if(len(data_item_split_tmp)==1):
done = 1
data_item_split[0] = data_item_split[0] + sep + data_item_split_tmp[0]
data_item_split[1] = ""
else:
data_item_split[0] = data_item_split[0] + sep + data_item_split_tmp[0]
data_item_split[1] = data_item_split_tmp[1]
else:
done = 1
return data_item_split
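## check_split_on() is called from get_subfields() whenever a separator in the
## source template carries a ::NEXT()/::PREV() condition; it keeps re-splitting
## the data item and re-joining the pieces until the condition no longer holds.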
def get_subfields(data,subfield,src_tpl):
"Get subfield according to the template"
out = []
for data_item in data:
found = 0
for src_tpl_item in src_tpl:
if (src_tpl_item[:2] == "<:"):
if (src_tpl_item[2:-2] == subfield):
found = 1
else:
sep_in_list = src_tpl_item.split("::")
sep = sep_in_list[0]
data_item_split = data_item.split(sep,1)
if (len(data_item_split)==1):
data_item = data_item_split[0]
else:
if (len(sep_in_list) > 1):
data_item_split = check_split_on(data_item.split(sep,1), sep_in_list[0],sep_in_list[1])
if(found == 1):
data_item = data_item_split[0]
else:
data_item = string.join(data_item_split[1:],sep)
out.append(data_item)
return out
def exp_n(word):
"Replace newlines and carriage return's from string."
out = ""
for ch in word:
if ((ch != '\n') and (ch != '\r')):
out = out + ch
return out
def exp_e(list):
"Expunge empty elements from a list"
out = []
for item in list:
item = exp_n(item)
if ((item != '\r\n' and item != '\r' and item != '\n' and item !="" and len(item)!=0)):
out.append(item)
return out
def sup_e(word):
"Replace spaces"
out = ""
for ch in word:
if (ch != ' '):
out = out + ch
return out
def select_line(field_code, list):
"Return appropriate item from a list"
out = ['']
for field in list:
field[0] = sup_e(field[0])
field_code = sup_e(field_code)
if (field[0] == field_code):
out = field[1]
return out
def parse_field_definition(source_field_definition):
"Create list of source_field_definition"
word_list = []
out = []
word = ""
counter = 0
if (len(source_field_definition.split("---"))==4):
out = source_field_definition.split("---")
else:
element_list_high = source_field_definition.split("<:")
for word_high in element_list_high:
element_list_low = word_high.split(':>')
for word_low in element_list_low:
word_list.append(word_low)
word_list.append(":>")
word_list.pop()
word_list.append("<:")
word_list.pop()
for item in word_list:
word = word + item
if (item == "<:"):
counter = counter + 1
if (item == ":>"):
counter = counter - 1
if counter == 0:
out.append(word)
word = ""
return out
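## Splits a field definition into top-level chunks, keeping each <: ... :> group
## (including nested <: :> groups) together as a single list item.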
def parse_template(template):
"""
bibconvert parse template
=========================
in  - template filename
out - [ [ field_code , [ field_template_parsed ] ] , [] ]
"""
out = []
for field_def in read_file(template,1):
field_tpl_new = []
if ((len(field_def.split("---",1)) > 1) and (field_def[:1]!="#")):
field_code = field_def.split("---",1)[0]
field_tpl = parse_field_definition(field_def.split("---",1)[1])
field_tpl_new = field_tpl
field_tpl = exp_e(field_tpl_new)
out_data = [field_code, field_tpl]
out.append(out_data)
return out
def parse_common_template(template,part):
"""
bibconvert parse template
=========================
in - template filename
out - [ [ field_code , [ field_template_parsed ] ] , [] ]
"""
out = []
counter = 0
for field_def in read_file(template,1):
if (exp_n(field_def)[:3] == "==="):
counter = counter + 1
elif (counter == part):
field_tpl_new = []
if ((len(field_def.split("---",1)) > 1) and (field_def[:1]!="#")):
field_code = field_def.split("---",1)[0]
field_tpl = parse_field_definition(field_def.split("---",1)[1])
field_tpl_new = field_tpl
field_tpl = exp_e(field_tpl_new)
out_data = [field_code, field_tpl]
out.append(out_data)
return out
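## With a single configuration file (-c), lines starting with "===" separate the
## file into parts: part 1 is the field extraction template, part 2 the source
## data template, and part 3 the target data template (see the MAIN section below).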
def parse_input_data_f(source_data_open, source_tpl):
"""
bibconvert parse input data
===========================
in  - input source data location (filehandle)
      source data template
      source_field_code  list of source field codes
      source_field_data  list of source field data values (repetitive fields: one occurrence per line)
out - [ [ source_field_code , [ source_field_data ] ] , [] ]
source_data_template entry - field_code---[const]<:subfield_code:>[const][<:subfield_code:>][]
destination_template entry - [::GFF()]---[const]<:field_code::subfield_code[::FF()]:>[]
input data file; by line: - fieldcode value
"""
out = [['',[]]]
count = 0
values = []
while (count < 1):
line = source_data_open.readline()
if (line == ""):
return(-1)
line_split = line.split(" ",1)
if (re.sub("\s","",line) == separator):
count = count + 1
if (len(line_split) == 2):
field_code = line_split[0]
field_value = exp_n(line_split[1])
values.append([field_code,field_value])
item_prev = ""
stack = ['']
for item in values:
if ((item[0]==item_prev)or(item_prev == "")):
stack.append(item[1])
item_prev = item[0]
else:
out.append([item_prev,stack])
item_prev = item[0]
stack = []
stack.append(item[1])
try:
if (stack[0] != ""):
if (out[0][0]==""):
out = []
out.append([field_code,stack])
except IndexError, e:
out = out
return out
def parse_input_data_fx(source_tpl):
"""
bibconvert parse input data
===========================
in  - input source data location (stdin)
      source data template
      source_field_code  list of source field codes
      source_field_data  list of source field data values (repetitive fields: one occurrence per line)
out - [ [ source_field_code , [ source_field_data ] ] , [] ]
extraction_template_entry -
input data file - specified by extract_tpl
"""
count = 0
record = ""
field_data_1_in_list = []
out = [['',[]]]
while (count <1):
line = sys.stdin.readline()
if (line == ""):
if (record == ""):
return (-1)
if (re.sub("\s","",line) == separator):
count = count + 1
else:
record = record + line
for field_defined in extract_tpl_parsed:
try:
field_defined[1][0] = sub_keywd(field_defined[1][0])
field_defined[1][1] = sub_keywd(field_defined[1][1])
except IndexError, e:
field_defined = field_defined
try:
field_defined[1][2] = sub_keywd(field_defined[1][2])
except IndexError, e:
field_defined = field_defined
field_data_1 =""
try:
if (len(record.split(field_defined[1][0])) == 1):
field_data_1 = ""
field_data_1_in_list = []
else:
field_data_1_tmp = record.split(field_defined[1][0],1)[1]
field_data_1_in_list = field_data_1_tmp.split(field_defined[1][0])
except IndexError, e:
field_data_1 = ""
spliton = []
outvalue = ""
field_data_2 = ""
field_data = ""
try:
if ((field_defined[1][1])=="EOL"):
spliton = ['\n']
elif ((field_defined[1][1])=="MIN"):
spliton = ['\n']
elif ((field_defined[1][1])=="MAX"):
for item in extract_tpl_parsed:
try:
spliton.append(item[1][0])
except IndexError, e:
spliton = spliton
else:
spliton = [field_defined[1][1]]
except IndexError,e :
spliton = ""
outvalues = []
for field_data in field_data_1_in_list:
outvalue = ""
for splitstring in spliton:
field_data_2 = ""
if (len(field_data.split(splitstring))==1):
if (outvalue == ""):
field_data_2 = field_data
else:
field_data_2 = outvalue
else:
field_data_2 = field_data.split(splitstring)[0]
outvalue = field_data_2
field_data = field_data_2
outvalues.append(outvalue)
outvalues = exp_e(outvalues)
if (len(outvalues) > 0):
if (out[0][0]==""):
out = []
outstack = []
if (len(field_defined[1])==3):
for item in outvalues:
stack = item.split(field_defined[1][2])
for stackitem in stack:
outstack.append(stackitem)
else:
outstack = outvalues
out.append([field_defined[0],outstack])
return out
def parse_input_data_d(source_data, source_tpl):
"""
bibconvert parse input data
===========================
in  - input source data location (directory)
      source data template
      source_field_code  list of source field codes
      source_field_data  list of source field data values (repetitive fields: one occurrence per line)
out - [ [ source_field_code , [ source_field_data ] ] , [] ]
source_data_template entry - field_code---[const]<:subfield_code:>[const][<:subfield_code:>][]
destination_template entry - [::GFF()]---[const]<:field_code::subfield_code[::FF()]:>[]
input data dir; by file: - fieldcode value per line
"""
out = []
for source_field_tpl in read_file(source_tpl,1):
source_field_code = source_field_tpl.split("---")[0]
source_field_data = read_file(source_data + source_field_code,0)
source_field_data = exp_e(source_field_data)
out_data = [source_field_code, source_field_data]
out.append(out_data)
return out
def sub_empty_lines(value):
out = re.sub('\n\n+','',value)
return out
def set_par_defaults(par1,par2):
"Set default parameter when not defined"
par_new_in_list = par2.split(",")
i = 0
out = []
for par in par_new_in_list:
if (len(par1)>i):
if (par1[i] == ""):
out.append(par)
else:
out.append(par1[i])
else:
out.append(par)
i = i + 1
return out
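## Example: set_par_defaults(["", "5"], "1,.") returns ['1', '5'] -- empty or
## missing positional parameters are filled in from the comma-separated defaults.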
def generate(keyword):
"""
bibconvert generated values:
============================
SYSNO() - generate date as '%w%H%M%S'
WEEK(N) - generate date as '%V' with shift (N)
DATE(format) - generate date in specified FORMAT
VALUE(value) - enter value literally
OAI() - generate oai_identifier, starting value given at command line as -o<value>
"""
out = keyword
fn = keyword + "()"
par = get_pars(fn)[1]
fn = get_pars(fn)[0]
par = set_par_defaults(par,"")
if (fn == "SYSNO"):
out = sysno
if (fn == "WEEK"):
par = set_par_defaults(par,"0")
out = "%02d" % (string.atoi(strftime("%V",localtime())) + string.atoi(par[0]))
if (string.atoi(out)<0):
out = "00"
if (fn == "VALUE"):
par = set_par_defaults(par,"")
out = par[0]
if (fn == "DATE"):
par = set_par_defaults(par,"%w%H%M%S," + "%d" % conv_setting[1])
out = strftime(par[0],localtime())
out = out[:string.atoi(par[1])]
if (fn == "OAI"):
oai_prefix = "</protect><OAIIDPREFIX><protect>"
out = "%s:%d" % (oai_prefix,tcounter + oai_identifier_from)
return out
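## Example: generate("WEEK(2)") returns the current ISO week number ('%V') plus 2,
## zero-padded to two digits; generate("DATE(%Y-%m-%d)") returns today's date in
## that format, truncated to at most the configured maximum line length.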
def read_file(filename,exception):
"Read file into list"
out = []
if (os.path.isfile(filename)):
file = open(filename,'r')
out = file.readlines()
file.close()
else:
if exception:
exit_on_error("Cannot access file: %s" % filename)
return out
def crawl_KB(filename,value,mode):
"""
bibconvert look-up value in KB_file in one of following modes:
===========================================================
1 - case sensitive / match (default)
2 - not case sensitive / search
3 - case sensitive / search
4 - not case sensitive / match
5 - case sensitive / search (in KB)
6 - not case sensitive / search (in KB)
7 - case sensitive / search (reciprocal)
8 - not case sensitive / search (reciprocal)
9 - replace by _DEFAULT_ only
"""
if (os.path.isfile(filename) != 1):
pathtmp = string.split(extract_tpl,"/")
pathtmp.pop()
path = string.join(pathtmp,"/")
filename = path + "/" + filename
if (os.path.isfile(filename)):
file_to_read = open(filename,"r")
file_read = file_to_read.readlines()
for line in file_read:
code = string.split(line,"---")
if (mode == "2"):
value_to_cmp = string.lower(value)
code[0] = string.lower(code[0])
if ((len(string.split(value_to_cmp,code[0])) > 1)or(code[0]=="_DEFAULT_")):
value = code[1]
return value
elif ((mode == "3") or (mode == "0")):
if ((len(string.split(value,code[0])) > 1)or(code[0]=="_DEFAULT_")):
value = code[1]
return value
elif (mode == "4"):
value_to_cmp = string.lower(value)
code[0] = string.lower(code[0])
if ((code[0] == value_to_cmp)or(code[0]=="_DEFAULT_")):
value = code[1]
return value
elif (mode == "5"):
if ((len(string.split(code[0],value)) > 1)or(code[0]=="_DEFAULT_")):
value = code[1]
return value
elif (mode == "6"):
value_to_cmp = string.lower(value)
code[0] = string.lower(code[0])
if ((len(string.split(code[0],value_to_cmp)) > 1)or(code[0]=="_DEFAULT_")):
value = code[1]
return value
elif (mode == "7"):
if ((len(string.split(code[0],value)) > 1)or(len(string.split(value,code[0])) > 1)or(code[0]=="_DEFAULT_")):
value = code[1]
return value
elif (mode == "8"):
value_to_cmp = string.lower(value)
code[0] = string.lower(code[0])
if ((len(string.split(code[0],value_to_cmp)) > 1)or(len(string.split(value_to_cmp,code[0]))>1)or(code[0]=="_DEFAULT_")):
value = code[1]
return value
elif (mode == "9"):
if (code[0]=="_DEFAULT_"):
value = code[1]
return value
else:
if ((code[0] == value)or(code[0]=="_DEFAULT_")):
value = code[1]
return value
return value
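## KB files are plain text with one "key---value" mapping per line; a key of
## _DEFAULT_ always matches.  When the file is not found under the given name,
## it is looked up in the directory of the extraction template.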
def FormatField(value,fn):
"""
bibconvert formatting functions:
=============================
ADD(prefix,suffix) - add prefix/suffix
KB(kb_file) - lookup in kb_file and replace value
ABR(N,suffix) - abbreviate to N places with suffix
ABRX(N,suffix) - abbreviate only words longer than N
ABRW() - abbreviate word (limit from right)
REP(x,y) - replace
SUP(type) - remove characters of certain TYPE
LIM(n,side) - limit to n letters from L/R
LIMW(string,side) - L/R after split on string
WORDS(n,side) - limit to n words from L/R
IF(value,valueT,valueF) - replace on IF condition
MINL(n) - replace words shorter than n
MINLW(n) - replace words shorter than n
MAXL(n) - replace words longer than n
EXPW(type) - replace word from value containing TYPE
EXP(STR,0/1) - replace word from value containing string
NUM() - take only digits in given string
SHAPE() - remove extra space
UP() - to uppercase
DOWN() - to lowercase
CAP() - capitalize each word
SPLIT(n,h,str,from) - only for final Aleph field, i.e. AB , maintain whole words
SPLITW(sep,h,str,from) - only for final Aleph field, split on string
CONF(field,value,0/1) - confirm validity of output line (check other field)
CONFL(substr,0/1) - confirm validity of output line (check field being processed)
CUT(prefix,postfix) - remove substring from side
RANGE(MIN,MAX) - select items in repetitive fields
bibconvert character TYPES
=======================
ALPHA - alphabetic
NALPHA - not alphabetic
NUM - numeric
NNUM - not numeric
ALNUM - alphanumeric
NALNUM - non alphanumeric
LOWER - lowercase
UPPER - uppercase
PUNCT - punctuation
NPUNCT - non-punctuation
SPACE - space
"""
out = value
fn = fn + "()"
par = get_pars(fn)[1]
fn = get_pars(fn)[0]
value = sub_keywd(value)
par_tmp =[]
for item in par:
item = sub_keywd(item)
par_tmp.append(item)
par = par_tmp
if (fn == "KB"):
new_value = ""
par = set_par_defaults(par,"KB,0")
new_value = crawl_KB(par[0],value,par[1])
out = new_value
elif (fn == "ADD"):
par = set_par_defaults(par,",")
out = par[0] + value + par[1]
elif (fn == "ABR"):
par = set_par_defaults(par,"1,.")
out = value[:string.atoi(par[0])] + par[1]
elif (fn == "ABRW"):
tmp = FormatField(value,"ABR(1,.)")
tmp = tmp.upper()
out = tmp
elif (fn == "ABRX"):
par = set_par_defaults(par,",")
toout = []
tmp = value.split(" ")
for wrd in tmp:
if (len(wrd) > string.atoi(par[0])):
wrd = wrd[:string.atoi(par[0])] + par[1]
toout.append(wrd)
out = string.join(toout," ")
elif (fn == "SUP"):
par = set_par_defaults(par,",")
if(par[0]=="NUM"):
out = re.sub('\d+',par[1],value)
if(par[0]=="NNUM"):
out = re.sub('\D+',par[1],value)
if(par[0]=="ALPHA"):
out = re.sub('[a-zA-Z]+',par[1],value)
if(par[0]=="NALPHA"):
out = re.sub('[^a-zA-Z]+',par[1],value)
if((par[0]=="ALNUM")or(par[0]=="NPUNCT")):
out = re.sub('\w+',par[1],value)
if(par[0]=="NALNUM"):
out = re.sub('\W+',par[1],value)
if(par[0]=="PUNCT"):
out = re.sub('\W+',par[1],value)
if(par[0]=="LOWER"):
out = re.sub('[a-z]+',par[1],value)
if(par[0]=="UPPER"):
out = re.sub('[A-Z]+',par[1],value)
if(par[0]=="SPACE"):
out = re.sub('\s+',par[1],value)
elif (fn == "LIM"):
par = set_par_defaults(par,",")
if (par[1] == "L"):
out = value[(len(value) - string.atoi(par[0])):]
if (par[1] == "R"):
out = value[:string.atoi(par[0])]
elif (fn == "LIMW"):
par = set_par_defaults(par,",")
tmp = value.split(par[0])
if (par[1] == "L"):
out = par[0] + tmp[1]
if (par[1] == "R"):
out = tmp[0] + par[0]
elif (fn == "WORDS"):
tmp2 = [value]
par = set_par_defaults(par,",")
if (par[1] == "R"):
tmp = value.split(" ")
tmp2 = []
i = 0
while (i < string.atoi(par[0])):
tmp2.append(tmp[i])
i = i + 1
if (par[1] == "L"):
tmp = value.split(" ")
tmp.reverse()
tmp2 = []
i = 0
while (i < string.atoi(par[0])):
tmp2.append(tmp[i])
i = i + 1
tmp2.reverse()
out = string.join(tmp2, " ")
elif (fn == "MINL"):
par = set_par_defaults(par,"1")
tmp = value.split(" ")
tmp2 = []
i = 0
for wrd in tmp:
if (len(wrd) >= string.atoi(par[0])):
tmp2.append(wrd)
out = string.join(tmp2, " ")
elif (fn == "MINLW"):
par = set_par_defaults(par,"1")
if (len(value) >= string.atoi(par[0])):
out = value
else:
out = ""
elif (fn == "MAXL"):
par = set_par_defaults(par,"4096")
tmp = value.split(" ")
tmp2 = []
i = 0
for wrd in tmp:
if (len(wrd) <= string.atoi(par[0])):
tmp2.append(wrd)
out = string.join(tmp2, " ")
elif (fn == "REP"):
        par = set_par_defaults(par,",")
if (par[0]!= ""):
out = value.replace(par[0],par[1])
elif (fn == "SHAPE"):
if (value != ""):
out = value.strip()
elif (fn == "UP"):
out = value.upper()
elif (fn == "DOWN"):
out = value.lower()
elif (fn == "CAP"):
tmp = value.split(" ")
out2 = []
for wrd in tmp:
wrd2 = wrd.capitalize()
out2.append(wrd2)
out = string.join(out2," ")
elif (fn == "IF"):
par = set_par_defaults(par,",,")
if (value == par[0]):
out = par[1]
else:
out = par[2]
if (out == "ORIG"):
out = value
elif (fn == "EXP"):
par = set_par_defaults(par,",0")
tmp = value.split(" ")
out2 = []
for wrd in tmp:
if ((len(wrd.split(par[0])) == 1)and(par[1]=="1")):
out2.append(wrd)
if ((len(wrd.split(par[0])) != 1)and(par[1]=="0")):
out2.append(wrd)
out = string.join(out2," ")
elif (fn == "EXPW"):
par = set_par_defaults(par,",0")
tmp = value.split(" ")
out2 = []
for wrd in tmp:
if ((FormatField(wrd,"SUP(" + par[0] + ")") == wrd)and(par[1]=="1")):
out2.append(wrd)
if ((FormatField(wrd,"SUP(" + par[0] + ")") != wrd)and(par[1]=="0")):
out2.append(wrd)
out = string.join(out2," ")
elif (fn == "SPLIT"):
par = set_par_defaults(par,"%d,0,,1" % conv_setting[1])
length = string.atoi(par[0]) + (string.atoi(par[1]))
header = string.atoi(par[1])
headerplus = par[2]
starting = string.atoi(par[3])
line = ""
tmp2 = []
tmp3 = []
tmp = value.split(" ")
linenumber = 1
if (linenumber >= starting):
tmp2.append(headerplus)
line = line + headerplus
for wrd in tmp:
line = line + " " + wrd
tmp2.append(wrd)
if (len(line) > length):
linenumber = linenumber + 1
line = tmp2.pop()
toout = string.join(tmp2)
tmp3.append(toout)
tmp2 = []
line2 = value[:header]
if (linenumber >= starting):
line3 = line2 + headerplus + line
else:
line3 = line2 + line
line = line3
tmp2.append(line)
tmp3.append(line)
out = string.join(tmp3,"\n")
out = FormatField(out,"SHAPE()")
elif (fn == "SPLITW"):
par = set_par_defaults(par,",0,,1")
str = par[0]
header = string.atoi(par[1])
headerplus = par[2]
starting = string.atoi(par[3])
counter = 1
tmp2 = []
tmp = value.split(par[0])
last = tmp.pop()
for wrd in tmp:
counter = counter + 1
if (counter >= starting):
tmp2.append(value[:header] + headerplus + wrd + par[0])
else:
tmp2.append(value[:header] + wrd + par[0])
if (last != ""):
counter = counter + 1
if (counter >= starting):
tmp2.append(value[:header] + headerplus + last)
else:
tmp2.append(value[:header] + last)
out = string.join(tmp2,"\n")
elif (fn == "CONF"):
par = set_par_defaults(par,",,1")
found = 0
data = select_line(par[0],data_parsed)
for line in data:
if (par[1] == ""):
if (line == ""):
found = 1
elif (len(re.split(par[1],line)) > 1):
found = 1
if ((found == 1)and(string.atoi(par[2]) == 1)):
out = value
if ((found == 1)and(string.atoi(par[2]) == 0)):
out = ""
if ((found == 0)and(string.atoi(par[2]) == 1)):
out = ""
if ((found == 0)and(string.atoi(par[2]) == 0)):
out = value
return out
elif (fn == "CONFL"):
        par = set_par_defaults(par,",1")
if (re.search(par[0],value)):
if (string.atoi(par[1]) == 1):
out = value
else:
out = ""
else:
if (string.atoi(par[1]) == 1):
out = ""
else:
out = value
return out
elif (fn == "CUT"):
        par = set_par_defaults(par,",")
left = value[:len(par[0])]
right = value[-(len(par[1])):]
if (left == par[0]):
out = out[len(par[0]):]
if (right == par[1]):
out = out[:-(len(par[1]))]
return out
elif (fn == "NUM"):
tmp = re.findall('\d',value)
out = string.join(tmp,"")
return out
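## Example: FormatField("the cern document server", "CAP()") returns
## "The Cern Document Server".  In a target template several formatting functions
## can be chained with "::" after the subfield code, e.g.
## <:field::subfield::SHAPE()::CAP():> (field and subfield names here are placeholders).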
def printInfo():
"print out when not enough parmeters given"
print """
BibConvert data convertor
Usage: bibconvert [options] -ctemplate.cfg < input.dat
Options:
-c'config' configuration templates file
-d'directory' source_data fields are located in separate files in 'directory' (one record)
-h help
-l'length' minimum line length (default = 1)
-o'value' OAI identifier starts with specified value (default = 1)
-b'file header' insert file header
-e'file footer' insert file footer
-s'record separator' record separator, default empty line (EOLEOL)
-Cx'field extraction template' alternative to -c when configuration is split to several files
-Cs'source data template' alternative to -c when configuration is split to several files
-Ct'target data template' alternative to -c when configuration is split to several files
"""
def printHelp():
"print out help"
print """
BibConvert data convertor
Usage: bibconvert [options] -ctemplate.cfg < input.dat
Options:
-c'config' configuration templates file
-d'directory' source_data fields are located in separate files in 'directory' (one record)
-h help
-l'length' minimum line length (default = 1)
-o'value' OAI identifier starts with specified value (default = 1)
-b'file header' insert file header
-e'file footer' insert file footer
-s'record separator' record separator, default empty line (EOLEOL)
-Cx'field extraction template' alternative to -c when configuration is split to several files
-Cs'source data template' alternative to -c when configuration is split to several files
-Ct'target data template' alternative to -c when configuration is split to several files
Example:
--------
Creation of an XML metadata container in file output.xml from a text input file:
bibconvert -o1 -l1 -csample.cfg < sample.dat > output.xml
-l1 print out all output lines
-o1 create OAI identifiers starting with value 1
-c* data conversion configuration templates
"""
def exit_on_error(error_message):
"exit when error occured"
sys.stderr.write("\n bibconvert data convertor\n")
sys.stderr.write(" Error: %s\n" % error_message)
sys.exit()
return 0
def create_record():
"Create output record"
out = []
field_data_item_LIST = []
for T_tpl_item_LIST in target_tpl_parsed:
to_output = []
rows = 1
for field_tpl_item_STRING in T_tpl_item_LIST[1]:
DATA = []
if (field_tpl_item_STRING[:2]=="<:"):
field_tpl_item_STRING = field_tpl_item_STRING[2:-2]
field = field_tpl_item_STRING.split("::")[0]
if (len(field_tpl_item_STRING.split("::")) == 1):
value = generate(field)
to_output.append([value])
else:
subfield = field_tpl_item_STRING.split("::")[1]
if (field[-1] == "*"):
repetitive = 1
field = field[:-1]
else:
repetitive = 0
if (is_opt("-d",opt_list)[:2]=="-d"):
DATA = select_line(field,data_parsed)
else:
DATA = select_line(field,data_parsed)
if (repetitive == 0):
DATA = [string.join(DATA," ")]
SRC_TPL = select_line(field,source_tpl_parsed)
try:
if (DATA[0] != ""):
DATA = get_subfields(DATA,subfield,SRC_TPL)
FF = field_tpl_item_STRING.split("::")
if (len(FF) > 2):
FF = FF[2:]
for fn in FF:
# DATAFORMATTED = []
if (len(DATA) != 0 and DATA[0] != ""):
DATA = get_subfields(DATA,subfield,SRC_TPL)
FF = field_tpl_item_STRING.split("::")
if (len(FF) > 2):
FF = FF[2:]
for fn2 in FF:
DATAFORMATTED = []
for item in DATA:
item = FormatField(item,fn)
DATAFORMATTED.append(item)
DATA = DATAFORMATTED
if (len(DATA) > rows):
rows = len(DATA)
to_output.append(DATA)
except IndexError, e:
pass
else:
to_output.append([field_tpl_item_STRING])
current = 0
while (current < rows):
line_to_print = []
for item in to_output:
if (item==[]):
item =['']
if (len(item) <= current):
printout = item[0]
else:
printout = item[current]
line_to_print.append(printout)
output = exp_n(string.join(line_to_print,""))
global_formatting_functions = T_tpl_item_LIST[0].split("::")[1:]
for GFF in global_formatting_functions:
if (GFF[:5] == "RANGE"):
parR = get_pars(GFF)[1]
parR = set_par_defaults(parR,"MIN,MAX")
if (parR[0]!="MIN"):
if (string.atoi(parR[0]) > (current+1)):
output = ""
if (parR[1]!="MAX"):
if (string.atoi(parR[1]) < (current+1)):
output = ""
else:
output = FormatField(output,GFF)
if (len(output) > conv_setting[0]):
print output
current = current + 1
return
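## For every line of the target template, create_record() collects the values of
## all referenced subfields; repetitive fields (marked with a trailing "*") make
## the line repeat once per occurrence.  The global formatting functions given
## after "---" (including RANGE) are applied to each assembled output line, and
## the line is printed only if it is longer than the configured minimum length.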
### MAIN ###
try:
import fileinput
import string
import os
import re
import sys
import time
except ImportError, e:
print "Error: %s" % e
import sys
sys.exit(1)
from time import gmtime, strftime, localtime
import os.path
tcounter = 0
conv_setting = set_conv()
sysno = generate("DATE(%w%H%M%S)")
if(len(sys.argv) < 2):
printInfo()
sys.exit(0)
opt_list = get_options()
arg_list = get_arguments()
if(len(opt_list) == 0):
printInfo()
sys.exit(0)
elif (is_opt("-h",opt_list)[:2] == "-h"):
printHelp()
sys.exit(0)
else:
source_data = ""
separator = ""
if (is_opt("-s",opt_list)[:2] == "-s"):
separator = is_opt("-s",opt_list)[2:]
if (is_opt("-d",opt_list)[:2] == "-d"):
source_data = is_opt("-d",opt_list)[2:]
source_data = source_data + "/"
extract_tpl = "/"
else:
if (is_opt("-Cx",opt_list)[:3] == "-Cx"):
extract_tpl = is_opt("-Cx",opt_list)[3:]
extract_tpl_parsed = parse_template(extract_tpl)
elif (is_opt("-c",opt_list)[:2] == "-c"):
extract_tpl = is_opt("-c",opt_list)[2:]
extract_tpl_parsed = parse_common_template(extract_tpl,1)
else:
printInfo()
sys.exit(0)
if (is_opt("-Cs",opt_list)[:3] == "-Cs"):
source_tpl = is_opt("-Cs",opt_list)[3:]
source_tpl_parsed = parse_template(source_tpl)
elif (is_opt("-c",opt_list)[:2] == "-c"):
source_tpl = is_opt("-c",opt_list)[2:]
source_tpl_parsed = parse_common_template(source_tpl,2)
else:
printInfo()
sys.exit(0)
if (is_opt("-Ct",opt_list)[:3] == "-Ct"):
target_tpl = is_opt("-Ct",opt_list)[3:]
target_tpl_parsed = parse_template(target_tpl)
elif (is_opt("-c",opt_list)[:2] == "-c"):
target_tpl = is_opt("-c",opt_list)[2:]
target_tpl_parsed = parse_common_template(target_tpl,3)
else:
printInfo()
sys.exit(0)
if (is_opt("-t",opt_list)[:2] == "-t"):
output_rec_sep = is_opt("-t",opt_list)[2:]
else:
output_rec_sep = ""
if (is_opt("-b",opt_list)[:2] == "-b"):
begin_header = is_opt("-b",opt_list)[2:]
else:
begin_header = ""
if (is_opt("-e",opt_list)[:2] == "-e"):
ending_footer = is_opt("-e",opt_list)[2:]
else:
ending_footer = ""
if (is_opt("-l",opt_list)[:2] == "-l"):
try:
conv_setting[0] = string.atoi(is_opt("-l",opt_list)[2:])
except ValueError, e:
conv_setting[0] = 1
if (is_opt("-o",opt_list)[:2] == "-o"):
try:
oai_identifier_from = string.atoi(is_opt("-o",opt_list)[2:])
except ValueError, e:
oai_identifier_from = 1
else:
oai_identifier_from = 1
if (is_opt("-d",opt_list)[:2] == "-d"):
if (os.path.isdir(source_data)):
data_parsed = parse_input_data_d(source_data,source_tpl)
create_record()
tcounter = tcounter + 1
else:
exit_on_error("Cannot access directory: %s" % source_data)
if (is_opt("-d",opt_list)[:2] == ""):
done = 0
print begin_header
while (done == 0):
data_parsed = parse_input_data_fx(source_tpl)
if (data_parsed == -1):
done = 1
else:
if (data_parsed[0][0]!= ''):
create_record()
tcounter = tcounter + 1
print output_rec_sep
print ending_footer
</protect>
