inveniocfg_dumperloader.py
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Wed, Jun 26, 02:15

inveniocfg_dumperloader.py
View Options

	# -- coding: utf-8 --
	##
	## This file is part of Invenio.
	## Copyright (C) 2010, 2011 CERN.
	##
	## Invenio is free software; you can redistribute it and/or
	## modify it under the terms of the GNU General Public License as
	## published by the Free Software Foundation; either version 2 of the
	## License, or (at your option) any later version.
	##
	## Invenio is distributed in the hope that it will be useful, but
	## WITHOUT ANY WARRANTY; without even the implied warranty of
	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	## General Public License for more details.
	##
	## You should have received a copy of the GNU General Public License
	## along with Invenio; if not, write to the Free Software Foundation, Inc.,
	## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

	"""
	* HIGHLY EXPERIMENTAL; PLEASE DO NOT USE. *

	Invenio configuration dumper and loader CLI tool.

	Usage: python inveniocfg_dumperloader.py [options]

	General options:
	-h, --help print this help
	-V, --version print version number

	Dumper options:
	-d file dump the collections into a INI file
	-col COLLECTION1,COLLECTION2... collection/s to dump
	-all dump all the collections
	--force-ids also dump the ids of the tables to the file
	--output print the file in the screen

	Loader options:
	-l file load a file into the database
	-mode i\|c\|r select the mode to load(insert, correct, replace)
	"""

	__revision__ = "$Id$"

	import sys
	import random
	import re
	import datetime
	import StringIO
	from string import Template
	from invenio.dbquery import run_sql, wash_table_column_name

	from configobj import ConfigObj

	IDENT_TYPE = " " #Identation in the *.INI file can be a tab/spaces/etc...

	MESSAGES = [] #List of messages to display to the user at the end of the execution

	LOAD_DEFAULT_MODE = 'i'

	SEPARATOR = '.'

	#Dict of blacklisted fields and the message to display
	BLACKLIST_TABLE_COLUMNS = {
	'collection.reclist': '#INFO Please rerun webcoll.',
	'accROLE.firefole_def_ser': '#INFO Please rerun webaccessadmin -c.',
	'score':'#INFO Run whatever relevant',
	'tag.value':'#INFO Please run inveniocfg --do-something',
	'field_tag.score':'#INFO please run inveniocfg --fill-scores'
	}

	COLLECTIONS = {

	'FIELD' : {
	'tables':{
	'field':'extend(field.id=fieldname.id_field,fieldname.$ln.$type = $value)',
	'field_tag':'field_tag.id_field = field.id, field_tag.id_tag = tag.id',
	'tag':'normal'},
	'relations':'field-field_tag-tag',
	},
	'COLLECTION' : {
	'tables':{
	'collection':'normal',
	'collection_example':'collection_example.id_example = example.id, collection_example.id_collection = collection.id',
	'example':'normal'},
	'relations':'collection-collection_example-example',
	},
	'PORTALBOX' : {
	'tables':{
	'collection':'normal',
	'collection_portalbox':'collection_portalbox.id_portalbox = portalbox.id, collection_portalbox.id_collection = collection.id',
	'portalbox':'normal',
	},
	'relations':'collection-collection_portalbox-portalbox',
	},
	}


	def print_usage():
	"""Print help."""
	print __doc__

	def create_section_id(num, with_date=True):
	"""
	Generate a unique section id.
	Convert the given number in base 18 and append a 5 digit random string
	If with_date=True append the date at the beginnig so it can be ordered.
	Estructure:
	if with_date:
	date . base18(id) . 5 random chars e.g. tag.2010-07-30.ddcbz2lf
	else:
	base18(id) . 5 random chars e.g. field.ddcbz2lf
	"""
	digits = "abcdefghijklmnopqrstuvwxyz0123456789"
	str_id = ""
	tail = ''.join([random.choice(digits) for x in range(4)])
	while 1:
	rest = num % 18
	str_id = digits[rest] + str_id
	num = num / 18
	if num == 0:
	break
	if with_date == True:
	date = str(datetime.date.today())
	return date + "." + str_id + tail
	return str_id + tail

	def dict2db(table_name, dict_data, mode):
	"""
	Load the dict values into the database
	Three modes of operation:
	i - insert
	r - replace
	c - correct
	"""
	#Escape all the content in dict data to avoid " and '
	for data in dict_data:
	dict_data[data] = re.escape(dict_data[data])

	if mode == 'i': #Insert mode
	query_fields = " , " .join(dict_data.keys())
	query_values = "' , '" .join(dict_data.values())
	query = "INSERT IGNORE INTO %s(%s) VALUES ('%s')" % (wash_table_column_name(table_name),
	query_fields,
	query_values)
	elif mode == 'c': #Correct mode
	if '_' in table_name:
	query = "SELECT * FROM %s" % table_name#FIXIT Trick to execute something instead of giving error
	else:
	tbl_id = get_primary_keys(table_name)[0]
	del dict_data[tbl_id]
	query_update = " , " .join(["%s=\'%s\'" % (field, dict_data[field]) for field in dict_data])
	query = "UPDATE %s SET %s" % (wash_table_column_name(table_name),
	query_update)
	else: #Try in the default mode
	dict2db(table_name, dict_data, LOAD_DEFAULT_MODE)
	try:
	run_sql(query)
	except:
	print "VALUES: %s ALREADY EXIST IN TABLE %s. SKIPPING" % (query_values, table_name)
	pass

	def query2list(query, table_name):
	"""Given a SQL query return a list of dictionaries with the results"""
	results = run_sql(query, with_desc=True)
	lst_results = []
	dict_results = {}
	for section_id, result in enumerate(results[0]):
	dict_results = {}
	for index, field in enumerate(results[1]):
	if not is_blacklisted(table_name, field[0]):
	dict_results[field[0]] = result[index]
	lst_results.append(dict_results)
	return lst_results

	def get_primary_keys(table_name):
	"""
	Get the primary keys from the table with the DESC mysql function
	"""
	lst_keys = []
	query = "DESC %s" % wash_table_column_name(table_name)
	results = run_sql(query)
	for field in results:
	if field[3] == 'PRI':
	lst_keys.append(field[0])
	return lst_keys

	def get_unused_primary_key(table_name):
	"""
	Returns the first free id from a table
	"""
	table_id = get_primary_keys(table_name)[0]#FIXIT the table can have more than an id
	query = "SELECT %s FROM %s" % (table_id, table_name)
	results = query2list(query, table_name)
	list_used_ids = [result[table_id] for result in results]
	for unused_id in range(1, len(list_used_ids)+2):
	if not unused_id in list_used_ids:
	return str(unused_id)

	def is_blacklisted(table, field):
	"""
	Check if the current field is blacklisted, if so add the message to the messages list
	"""
	if (table+ "." + field) in BLACKLIST_TABLE_COLUMNS.keys():
	msg = BLACKLIST_TABLE_COLUMNS[(table + "." + field)]
	if not msg in MESSAGES:
	MESSAGES.append(msg)
	return True
	return False

	def get_relationship(collection, table, field_id):
	"""Return the name of the related field"""
	tbl_field = table + "." + field_id
	dict_relationship = {}
	for tbl in collection['tables'].values():
	if tbl_field in tbl:
	for foo in tbl.split(","):
	dict_value, dict_key = foo.split("=")
	dict_relationship[dict_key.strip()] = dict_value
	return dict_relationship

	def delete_keys_from_dict(dict_del, lst_keys):
	"""
	Delete the keys present in the lst_keys from the dictionary.
	Loops recursively over nested dictionaries.
	"""
	for k in lst_keys:
	try:
	del dict_del[k]
	except KeyError:
	pass
	for v in dict_del.values():
	if isinstance(v, dict):
	delete_keys_from_dict(v, lst_keys)
	return dict_del

	def extract_from_template(template, str_data):
	"""
	Extract the values from a string given the template
	If the template and the string are different, this function may fail
	Return a dictionary with the keys from the template and the values from the string
	"""
	#FIXIT this code can be more elegant
	lst_str_data = []
	dict_result = {}
	pattern = re.compile("\$\w*")
	patt_match = pattern.findall(template)
	lst_foo = str_data.split("=")
	for data in lst_foo:
	lst_str_data.extend(data.split("."))
	for index, data in enumerate(patt_match):
	data = data.replace('$','')
	dict_result[data] = lst_str_data[index+1].strip()
	return dict_result

	def delete_ids(dict_fields, lst_tables):
	"""
	Remove the ids of the tables from the dictionary
	"""
	lst_primary = []
	for tbl in lst_tables:
	lst_primary.extend(get_primary_keys(tbl))
	return delete_keys_from_dict(dict_fields, lst_primary)

	def add_special_field(collection, tbl_name , dict_data):
	"""Add the value for the translation to the dictionary"""
	str_template = collection['tables'][tbl_name].split(",")[1][:-1]#FIXIT if the final character is other?
	template_key, template_value = str_template.split("=")
	template_key = Template(template_key.strip())
	template_value = Template(template_value.strip())
	id_field = dict_data['id']
	query = "SELECT * FROM %s WHERE %s=%s" % ("fieldname", "id_field", id_field)
	result = query2list(query, "fieldname")
	if result:
	for res in result:
	dict_data[template_key.safe_substitute(res)] = template_value.safe_substitute(res)

	def dump_collection(collection, config, force_ids, print_to_screen=False):
	"""
	Dump the current collection
	Note: there are a special notation, ori(origin) - rel(relation) - fin(final)
	For example in the relation field-field_tag-tag:
	ori(origin): field table
	rel(relation): field_tag
	fin(final): tag
	"""
	tbl_ori, tbl_rel, tbl_fin = collection['relations'].split("-")
	query = "SELECT * FROM %s" % (wash_table_column_name(tbl_ori))
	lst_ori = query2list(query, tbl_ori)
	tbl_ori_id = get_primary_keys(tbl_ori)[0]
	for index_ori, result_ori in enumerate(lst_ori):
	dict_rels = get_relationship(collection, tbl_ori, tbl_ori_id)
	query = "SELECT * FROM %s WHERE %s=%s" % (wash_table_column_name(tbl_rel),
	dict_rels[tbl_ori+"."+tbl_ori_id],
	result_ori[tbl_ori_id])
	if collection['tables'][tbl_ori].startswith('extend'):
	add_special_field(collection, tbl_ori, result_ori)
	lst_rel = query2list(query, tbl_rel)
	for result_rel in lst_rel:
	tbl_fin_id = get_primary_keys(tbl_fin)[0]
	tbl_rel_id = dict_rels[tbl_fin+"."+tbl_fin_id].split(".")[1].strip()
	query = "SELECT * FROM %s WHERE %s=%s" % (wash_table_column_name(tbl_fin),
	tbl_fin_id, result_rel[tbl_rel_id])
	lst_fin = query2list(query, tbl_fin)
	for index_fin, result_fin in enumerate(lst_fin):
	result_ori[tbl_fin+"."+create_section_id(index_fin, with_date=False)] = result_fin

	section_name = tbl_ori + "." + create_section_id(index_ori)
	if force_ids == False:#Remove the ids from the dict
	results = delete_ids(result_ori, collection['relations'].split("-"))
	config[section_name] = results
	else:
	config[section_name] = result_ori

	if print_to_screen == True:
	output = StringIO.StringIO()
	config.write(output)#Write to the output string instead of the file
	print output.getvalue()
	else:
	config.write()

	def get_collection(table_name):
	"""Get the collection asociated with the section"""
	for collection in COLLECTIONS.items():
	if table_name in collection[1]['relations'].split("-")[0]:
	return COLLECTIONS[collection[0]]#this is the collection to load

	def load_section(section_name, dict_data, mode):
	"""
	Load the section back into the database

	table_name is the name of the main section
	There are some special notation: ori(origin) - rel(related) - fin(final) - ext(extended)
	For example for the field-tag collection:
	ori: field
	ext: fieldname
	rel: field_tag
	fin:tag
	"""
	table_ori = section_name.split(".")[0]
	collection = get_collection(table_ori)
	ori_definition = collection['tables'][table_ori]
	if ori_definition.startswith("extend"):
	tbl_ext_name = ori_definition.split(",")[1].split(SEPARATOR)[0]

	lst_tables = collection['relations'].split("-")

	ori_id = get_primary_keys(lst_tables[0])[0]
	ori_id_value = get_unused_primary_key(lst_tables[0])
	dict_data[ori_id] = ori_id_value#Add the calculated id to the dictionary
	#I will separate the dict_data into these 3 dicts corresponding to 3 different tables
	dict_ori = {}
	dict_rel = {}
	dict_ext = {}

	for field in dict_data:
	if type(dict_data[field]) == str:#the field is a string
	if "tbl_ext_name" in locals() and field.startswith(tbl_ext_name):#is extended table
	dict2db("fieldname",
	extract_from_template("fieldname.$ln.$type = $value",
	str(field) + " = " + str(dict_data[field])),
	mode)
	else:
	dict_ori[field] = dict_data[field]
	else:#if the field is a dictionary
	fin_id = get_primary_keys(lst_tables[2])[0]
	fin_id_value = get_unused_primary_key(lst_tables[2])
	dict_data[field][fin_id] = fin_id_value
	dict2db(lst_tables[2], dict_data[field], mode)#Insert the final into the DB

	fieldtag_ids = get_primary_keys(lst_tables[1])
	dict_rel[fieldtag_ids[0]] = ori_id_value
	dict_rel[fieldtag_ids[1]] = fin_id_value
	dict2db(lst_tables[1], dict_rel, mode)#Insert the relation into the DB

	dict2db(lst_tables[0], dict_ori, mode)

	def cli_cmd_dump_config():
	"""Dump the selected collection/s"""
	config = ConfigObj(indent_type=IDENT_TYPE)
	config.initial_comment = [
	str(datetime.datetime.now()),
	"This file is automatically generated by Invenio, running:",
	" ".join(sys.argv) ,
	"" ]

	force_ids = False
	if "--force-ids" in sys.argv:
	force_ids = True
	print_to_screen = False
	if '--output' in sys.argv:
	print_to_screen = True

	try:
	config.filename = sys.argv[sys.argv.index('-d') + 1]
	except:
	print_usage()

	if '-col' in sys.argv:
	try:
	collection = COLLECTIONS[sys.argv[sys.argv.index('-col') + 1].upper()]
	dump_collection(collection, config, force_ids, print_to_screen)
	except:
	print "ERROR: you must especify the collection to dump with the -col COLLECTION_NAME option"
	elif '-all' in sys.argv:
	for collection in COLLECTIONS:
	dump_collection(COLLECTIONS[collection], config, force_ids, print_to_screen)
	else:
	print "Please specify the collection to dump"

	def cli_cmd_load_config():
	"""Load all the config sections back into the database"""
	config = ConfigObj(sys.argv[sys.argv.index('-l') + 1])
	mode = "r"
	if '-mode' in sys.argv:
	try:
	mode = sys.argv[sys.argv.index('-mode') + 1]
	if mode not in ['i', 'c', 'r']:
	print "Not valid mode please select one of the following (i)nsert, (c)orrect or (r)eplace"
	sys.exit(1)
	except IndexError:
	print "You must especify the mode with the -mode option"
	sys.exit(1)

	for section in config.sections:
	load_section(section, config[section], mode)

	def main():
	"""
	Main section, makes the calls to all the functions
	"""
	if "-d" in sys.argv:
	cli_cmd_dump_config()
	elif "-l" in sys.argv:
	cli_cmd_load_config()
	elif "-h" in sys.argv:
	print_usage()
	else:
	print_usage()

	for message in MESSAGES:
	print message

	if __name__ == '__main__':
	main()

inveniocfg_dumperloader.pyNo OneTemporaryActions

File Metadata

inveniocfg_dumperloader.pyView Options

Event Timeline

inveniocfg_dumperloader.py
No OneTemporary
Actions

inveniocfg_dumperloader.py
View Options