diff --git a/modules/bibindex/lib/bibindex_engine.py b/modules/bibindex/lib/bibindex_engine.py index 95229aacf..8e61c2bce 100644 --- a/modules/bibindex/lib/bibindex_engine.py +++ b/modules/bibindex/lib/bibindex_engine.py @@ -1,1584 +1,1585 @@ # -*- coding: utf-8 -*- ## ## $Id$ ## BibIndxes bibliographic data, reference and fulltext indexing utility. ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ BibIndex indexing engine implementation. See bibindex executable for entry point. """ __revision__ = "$Id$" import os import re import sys import time import urllib2 import tempfile import traceback from invenio.config import \ CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS, \ CFG_BIBINDEX_CHARS_PUNCTUATION, \ CFG_BIBINDEX_FULLTEXT_INDEX_LOCAL_FILES_ONLY, \ CFG_BIBINDEX_MIN_WORD_LENGTH, \ CFG_BIBINDEX_REMOVE_HTML_MARKUP, \ CFG_BIBINDEX_REMOVE_LATEX_MARKUP, \ weburl, CFG_TMPDIR from invenio.bibindex_engine_config import * from invenio.bibdocfile import bibdocfile_url_to_fullpath, bibdocfile_url_p, decompose_bibdocfile_url from invenio.search_engine import perform_request_search, strip_accents, wash_index_term, get_index_stemming_language from invenio.dbquery import run_sql, DatabaseError, serialize_via_marshal, deserialize_via_marshal from invenio.bibindex_engine_stopwords import is_stopword from invenio.bibindex_engine_stemmer import stem from invenio.bibtask import task_init, write_message, get_datetime, \ task_set_option, task_get_option, task_get_task_param, task_update_status, \ task_update_progress from invenio.intbitset import intbitset from invenio.errorlib import register_exception ## import optional modules: try: import psyco psyco.bind(get_words_from_phrase) psyco.bind(WordTable.merge_with_old_recIDs) except: pass ## precompile some often-used regexp for speed reasons: re_subfields = re.compile('\$\$\w') re_html = re.compile("(?s)<[^>]*>|&#?\w+;") re_block_punctuation_begin = re.compile(r"^"+CFG_BIBINDEX_CHARS_PUNCTUATION+"+") re_block_punctuation_end = re.compile(CFG_BIBINDEX_CHARS_PUNCTUATION+"+$") re_punctuation = re.compile(CFG_BIBINDEX_CHARS_PUNCTUATION) re_separators = re.compile(CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS) re_datetime_shift = re.compile("([-\+]{0,1})([\d]+)([dhms])") nb_char_in_line = 50 # for verbose pretty printing chunksize = 1000 # default size of chunks that the records will be treated by base_process_size = 4500 # process base size _last_word_table = None ## Dictionary merging functions def intersection(dict1, dict2): "Returns intersection of the two dictionaries." int_dict = {} if len(dict1) > len(dict2): for e in dict2: if dict1.has_key(e): int_dict[e] = 1 else: for e in dict1: if dict2.has_key(e): int_dict[e] = 1 return int_dict def union(dict1, dict2): "Returns union of the two dictionaries." 
union_dict = {} for e in dict1.keys(): union_dict[e] = 1 for e in dict2.keys(): union_dict[e] = 1 return union_dict def diff(dict1, dict2): "Returns dict1 - dict2." diff_dict = {} for e in dict1.keys(): if not dict2.has_key(e): diff_dict[e] = 1 return diff_dict def list_union(list1, list2): "Returns union of the two lists." union_dict = {} for e in list1: union_dict[e] = 1 for e in list2: union_dict[e] = 1 return union_dict.keys() ## safety function for killing slow DB threads: def kill_sleepy_mysql_threads(max_threads=CFG_MAX_MYSQL_THREADS, thread_timeout=CFG_MYSQL_THREAD_TIMEOUT): """Check the number of DB threads and if there are more than MAX_THREADS of them, lill all threads that are in a sleeping state for more than THREAD_TIMEOUT seconds. (This is useful for working around the the max_connection problem that appears during indexation in some not-yet-understood cases.) If some threads are to be killed, write info into the log file. """ res = run_sql("SHOW FULL PROCESSLIST") if len(res) > max_threads: for row in res: r_id, dummy, dummy, dummy, r_command, r_time, dummy, dummy = row if r_command == "Sleep" and int(r_time) > thread_timeout: run_sql("KILL %s", (r_id,)) write_message("WARNING: too many DB threads, killing thread %s" % r_id, verbose=1) return ## MARC-21 tag/field access functions def get_fieldvalues(recID, tag): """Returns list of values of the MARC-21 'tag' fields for the record 'recID'.""" out = [] bibXXx = "bib" + tag[0] + tag[1] + "x" bibrec_bibXXx = "bibrec_" + bibXXx query = "SELECT value FROM %s AS b, %s AS bb WHERE bb.id_bibrec=%s AND bb.id_bibxxx=b.id AND tag LIKE '%s'" \ % (bibXXx, bibrec_bibXXx, recID, tag) res = run_sql(query) for row in res: out.append(row[0]) return out def get_associated_subfield_value(recID, tag, value, associated_subfield_code): """Return list of ASSOCIATED_SUBFIELD_CODE, if exists, for record RECID and TAG of value VALUE. Used by fulltext indexer only. Note: TAG must be 6 characters long (tag+ind1+ind2+sfcode), otherwise en empty string is returned. FIXME: what if many tag values have the same value but different associated_subfield_code? Better use bibrecord library for this. """ out = "" if len(tag) != 6: return out bibXXx = "bib" + tag[0] + tag[1] + "x" bibrec_bibXXx = "bibrec_" + bibXXx query = """SELECT bb.field_number, b.tag, b.value FROM %s AS b, %s AS bb WHERE bb.id_bibrec=%s AND bb.id_bibxxx=b.id AND tag LIKE '%s%%'""" % \ (bibXXx, bibrec_bibXXx, recID, tag[:-1]) res = run_sql(query) field_number = -1 for row in res: if row[1] == tag and row[2] == value: field_number = row[0] if field_number > 0: for row in res: if row[0] == field_number and row[1] == tag[:-1] + associated_subfield_code: out = row[2] break return out def get_field_tags(field): """Returns a list of MARC tags for the field code 'field'. Returns empty list in case of error. Example: field='author', output=['100__%','700__%'].""" out = [] query = """SELECT t.value FROM tag AS t, field_tag AS ft, field AS f WHERE f.code='%s' AND ft.id_field=f.id AND t.id=ft.id_tag ORDER BY ft.score DESC""" % field res = run_sql(query) for row in res: out.append(row[0]) return out ## Fulltext word extraction functions def get_fulltext_urls_from_html_page(htmlpagebody): """Parses htmlpagebody data (the splash page content) looking for url_directs referring to probable fulltexts. Returns an array of (ext,url_direct) to fulltexts. 
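# Editor's sketch (not called by the indexer) of the table-name convention
# used by get_fieldvalues(), get_associated_subfield_value() and the other
# MARC access helpers above: values for MARC tag 'XXYY_z' live in table
# 'bibXXx' joined through 'bibrec_bibXXx'.  The helper below only derives the
# names; the actual queries stay in the functions above.
def _bibxxx_tables(tag):
    """Return the (bibXXx, bibrec_bibXXx) table pair for a MARC tag."""
    bibxxx = "bib" + tag[0] + tag[1] + "x"
    return bibxxx, "bibrec_" + bibxxx

# e.g. _bibxxx_tables('8564_u') == ('bib85x', 'bibrec_bib85x')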
Note: it looks for file format extensions as defined by global 'CONV_PROGRAMS' structure, minus the HTML ones, because we don't want to index HTML pages that the splash page might point to. """ out = [] for ext in CONV_PROGRAMS.keys(): expr = re.compile( r"\"(http://[\w]+\.+[\w]+[^\"'><]*\." + \ ext + r")\"") match = expr.search(htmlpagebody) if match and ext not in ['htm', 'html']: out.append([ext, match.group(1)]) #else: # FIXME: workaround for getfile, should use bibdoc tables #expr_getfile = re.compile(r"\"(http://.*getfile\.py\?.*format=" + ext + r"&version=.*)\"") #match = expr_getfile.search(htmlpagebody) #if match and ext not in ['htm', 'html']: #out.append([ext, match.group(1)]) return out -def get_words_from_local_fulltext(path, ext=''): - # FIXME +def get_words_from_local_fulltext(path, ext='', stemming_language=None): + # FIXME to be continued + raise NotImplemented if not ext: ext = path[len(file_strip_ext(path))+1:].lower() tmp_name = path.replace(';', '\\;') tmp_dst_name = tempfile.mkstemp('invenio.tmp.txt', dir=CFG_TMPDIR)[1] # try all available conversion programs according to their order: bingo = 0 for conv_program in CONV_PROGRAMS.get(ext, []): if os.path.exists(conv_program): # intelligence on how to run various conversion programs: cmd = "" # wil keep command to run bingo = 0 # had we success? if os.path.basename(conv_program) == "pdftotext": cmd = "%s -enc UTF-8 %s %s" % (conv_program, tmp_name, tmp_dst_name) elif os.path.basename(conv_program) == "pstotext": if ext == "ps.gz": # is there gzip available? if os.path.exists(CONV_PROGRAMS_HELPERS["gz"]): cmd = "%s -cd %s | %s > %s" \ % (CONV_PROGRAMS_HELPERS["gz"], tmp_name, conv_program, tmp_dst_name) else: cmd = "%s %s > %s" \ % (conv_program, tmp_name, tmp_dst_name) elif os.path.basename(conv_program) == "ps2ascii": if ext == "ps.gz": # is there gzip available? if os.path.exists(CONV_PROGRAMS_HELPERS["gz"]): cmd = "%s -cd %s | %s > %s"\ % (CONV_PROGRAMS_HELPERS["gz"], tmp_name, conv_program, tmp_dst_name) else: cmd = "%s %s %s" \ % (conv_program, tmp_name, tmp_dst_name) elif os.path.basename(conv_program) == "antiword": cmd = "%s %s > %s" % (conv_program, tmp_name, tmp_dst_name) elif os.path.basename(conv_program) == "catdoc": cmd = "%s %s > %s" % (conv_program, tmp_name, tmp_dst_name) elif os.path.basename(conv_program) == "wvText": cmd = "%s %s %s" % (conv_program, tmp_name, tmp_dst_name) elif os.path.basename(conv_program) == "ppthtml": # is there html2text available? if os.path.exists(CONV_PROGRAMS_HELPERS["html"]): cmd = "%s %s | %s > %s"\ % (conv_program, tmp_name, CONV_PROGRAMS_HELPERS["html"], tmp_dst_name) else: cmd = "%s %s > %s" \ % (conv_program, tmp_name, tmp_dst_name) elif os.path.basename(conv_program) == "xlhtml": # is there html2text available? if os.path.exists(CONV_PROGRAMS_HELPERS["html"]): cmd = "%s %s | %s > %s" % \ (conv_program, tmp_name, CONV_PROGRAMS_HELPERS["html"], tmp_dst_name) else: cmd = "%s %s > %s" % \ (conv_program, tmp_name, tmp_dst_name) elif os.path.basename(conv_program) == "html2text": cmd = "%s %s > %s" % \ (conv_program, tmp_name, tmp_dst_name) else: sys.stderr.write("Error: Do not know how to handle %s conversion program.\n" % conv_program) # try to run it: try: write_message("..... launching %s" % cmd, verbose=9) # Note we replace ; in order to make happy internal file names errcode = os.system(cmd) if errcode == 0 and os.path.exists(tmp_dst_name): bingo = 1 break # bingo! 
else: write_message("Error while running %s for %s.\n" % (cmd, path), sys.stderr) except: write_message("Error running %s for %s.\n" % (cmd, path), sys.stderr) # were we successful? if bingo: tmp_name_txt_file = open(tmp_dst_name) for phrase in tmp_name_txt_file.xreadlines(): for word in get_words_from_phrase(phrase, stemming_language): if not words.has_key(word): words[word] = 1 tmp_name_txt_file.close() else: write_message("No conversion success for %s.\n" % (path), sys.stderr) # delete temp files (they might not exist): try: os.unlink(tmp_dst_name) except StandardError: write_message("Error: Could not delete file. It didn't exist", sys.stderr) write_message("... reading fulltext files from %s ended" % path, verbose=2) return words.keys() def get_words_from_fulltext(url_direct_or_indirect, stemming_language=None): """Returns all the words contained in the document specified by URL_DIRECT_OR_INDIRECT with the words being split by various SRE_SEPARATORS regexp set earlier. If FORCE_FILE_EXTENSION is set (e.g. to "pdf", then treat URL_DIRECT_OR_INDIRECT as a PDF file. (This is interesting to index Indico for example.) Note also that URL_DIRECT_OR_INDIRECT may be either a direct URL to the fulltext file or an URL to a setlink-like page body that presents the links to be indexed. In the latter case the URL_DIRECT_OR_INDIRECT is parsed to extract actual direct URLs to fulltext documents, for all knows file extensions as specified by global CONV_PROGRAMS config variable. """ if CFG_BIBINDEX_FULLTEXT_INDEX_LOCAL_FILES_ONLY and \ url_direct_or_indirect.find(weburl) < 0: return [] write_message("... reading fulltext files from %s started" % url_direct_or_indirect, verbose=2) fulltext_urls = [] if bibdocfile_url_p(url_direct_or_indirect): write_message("... url %s is an internal url" % url_direct_or_indirect, verbose=9) ext = decompose_bibdocfile_url(url_direct_or_indirect)[2] if ext[0] == '.': ext = ext[1:].lower() fulltext_urls = [(ext, url_direct_or_indirect)] else: # check for direct link in url url_direct_or_indirect_ext = url_direct_or_indirect.split(".")[-1].lower() if url_direct_or_indirect_ext in CONV_PROGRAMS.keys(): fulltext_urls = [(url_direct_or_indirect_ext, url_direct_or_indirect)] # Indirect URL. Try to discover the real fulltext(s) from this splash page URL. if not fulltext_urls: # read "setlink" data try: htmlpagebody = urllib2.urlopen(url_direct_or_indirect).read() except Exception, e: register_exception() sys.stderr.write("Error: Cannot read %s: %s" % (url_direct_or_indirect, e)) return [] fulltext_urls = get_fulltext_urls_from_html_page(htmlpagebody) write_message("... fulltext_urls = %s" % fulltext_urls, verbose=9) write_message('... data to elaborate: %s' % fulltext_urls, verbose=9) words = {} # process as many urls as they were found: for (ext, url_direct) in fulltext_urls: write_message(".... processing %s from %s started" % (ext, url_direct), verbose=2) # sanity check: if not url_direct: break if bibdocfile_url_p(url_direct): # Let's manage this with BibRecDocs... # We got something like http://$(weburl)/record/xxx/yyy.ext try: tmp_name = bibdocfile_url_to_fullpath(url_direct) write_message("Found internal path %s for url %s" % (tmp_name, url_direct), verbose=2) no_src_delete = True except Exception, e: register_exception() sys.stderr.write("Error in retrieving fulltext from internal url %s: %s\n" % (url_direct, e)) break # try other fulltext files... 
else: # read fulltext file: try: url = urllib2.urlopen(url_direct) no_src_delete = False except Exception, e: register_exception() sys.stderr.write("Error: Cannot read %s: %s\n" % (url_direct, e)) break # try other fulltext files... tmp_fd, tmp_name = tempfile.mkstemp('invenio.tmp') data_chunk = url.read(8*1024) while data_chunk: os.write(tmp_fd, data_chunk) data_chunk = url.read(8*1024) os.close(tmp_fd) tmp_dst_name = tempfile.mkstemp('invenio.tmp.txt', dir=CFG_TMPDIR)[1] bingo = 0 # try all available conversion programs according to their order: for conv_program in CONV_PROGRAMS.get(ext, []): if os.path.exists(conv_program): # intelligence on how to run various conversion programs: cmd = "" # will keep command to run bingo = 0 # had we success? if os.path.basename(conv_program) == "pdftotext": cmd = "%s -enc UTF-8 %s %s" % (conv_program, tmp_name, tmp_dst_name) elif os.path.basename(conv_program) == "pstotext": if ext == "ps.gz": # is there gzip available? if os.path.exists(CONV_PROGRAMS_HELPERS["gz"]): cmd = "%s -cd %s | %s > %s" \ % (CONV_PROGRAMS_HELPERS["gz"], tmp_name, conv_program, tmp_dst_name) else: cmd = "%s %s > %s" \ % (conv_program, tmp_name, tmp_dst_name) elif os.path.basename(conv_program) == "ps2ascii": if ext == "ps.gz": # is there gzip available? if os.path.exists(CONV_PROGRAMS_HELPERS["gz"]): cmd = "%s -cd %s | %s > %s"\ % (CONV_PROGRAMS_HELPERS["gz"], tmp_name, conv_program, tmp_dst_name) else: cmd = "%s %s %s" \ % (conv_program, tmp_name, tmp_dst_name) elif os.path.basename(conv_program) == "antiword": cmd = "%s %s > %s" % (conv_program, tmp_name, tmp_dst_name) elif os.path.basename(conv_program) == "catdoc": cmd = "%s %s > %s" % (conv_program, tmp_name, tmp_dst_name) elif os.path.basename(conv_program) == "wvText": cmd = "%s %s %s" % (conv_program, tmp_name, tmp_dst_name) elif os.path.basename(conv_program) == "ppthtml": # is there html2text available? if os.path.exists(CONV_PROGRAMS_HELPERS["html"]): cmd = "%s %s | %s > %s"\ % (conv_program, tmp_name, CONV_PROGRAMS_HELPERS["html"], tmp_dst_name) else: cmd = "%s %s > %s" \ % (conv_program, tmp_name, tmp_dst_name) elif os.path.basename(conv_program) == "xlhtml": # is there html2text available? if os.path.exists(CONV_PROGRAMS_HELPERS["html"]): cmd = "%s %s | %s > %s" % \ (conv_program, tmp_name, CONV_PROGRAMS_HELPERS["html"], tmp_dst_name) else: cmd = "%s %s > %s" % \ (conv_program, tmp_name, tmp_dst_name) elif os.path.basename(conv_program) == "html2text": cmd = "%s %s > %s" % \ (conv_program, tmp_name, tmp_dst_name) else: sys.stderr.write("Error: Do not know how to handle %s conversion program.\n" % conv_program) # try to run it: try: write_message("..... launching %s" % cmd, verbose=9) # Note we replace ; in order to make happy internal file names errcode = os.system(cmd.replace(';', '\\;')) if errcode == 0 and os.path.exists(tmp_dst_name): bingo = 1 break # bingo! else: write_message("Error while running %s for %s.\n" % (cmd, url_direct), sys.stderr) except: write_message("Error running %s for %s.\n" % (cmd, url_direct), sys.stderr) # were we successful? 
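# Editor's suggestion, not what the module currently does: the converter
# commands above are built as shell strings and run through os.system(),
# which is why ';' in file names has to be escaped by hand.  A shell-free
# variant of the pdftotext case could look like the sketch below (argument
# list, no quoting issues); error handling is reduced to a return-code check.
import subprocess  # available from Python 2.4 onwards

def _run_pdftotext(conv_program, src, dst):
    """Run pdftotext on SRC writing plain text to DST; True on success."""
    try:
        retcode = subprocess.call([conv_program, '-enc', 'UTF-8', src, dst])
    except OSError:
        return False
    return retcode == 0 and os.path.exists(dst)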
if bingo: tmp_name_txt_file = open(tmp_dst_name) for phrase in tmp_name_txt_file.xreadlines(): for word in get_words_from_phrase(phrase, stemming_language): if not words.has_key(word): words[word] = 1 tmp_name_txt_file.close() else: write_message("No conversion success for %s.\n" % (url_direct), sys.stderr) # delete temp files (they might not exist): try: if not no_src_delete: os.unlink(tmp_name) os.unlink(tmp_dst_name) except StandardError: write_message("Error: Could not delete file. It didn't exist", sys.stderr) write_message(".... processing %s from %s ended" % (ext, url_direct), verbose=2) write_message("... reading fulltext files from %s ended" % url_direct_or_indirect, verbose=2) return words.keys() latex_markup_re = re.compile(r"\\begin(\[.+?\])?\{.+?\}|\\end\{.+?}|\\\w+(\[.+?\])?\{(?P.*?)\}|\{\\\w+ (?P.*?)\}") def remove_latex_markup(phrase): ret_phrase = '' index = 0 for match in latex_markup_re.finditer(phrase): ret_phrase += phrase[index:match.start()] ret_phrase += match.group('inside1') or match.group('inside2') or '' index = match.end() ret_phrase += phrase[index:] return ret_phrase def get_nothing_from_phrase(phrase, stemming_language=None): """ A dump implementation of get_words_from_phrase to be used when when a tag should not be indexed (such as when trying to extract phrases from 8564_u).""" return [] latex_formula_re = re.compile(r'\$.*?\$|\\\[.*?\\\]') def get_words_from_phrase(phrase, stemming_language=None): """Return list of words found in PHRASE. Note that the phrase is split into groups depending on the alphanumeric characters and punctuation characters definition present in the config file. """ words = {} formulas = [] if CFG_BIBINDEX_REMOVE_HTML_MARKUP and phrase.find(" -1: phrase = re_html.sub(' ', phrase) if CFG_BIBINDEX_REMOVE_LATEX_MARKUP: formulas = latex_formula_re.findall(phrase) phrase = remove_latex_markup(phrase) phrase = latex_formula_re.sub(' ', phrase) phrase = phrase.lower() # 1st split phrase into blocks according to whitespace for block in strip_accents(phrase).split(): # 2nd remove leading/trailing punctuation and add block: block = re_block_punctuation_begin.sub("", block) block = re_block_punctuation_end.sub("", block) if block: if stemming_language: block = apply_stemming_and_stopwords_and_length_check(block, stemming_language) if block: words[block] = 1 # 3rd break each block into subblocks according to punctuation and add subblocks: for subblock in re_punctuation.split(block): if stemming_language: subblock = apply_stemming_and_stopwords_and_length_check(subblock, stemming_language) if subblock: words[subblock] = 1 # 4th break each subblock into alphanumeric groups and add groups: for alphanumeric_group in re_separators.split(subblock): if stemming_language: alphanumeric_group = apply_stemming_and_stopwords_and_length_check(alphanumeric_group, stemming_language) if alphanumeric_group: words[alphanumeric_group] = 1 for block in formulas: words[block] = 1 return words.keys() phrase_delimiter_re = re.compile(r'[\.:;\?\!]') space_cleaner_re = re.compile(r'\s+') def get_phrases_from_phrase(phrase, stemming_language=None): """Return list of phrases found in PHRASE. Note that the phrase is split into groups depending on the alphanumeric characters and punctuation characters definition present in the config file. 
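# Editor's sketch of the splitting cascade used by get_words_from_phrase()
# above.  The two regexes are simplified stand-ins: in the real code they are
# built from CFG_BIBINDEX_CHARS_PUNCTUATION and
# CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS, and the stemming/stopword/
# length checks are applied at every level.
_demo_punct = re.compile(r"[.,:;?!]")
_demo_seps = re.compile(r"[^a-z0-9]+")

def _demo_split_cascade(phrase):
    words = {}
    for block in phrase.lower().split():              # 1st: whitespace blocks
        block = block.strip('.,:;?!"')                # strip edge punctuation
        if block:
            words[block] = 1
            for subblock in _demo_punct.split(block):      # 2nd: punctuation
                if subblock:
                    words[subblock] = 1
                    for group in _demo_seps.split(subblock):  # 3rd: separators
                        if group:
                            words[group] = 1
    return words.keys()

# e.g. _demo_split_cascade("Higgs-boson searches, 2008") indexes
# 'higgs-boson', 'higgs', 'boson', 'searches', '2008'.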
""" words = {} phrase = strip_accents(phrase) # 1st split phrase into blocks according to whitespace for block1 in phrase_delimiter_re.split(strip_accents(phrase)): block1 = block1.strip() if block1 and stemming_language: new_words = [] for block2 in re_punctuation.split(block1): block2 = block2.strip() if block2: for block3 in block2.split(): block3 = block3.strip() if block3: block3 = apply_stemming_and_stopwords_and_length_check(block3, stemming_language) if block3: new_words.append(block3) block1 = ' '.join(new_words) if block1: words[block1] = 1 return words.keys() def apply_stemming_and_stopwords_and_length_check(word, stemming_language): """Return WORD after applying stemming and stopword and length checks. See the config file in order to influence these. """ # now check against stopwords: if is_stopword(word): return "" # finally check the word length: if len(word) < CFG_BIBINDEX_MIN_WORD_LENGTH: return "" # stem word, when configured so: if stemming_language: word = stem(word, stemming_language) return word def remove_subfields(s): "Removes subfields from string, e.g. 'foo $$c bar' becomes 'foo bar'." return re_subfields.sub(' ', s) def get_index_id_from_index_name(index_name): """Returns the words/phrase index id for INDEXNAME. Returns empty string in case there is no words table for this index. Example: field='author', output=4.""" out = 0 query = """SELECT w.id FROM idxINDEX AS w WHERE w.name='%s' LIMIT 1""" % index_name res = run_sql(query, None, 1) if res: out = res[0][0] return out def get_index_tags(indexname): """Returns the list of tags that are indexed inside INDEXNAME. Returns empty list in case there are no tags indexed in this index. Note: uses get_field_tags() defined before. Example: field='author', output=['100__%', '700__%'].""" out = [] query = """SELECT f.code FROM idxINDEX AS w, idxINDEX_field AS wf, field AS f WHERE w.name='%s' AND w.id=wf.id_idxINDEX AND f.id=wf.id_field""" % indexname res = run_sql(query) for row in res: out.extend(get_field_tags(row[0])) return out def get_all_indexes(): """Returns the list of the names of all defined words indexes. Returns empty list in case there are no tags indexed in this index. Example: output=['global', 'author'].""" out = [] query = """SELECT name FROM idxINDEX""" res = run_sql(query) for row in res: out.append(row[0]) return out def split_ranges(parse_string): """Parse a string a return the list or ranges.""" recIDs = [] ranges = parse_string.split(",") for arange in ranges: tmp_recIDs = arange.split("-") if len(tmp_recIDs)==1: recIDs.append([int(tmp_recIDs[0]), int(tmp_recIDs[0])]) else: if int(tmp_recIDs[0]) > int(tmp_recIDs[1]): # sanity check tmp = tmp_recIDs[0] tmp_recIDs[0] = tmp_recIDs[1] tmp_recIDs[1] = tmp recIDs.append([int(tmp_recIDs[0]), int(tmp_recIDs[1])]) return recIDs def get_word_tables(tables): """ Given a list of table names it return a dictionary of index_id : index_tags. if tables is empty it returns the whole dictionary.""" wordTables = {} if tables: indexes = tables.split(",") for index in indexes: index_id = get_index_id_from_index_name(index) if index_id: wordTables[index_id] = get_index_tags(index) else: write_message("Error: There is no %s words table." 
% index, sys.stderr) else: for index in get_all_indexes(): - index_id = get_index_id_index_name(index) + index_id = get_index_id_from_index_name(index) wordTables[index_id] = get_index_tags(index) return wordTables def get_date_range(var): "Returns the two dates contained as a low,high tuple" limits = var.split(",") if len(limits)==1: low = get_datetime(limits[0]) return low, None if len(limits)==2: low = get_datetime(limits[0]) high = get_datetime(limits[1]) return low, high return None, None def create_range_list(res): """Creates a range list from a recID select query result contained in res. The result is expected to have ascending numerical order.""" if not res: return [] row = res[0] if not row: return [] else: range_list = [[row[0], row[0]]] for row in res[1:]: row_id = row[0] if row_id == range_list[-1][1] + 1: range_list[-1][1] = row_id else: range_list.append([row_id, row_id]) return range_list def beautify_range_list(range_list): """Returns a non overlapping, maximal range list""" ret_list = [] for new in range_list: found = 0 for old in ret_list: if new[0] <= old[0] <= new[1] + 1 or new[0] - 1 <= old[1] <= new[1]: old[0] = min(old[0], new[0]) old[1] = max(old[1], new[1]) found = 1 break if not found: ret_list.append(new) return ret_list def truncate_index_table(index_name): """Properly truncate the given index.""" index_id = get_index_id_from_index_name(index_name) if index_id: write_message('Truncating %s index table in order to reindex.' % index_name, verbose=2) run_sql("UPDATE idxINDEX SET last_updated='0000-00-00 00:00:00' WHERE id=%s", (index_id,)) run_sql("TRUNCATE idxWORD%02dF" % index_id) run_sql("TRUNCATE idxWORD%02dR" % index_id) run_sql("TRUNCATE idxPHRASE%02dF" % index_id) run_sql("TRUNCATE idxPHRASE%02dR" % index_id) class WordTable: "A class to hold the words table." def __init__(self, index_id, fields_to_index, table_name_pattern, default_get_words_fnc, tag_to_words_fnc_map, wash_index_terms=True): """Creates words table instance. @param index_id the index integer identificator @param fields_to_index a list of fields to index @param table_name_pattern i.e. idxWORD%02dF or idxPHRASE%02dF @parm default_get_words_fnc the default function called to extract words from a metadata @param tag_to_words_fnc_map a mapping to specify particular function to extract words from particular metdata (such as 8564_u) """ self.index_id = index_id self.tablename = table_name_pattern % index_id self.recIDs_in_mem = [] self.fields_to_index = fields_to_index self.value = {} self.stemming_language = get_index_stemming_language(index_id) self.wash_index_terms = wash_index_terms # tagToFunctions mapping. It offers an indirection level necessary for # indexing fulltext. The default is get_words_from_phrase self.tag_to_words_fnc_map = tag_to_words_fnc_map self.default_get_words_fnc = default_get_words_fnc if self.stemming_language: write_message('Stemming(%s) is enabled for table %s' % (self.stemming_language, self.tablename)) else: write_message('Stemming is disabled for table %s' % self.tablename) def get_field(self, recID, tag): """Returns list of values of the MARC-21 'tag' fields for the record 'recID'.""" out = [] bibXXx = "bib" + tag[0] + tag[1] + "x" bibrec_bibXXx = "bibrec_" + bibXXx query = """SELECT value FROM %s AS b, %s AS bb WHERE bb.id_bibrec=%s AND bb.id_bibxxx=b.id AND tag LIKE '%s'""" % (bibXXx, bibrec_bibXXx, recID, tag); res = run_sql(query) for row in res: out.append(row[0]) return out def clean(self): "Cleans the words table." 
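# Editor's illustration of the recID range helpers defined above; the asserts
# simply document the intended behaviour and are never run by the task.
def _selftest_range_helpers():
    # split_ranges() turns the -i command-line syntax into [low, high] pairs:
    assert split_ranges("10-12,15") == [[10, 12], [15, 15]]
    # beautify_range_list() merges overlapping or adjacent ranges:
    assert beautify_range_list([[1, 5], [4, 8], [10, 11]]) == [[1, 8], [10, 11]]
    assert beautify_range_list([[1, 2], [3, 4]]) == [[1, 4]]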
self.value = {} def put_into_db(self, mode="normal"): """Updates the current words table in the corresponding DB idxFOO table. Mode 'normal' means normal execution, mode 'emergency' means words index reverting to old state. """ write_message("%s %s wordtable flush started" % (self.tablename, mode), verbose=2) write_message('...updating %d words into %s started' % \ (len(self.value), self.tablename), verbose=2) task_update_progress("%s flushed %d/%d words" % (self.tablename, 0, len(self.value))) self.recIDs_in_mem = beautify_range_list(self.recIDs_in_mem) if mode == "normal": for group in self.recIDs_in_mem: query = """UPDATE %sR SET type='TEMPORARY' WHERE id_bibrec BETWEEN '%d' AND '%d' AND type='CURRENT'""" % \ (self.tablename[:-1], group[0], group[1]) write_message(query, verbose=9) run_sql(query) nb_words_total = len(self.value) nb_words_report = int(nb_words_total/10.0) nb_words_done = 0 for word in self.value.keys(): self.put_word_into_db(word) nb_words_done += 1 if nb_words_report != 0 and ((nb_words_done % nb_words_report) == 0): write_message('......processed %d/%d words' % (nb_words_done, nb_words_total)) task_update_progress("%s flushed %d/%d words" % (self.tablename, nb_words_done, nb_words_total)) write_message('...updating %d words into %s ended' % \ (nb_words_total, self.tablename), verbose=9) write_message('...updating reverse table %sR started' % self.tablename[:-1]) if mode == "normal": for group in self.recIDs_in_mem: query = """UPDATE %sR SET type='CURRENT' WHERE id_bibrec BETWEEN '%d' AND '%d' AND type='FUTURE'""" % \ (self.tablename[:-1], group[0], group[1]) write_message(query, verbose=9) run_sql(query) query = """DELETE FROM %sR WHERE id_bibrec BETWEEN '%d' AND '%d' AND type='TEMPORARY'""" % \ (self.tablename[:-1], group[0], group[1]) write_message(query, verbose=9) run_sql(query) write_message('End of updating wordTable into %s' % self.tablename, verbose=9) elif mode == "emergency": for group in self.recIDs_in_mem: query = """UPDATE %sR SET type='CURRENT' WHERE id_bibrec BETWEEN '%d' AND '%d' AND type='TEMPORARY'""" % \ (self.tablename[:-1], group[0], group[1]) write_message(query, verbose=9) run_sql(query) query = """DELETE FROM %sR WHERE id_bibrec BETWEEN '%d' AND '%d' AND type='FUTURE'""" % \ (self.tablename[:-1], group[0], group[1]) write_message(query, verbose=9) run_sql(query) write_message('End of emergency flushing wordTable into %s' % self.tablename, verbose=9) write_message('...updating reverse table %sR ended' % self.tablename[:-1]) self.clean() self.recIDs_in_mem = [] write_message("%s %s wordtable flush ended" % (self.tablename, mode)) task_update_progress("%s flush ended" % (self.tablename)) def load_old_recIDs(self, word): """Load existing hitlist for the word from the database index files.""" query = "SELECT hitlist FROM %s WHERE term=%%s" % self.tablename res = run_sql(query, (word,)) if res: return intbitset(res[0][0]) else: return None def merge_with_old_recIDs(self,word,set): """Merge the system numbers stored in memory (hash of recIDs with value +1 or -1 according to whether to add/delete them) with those stored in the database index and received in set universe of recIDs for the given word. Return 0 in case no change was done to SET, return 1 in case SET was changed. 
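# Editor's model of the reverse-table bookkeeping performed by put_into_db()
# above, using an in-memory list of [id_bibrec, type] pairs instead of the
# idxWORDxxR table.  'FUTURE' rows are the fresh termlists written by
# add_recID_range(); a normal flush demotes 'CURRENT' to 'TEMPORARY',
# promotes 'FUTURE' to 'CURRENT' and finally drops the 'TEMPORARY' rows
# (in the real code the forward-table update happens between those steps).
def _demo_normal_flush(rows):
    """ROWS is a list of [id_bibrec, type]; return the rows left after a flush."""
    for row in rows:
        if row[1] == 'CURRENT':
            row[1] = 'TEMPORARY'
    for row in rows:
        if row[1] == 'FUTURE':
            row[1] = 'CURRENT'
    return [row for row in rows if row[1] != 'TEMPORARY']

# e.g. [[7, 'CURRENT'], [7, 'FUTURE']] becomes [[7, 'CURRENT']], keeping only
# the freshly indexed termlist for record 7.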
""" oldset = intbitset(set) set.update_with_signs(self.value[word]) return set != oldset def put_word_into_db(self, word): """Flush a single word to the database and delete it from memory""" set = self.load_old_recIDs(word) if set: # merge the word recIDs found in memory: if self.merge_with_old_recIDs(word,set) == 0: # nothing to update: write_message("......... unchanged hitlist for ``%s''" % word, verbose=9) pass else: # yes there were some new words: write_message("......... updating hitlist for ``%s''" % word, verbose=9) run_sql("UPDATE %s SET hitlist=%%s WHERE term=%%s" % self.tablename, (set.fastdump(), word)) else: # the word is new, will create new set: write_message("......... inserting hitlist for ``%s''" % word, verbose=9) set = intbitset(self.value[word].keys()) run_sql("INSERT INTO %s (term, hitlist) VALUES (%%s, %%s)" % self.tablename, (word, set.fastdump())) if not set: # never store empty words run_sql("DELETE from %s WHERE term=%%s" % self.tablename, (word,)) del self.value[word] def display(self): "Displays the word table." keys = self.value.keys() keys.sort() for k in keys: write_message("%s: %s" % (k, self.value[k])) def count(self): "Returns the number of words in the table." return len(self.value) def info(self): "Prints some information on the words table." write_message("The words table contains %d words." % self.count()) def lookup_words(self, word=""): "Lookup word from the words table." if not word: done = 0 while not done: try: word = raw_input("Enter word: ") done = 1 except (EOFError, KeyboardInterrupt): return if self.value.has_key(word): write_message("The word '%s' is found %d times." \ % (word, len(self.value[word]))) else: write_message("The word '%s' does not exist in the word file."\ % word) def update_last_updated(self, starting_time=None): """Update last_updated column of the index table in the database. Puts starting time there so that if the task was interrupted for record download, the records will be reindexed next time.""" if starting_time is None: return None write_message("updating last_updated to %s...", starting_time, verbose=9) return run_sql("UPDATE idxINDEX SET last_updated=%s WHERE id=%s", (starting_time, self.index_id,)) def add_recIDs(self, recIDs, opt_flush): """Fetches records which id in the recIDs range list and adds them to the wordTable. The recIDs range list is of the form: [[i1_low,i1_high],[i2_low,i2_high], ..., [iN_low,iN_high]]. 
""" global chunksize, _last_word_table flush_count = 0 records_done = 0 records_to_go = 0 for arange in recIDs: records_to_go = records_to_go + arange[1] - arange[0] + 1 time_started = time.time() # will measure profile time for arange in recIDs: i_low = arange[0] chunksize_count = 0 while i_low <= arange[1]: # calculate chunk group of recIDs and treat it: i_high = min(i_low+opt_flush-flush_count-1,arange[1]) i_high = min(i_low+chunksize-chunksize_count-1, i_high) try: self.chk_recID_range(i_low, i_high) except StandardError, e: write_message("Exception caught: %s" % e, sys.stderr) if task_get_option('verbose') >= 9: traceback.print_tb(sys.exc_info()[2]) task_update_status("ERROR") self.put_into_db() sys.exit(1) write_message("%s adding records #%d-#%d started" % \ (self.tablename, i_low, i_high)) if CFG_CHECK_MYSQL_THREADS: kill_sleepy_mysql_threads() task_update_progress("%s adding recs %d-%d" % (self.tablename, i_low, i_high)) self.del_recID_range(i_low, i_high) just_processed = self.add_recID_range(i_low, i_high) flush_count = flush_count + i_high - i_low + 1 chunksize_count = chunksize_count + i_high - i_low + 1 records_done = records_done + just_processed write_message("%s adding records #%d-#%d ended " % \ (self.tablename, i_low, i_high)) if chunksize_count >= chunksize: chunksize_count = 0 # flush if necessary: if flush_count >= opt_flush: self.put_into_db() self.clean() write_message("%s backing up" % (self.tablename)) flush_count = 0 self.log_progress(time_started,records_done,records_to_go) # iterate: i_low = i_high + 1 if flush_count > 0: self.put_into_db() self.log_progress(time_started,records_done,records_to_go) def add_recIDs_by_date(self, dates, opt_flush): """Add records that were modified between DATES[0] and DATES[1]. If DATES is not set, then add records that were modified since the last update of the index. """ if not dates: table_id = self.tablename[-3:-1] query = """SELECT last_updated FROM idxINDEX WHERE id='%s' """ % table_id res = run_sql(query) if not res: return if not res[0][0]: dates = ("0000-00-00", None) else: dates = (res[0][0], None) if dates[1] is None: res = run_sql("""SELECT b.id FROM bibrec AS b WHERE b.modification_date >= %s ORDER BY b.id ASC""", (dates[0],)) elif dates[0] is None: res = run_sql("""SELECT b.id FROM bibrec AS b WHERE b.modification_date <= %s ORDER BY b.id ASC""", (dates[1],)) else: res = run_sql("""SELECT b.id FROM bibrec AS b WHERE b.modification_date >= %s AND b.modification_date <= %s ORDER BY b.id ASC""", (dates[0], dates[1])) alist = create_range_list(res) if not alist: write_message( "No new records added. %s is up to date" % self.tablename) else: self.add_recIDs(alist, opt_flush) def add_recID_range(self, recID1, recID2): """Add records from RECID1 to RECID2.""" wlist = {} self.recIDs_in_mem.append([recID1,recID2]) # secondly fetch all needed tags: for tag in self.fields_to_index: get_words_function = self.tag_to_words_fnc_map.get(tag, self.default_get_words_fnc) bibXXx = "bib" + tag[0] + tag[1] + "x" bibrec_bibXXx = "bibrec_" + bibXXx query = """SELECT bb.id_bibrec,b.value FROM %s AS b, %s AS bb WHERE bb.id_bibrec BETWEEN %d AND %d AND bb.id_bibxxx=b.id AND tag LIKE '%s'""" % (bibXXx, bibrec_bibXXx, recID1, recID2, tag) res = run_sql(query) for row in res: recID,phrase = row if not wlist.has_key(recID): wlist[recID] = [] new_words = get_words_function(phrase, stemming_language=self.stemming_language) # ,self.separators wlist[recID] = list_union(new_words, wlist[recID]) # were there some words for these recIDs found? 
if len(wlist) == 0: return 0 recIDs = wlist.keys() for recID in recIDs: # was this record marked as deleted? if "DELETED" in self.get_field(recID, "980__c"): wlist[recID] = [] write_message("... record %d was declared deleted, removing its word list" % recID, verbose=9) write_message("... record %d, termlist: %s" % (recID, wlist[recID]), verbose=9) # put words into reverse index table with FUTURE status: for recID in recIDs: run_sql("INSERT INTO %sR (id_bibrec,termlist,type) VALUES (%%s,%%s,'FUTURE')" % self.tablename[:-1], (recID, serialize_via_marshal(wlist[recID]))) # ... and, for new records, enter the CURRENT status as empty: try: run_sql("INSERT INTO %sR (id_bibrec,termlist,type) VALUES (%%s,%%s,'CURRENT')" % self.tablename[:-1], (recID, serialize_via_marshal([]))) except DatabaseError: # okay, it's an already existing record, no problem pass # put words into memory word list: put = self.put for recID in recIDs: for w in wlist[recID]: put(recID, w, 1) return len(recIDs) def log_progress(self, start, done, todo): """Calculate progress and store it. start: start time, done: records processed, todo: total number of records""" time_elapsed = time.time() - start # consistency check if time_elapsed == 0 or done > todo: return time_recs_per_min = done/(time_elapsed/60.0) write_message("%d records took %.1f seconds to complete.(%1.f recs/min)"\ % (done, time_elapsed, time_recs_per_min)) if time_recs_per_min: write_message("Estimated runtime: %.1f minutes" % \ ((todo-done)/time_recs_per_min)) def put(self, recID, word, sign): "Adds/deletes a word to the word list." try: if self.wash_index_terms: word = wash_index_term(word) if self.value.has_key(word): # the word 'word' exist already: update sign self.value[word][recID] = sign else: self.value[word] = {recID: sign} except: write_message("Error: Cannot put word %s with sign %d for recID %s." % (word, sign, recID)) def del_recIDs(self, recIDs): """Fetches records which id in the recIDs range list and adds them to the wordTable. The recIDs range list is of the form: [[i1_low,i1_high],[i2_low,i2_high], ..., [iN_low,iN_high]]. """ count = 0 for arange in recIDs: self.del_recID_range(arange[0],arange[1]) count = count + arange[1] - arange[0] self.put_into_db() def del_recID_range(self, low, high): """Deletes records with 'recID' system number between low and high from memory words index table.""" write_message("%s fetching existing words for records #%d-#%d started" % \ (self.tablename, low, high), verbose=3) self.recIDs_in_mem.append([low,high]) query = """SELECT id_bibrec,termlist FROM %sR as bb WHERE bb.id_bibrec BETWEEN '%d' AND '%d'""" % (self.tablename[:-1], low, high) recID_rows = run_sql(query) for recID_row in recID_rows: recID = recID_row[0] wlist = deserialize_via_marshal(recID_row[1]) for word in wlist: self.put(recID, word, -1) write_message("%s fetching existing words for records #%d-#%d ended" % \ (self.tablename, low, high), verbose=3) def report_on_table_consistency(self): """Check reverse words index tables (e.g. idxWORD01R) for interesting states such as 'TEMPORARY' state. Prints small report (no of words, no of bad words). 
""" # find number of words: query = """SELECT COUNT(*) FROM %s""" % (self.tablename) res = run_sql(query, None, 1) if res: nb_words = res[0][0] else: nb_words = 0 # find number of records: query = """SELECT COUNT(DISTINCT(id_bibrec)) FROM %sR""" % (self.tablename[:-1]) res = run_sql(query, None, 1) if res: nb_records = res[0][0] else: nb_records = 0 # report stats: write_message("%s contains %d words from %d records" % (self.tablename, nb_words, nb_records)) # find possible bad states in reverse tables: query = """SELECT COUNT(DISTINCT(id_bibrec)) FROM %sR WHERE type <> 'CURRENT'""" % (self.tablename[:-1]) res = run_sql(query) if res: nb_bad_records = res[0][0] else: nb_bad_records = 999999999 if nb_bad_records: write_message("EMERGENCY: %s needs to repair %d of %d records" % \ (self.tablename, nb_bad_records, nb_records)) else: write_message("%s is in consistent state" % (self.tablename)) return nb_bad_records def repair(self, opt_flush): """Repair the whole table""" # find possible bad states in reverse tables: query = """SELECT COUNT(DISTINCT(id_bibrec)) FROM %sR WHERE type <> 'CURRENT'""" % (self.tablename[:-1]) res = run_sql(query, None, 1) if res: nb_bad_records = res[0][0] else: nb_bad_records = 0 if nb_bad_records == 0: return query = """SELECT id_bibrec FROM %sR WHERE type <> 'CURRENT' ORDER BY id_bibrec""" \ % (self.tablename[:-1]) res = run_sql(query) recIDs = create_range_list(res) flush_count = 0 records_done = 0 records_to_go = 0 for arange in recIDs: records_to_go = records_to_go + arange[1] - arange[0] + 1 time_started = time.time() # will measure profile time for arange in recIDs: i_low = arange[0] chunksize_count = 0 while i_low <= arange[1]: # calculate chunk group of recIDs and treat it: i_high = min(i_low+opt_flush-flush_count-1,arange[1]) i_high = min(i_low+chunksize-chunksize_count-1, i_high) try: self.fix_recID_range(i_low, i_high) except StandardError, e: write_message("Exception caught: %s" % e, sys.stderr) if task_get_option['verbose'] >= 9: traceback.print_tb(sys.exc_info()[2]) task_update_status("ERROR") self.put_into_db() sys.exit(1) flush_count = flush_count + i_high - i_low + 1 chunksize_count = chunksize_count + i_high - i_low + 1 records_done = records_done + i_high - i_low + 1 if chunksize_count >= chunksize: chunksize_count = 0 # flush if necessary: if flush_count >= opt_flush: self.put_into_db("emergency") self.clean() flush_count = 0 self.log_progress(time_started,records_done,records_to_go) # iterate: i_low = i_high + 1 if flush_count > 0: self.put_into_db("emergency") self.log_progress(time_started,records_done,records_to_go) write_message("%s inconsistencies repaired." % self.tablename) def chk_recID_range(self, low, high): """Check if the reverse index table is in proper state""" ## check db query = """SELECT COUNT(*) FROM %sR WHERE type <> 'CURRENT' AND id_bibrec BETWEEN '%d' AND '%d'""" % (self.tablename[:-1], low, high) res = run_sql(query, None, 1) if res[0][0]==0: write_message("%s for %d-%d is in consistent state"%(self.tablename,low,high)) return # okay, words table is consistent ## inconsistency detected! write_message("EMERGENCY: %s inconsistencies detected..." % self.tablename) write_message("""EMERGENCY: Errors found. You should check consistency of the %s - %sR tables.\nRunning 'bibindex --repair' is recommended.""" \ % (self.tablename, self.tablename[:-1])) raise StandardError def fix_recID_range(self, low, high): """Try to fix reverse index database consistency (e.g. table idxWORD01R) in the low,high doc-id range. 
Possible states for a recID follow: CUR TMP FUT: very bad things have happened: warn! CUR TMP : very bad things have happened: warn! CUR FUT: delete FUT (crash before flushing) CUR : database is ok TMP FUT: add TMP to memory and del FUT from memory flush (revert to old state) TMP : very bad things have happened: warn! FUT: very bad things have happended: warn! """ state = {} query = "SELECT id_bibrec,type FROM %sR WHERE id_bibrec BETWEEN '%d' AND '%d'"\ % (self.tablename[:-1], low, high) res = run_sql(query) for row in res: if not state.has_key(row[0]): state[row[0]]=[] state[row[0]].append(row[1]) ok = 1 # will hold info on whether we will be able to repair for recID in state.keys(): if not 'TEMPORARY' in state[recID]: if 'FUTURE' in state[recID]: if 'CURRENT' not in state[recID]: write_message("EMERGENCY: Record %d is in inconsistent state. Can't repair it." % recID) ok = 0 else: write_message("EMERGENCY: Inconsistency in record %d detected" % recID) query = """DELETE FROM %sR WHERE id_bibrec='%d'""" % (self.tablename[:-1], recID) run_sql(query) write_message("EMERGENCY: Inconsistency in record %d repaired." % recID) else: if 'FUTURE' in state[recID] and not 'CURRENT' in state[recID]: self.recIDs_in_mem.append([recID,recID]) # Get the words file query = """SELECT type,termlist FROM %sR WHERE id_bibrec='%d'""" % (self.tablename[:-1], recID) write_message(query, verbose=9) res = run_sql(query) for row in res: wlist = deserialize_via_marshal(row[1]) write_message("Words are %s " % wlist, verbose=9) if row[0] == 'TEMPORARY': sign = 1 else: sign = -1 for word in wlist: self.put(recID, word, sign) else: write_message("EMERGENCY: %s for %d is in inconsistent state. Couldn't repair it." % (self.tablename, recID)) ok = 0 if not ok: write_message("""EMERGENCY: Unrepairable errors found. You should check consistency of the %s - %sR tables. Deleting affected entries from these tables is recommended.""" % (self.tablename, self.tablename[:-1])) raise StandardError def test_fulltext_indexing(): """Tests fulltext indexing programs on PDF, PS, DOC, PPT, XLS. Prints list of words and word table on the screen. Does not integrate anything into the database. Useful when debugging problems with fulltext indexing: call this function instead of main(). 
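# Editor's restatement of the repair table from the fix_recID_range()
# docstring above, as a small decision function.  STATES is the collection of
# row types found for one recID in the reverse table; the returned label is
# the action the repair pass takes (the SQL itself stays in fix_recID_range()).
def _repair_action(states):
    states = frozenset(states)
    if states == frozenset(['CURRENT']):
        return 'ok'              # database already consistent, nothing to do
    if states == frozenset(['CURRENT', 'FUTURE']):
        return 'drop-future'     # crash happened before flushing
    if states == frozenset(['TEMPORARY', 'FUTURE']):
        return 'replay'          # re-load TMP/FUT rows into memory, re-flush
    return 'unrepairable'        # CUR+TMP(+FUT), TMP alone or FUT alone: warn

# e.g. _repair_action(['CURRENT', 'FUTURE']) == 'drop-future'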
""" print get_words_from_fulltext("http://doc.cern.ch/cgi-bin/setlink?base=atlnot&categ=Communication&id=com-indet-2002-012") # protected URL print get_words_from_fulltext("http://doc.cern.ch/cgi-bin/setlink?base=agenda&categ=a00388&id=a00388s2t7") # XLS print get_words_from_fulltext("http://doc.cern.ch/cgi-bin/setlink?base=agenda&categ=a02883&id=a02883s1t6/transparencies") # PPT print get_words_from_fulltext("http://doc.cern.ch/cgi-bin/setlink?base=agenda&categ=a99149&id=a99149s1t10/transparencies") # DOC print get_words_from_fulltext("http://doc.cern.ch/cgi-bin/setlink?base=preprint&categ=cern&id=lhc-project-report-601") # PDF sys.exit(0) def main(): """Main that construct all the bibtask.""" task_set_option('cmd', 'add') task_set_option('id', []) task_set_option("modified", []) task_set_option("collection", []) task_set_option("maxmem", 0) task_set_option("flush", 10000) task_set_option("windex", ','.join(get_all_indexes())) task_set_option("reindex", False) task_init(authorization_action='runbibindex', authorization_msg="BibIndex Task Submission", description="""Examples: \t%s -a -i 234-250,293,300-500 -u admin@localhost \t%s -a -w author,fulltext -M 8192 -v3 \t%s -d -m +4d -A on --flush=10000\n""" % ((sys.argv[0],) * 3), help_specific_usage=""" Indexing options: -a, --add\t\tadd or update words for selected records -d, --del\t\tdelete words for selected records -i, --id=low[-high]\t\tselect according to doc recID -m, --modified=from[,to]\tselect according to modification date -c, --collection=c1[,c2]\tselect according to collection -R, --reindex\treindex the selected indexes from scratch Repairing options: -k, --check\t\tcheck consistency for all records in the table(s) -r, --repair\t\ttry to repair all records in the table(s) Specific options: -w, --windex=w1[,w2]\tword/phrase indexes to consider (all) -M, --maxmem=XXX\tmaximum memory usage in kB (no limit) -f, --flush=NNN\t\tfull consistent table flush after NNN records (10000) """, version=__revision__, specific_params=("adi:m:c:w:krRM:f:", [ "add", "del", "id=", "modified=", "collection=", "windex=", "check", "repair", "reindex", "maxmem=", "flush=", ]), task_stop_helper_fnc=task_stop_table_close_fnc, task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter, task_run_fnc=task_run_core) def task_submit_elaborate_specific_parameter(key, value, opts, args): """ Given the string key it checks it's meaning, eventually using the value. Usually it fills some key in the options dict. It must return True if it has elaborated the key, False, if it doesn't know that key. eg: if key in ['-n', '--number']: self.options['number'] = value return True return False """ if key in ("-a", "--add"): task_set_option("cmd", "add") if ("-x","") in opts or ("--del","") in opts: raise StandardError, "Can not have --add and --del at the same time!" 
elif key in ("-k", "--check"): task_set_option("cmd", "check") elif key in ("-r", "--repair"): task_set_option("cmd", "repair") elif key in ("-d", "--del"): task_set_option("cmd", "del") elif key in ("-i", "--id"): task_set_option('id', task_get_option('id') + split_ranges(value)) elif key in ("-m", "--modified"): task_set_option("modified", get_date_range(value)) elif key in ("-c", "--collection"): task_set_option("collection", value) elif key in ("-R", "--reindex"): task_set_option("reindex", True) elif key in ("-w", "--windex"): task_set_option("windex", value) elif key in ("-M", "--maxmem"): task_set_option("maxmem", int(value)) if task_get_option("maxmem") < base_process_size + 1000: raise StandardError, "Memory usage should be higher than %d kB" % \ (base_process_size + 1000) elif key in ("-f", "--flush"): task_set_option("flush", int(value)) else: return False return True def task_stop_table_close_fnc(): """ Close tables to STOP. """ global _last_word_table if _last_word_table: _last_word_table.put_into_db() def task_run_core(): """Runs the task by fetching arguments from the BibSched task queue. This is what BibSched will be invoking via daemon call. The task prints Fibonacci numbers for up to NUM on the stdout, and some messages on stderr. Return 1 in case of success and 0 in case of failure.""" global _last_word_table if task_get_option("cmd") == "check": wordTables = get_word_tables(task_get_option("windex")) for index_id, index_tags in wordTables.iteritems(): wordTable = WordTable(index_id, index_tags, 'idxWORD%02dF', get_words_from_phrase, {'8564_u': get_words_from_fulltext}) _last_word_table = wordTable wordTable.report_on_table_consistency() _last_word_table = None return True if False: # FIXME: remove when idxPHRASE will be plugged to search_engine if task_get_option("cmd") == "check": wordTables = get_word_tables(task_get_option("windex")) for index_id, index_tags in wordTables.iteritems(): wordTable = WordTable(index_id, index_tags, 'idxPHRASE%02dF', get_phrases_from_phrase, {'8564_u': get_nothing_from_phrase}, False) _last_word_table = wordTable wordTable.report_on_table_consistency() _last_word_table = None return True if task_get_option("reindex"): for index_name in task_get_option("windex").split(','): truncate_index_table(index_name) # Let's work on single words! wordTables = get_word_tables(task_get_option("windex")) for index_id, index_tags in wordTables.iteritems(): wordTable = WordTable(index_id, index_tags, 'idxWORD%02dF', get_words_from_phrase, {'8564_u': get_words_from_fulltext}) _last_word_table = wordTable wordTable.report_on_table_consistency() try: if task_get_option("cmd") == "del": if task_get_option("id"): wordTable.del_recIDs(task_get_option("id")) elif task_get_option("collection"): l_of_colls = task_get_option("collection").split(",") recIDs = perform_request_search(c=l_of_colls) recIDs_range = [] for recID in recIDs: recIDs_range.append([recID,recID]) wordTable.del_recIDs(recIDs_range) else: write_message("Missing IDs of records to delete from index %s." 
% wordTable.tablename, sys.stderr) raise StandardError elif task_get_option("cmd") == "add": if task_get_option("id"): wordTable.add_recIDs(task_get_option("id"), task_get_option("flush")) elif task_get_option("collection"): l_of_colls = task_get_option("collection").split(",") recIDs = perform_request_search(c=l_of_colls) recIDs_range = [] for recID in recIDs: recIDs_range.append([recID,recID]) wordTable.add_recIDs(recIDs_range, task_get_option("flush")) else: wordTable.add_recIDs_by_date(task_get_option("modified"), task_get_option("flush")) # only update last_updated if run via automatic mode: wordTable.update_last_updated(task_get_task_param('task_starting_time')) elif task_get_option("cmd") == "repair": wordTable.repair(task_get_option("flush")) else: write_message("Invalid command found processing %s" % \ wordTable.tablename, sys.stderr) raise StandardError except StandardError, e: write_message("Exception caught: %s" % e, sys.stderr) if task_get_option("verbose") >= 8: traceback.print_tb(sys.exc_info()[2]) task_update_status("ERROR") if _last_word_table: _last_word_table.put_into_db() sys.exit(1) wordTable.report_on_table_consistency() if False: # FIXME: remove when idxPHRASE will be plugged to search_engine # Let's work on phrases now wordTables = get_word_tables(task_get_option("windex")) for index_id, index_tags in wordTables.iteritems(): wordTable = WordTable(index_id, index_tags, 'idxPHRASE%02dF', get_phrases_from_phrase, {'8564_u': get_nothing_from_phrase}, False) _last_word_table = wordTable wordTable.report_on_table_consistency() try: if task_get_option("cmd") == "del": if task_get_option("id"): wordTable.del_recIDs(task_get_option("id")) elif task_get_option("collection"): l_of_colls = task_get_option("collection").split(",") recIDs = perform_request_search(c=l_of_colls) recIDs_range = [] for recID in recIDs: recIDs_range.append([recID,recID]) wordTable.del_recIDs(recIDs_range) else: write_message("Missing IDs of records to delete from index %s." % wordTable.tablename, sys.stderr) raise StandardError elif task_get_option("cmd") == "add": if task_get_option("id"): wordTable.add_recIDs(task_get_option("id"), task_get_option("flush")) elif task_get_option("collection"): l_of_colls = task_get_option("collection").split(",") recIDs = perform_request_search(c=l_of_colls) recIDs_range = [] for recID in recIDs: recIDs_range.append([recID,recID]) wordTable.add_recIDs(recIDs_range, task_get_option("flush")) else: wordTable.add_recIDs_by_date(task_get_option("modified"), task_get_option("flush")) # only update last_updated if run via automatic mode: wordTable.update_last_updated(task_get_task_param('task_starting_time')) elif task_get_option("cmd") == "repair": wordTable.repair(task_get_option("flush")) else: write_message("Invalid command found processing %s" % \ wordTable.tablename, sys.stderr) raise StandardError except StandardError, e: write_message("Exception caught: %s" % e, sys.stderr) if task_get_option("verbose") >= 9: traceback.print_tb(sys.exc_info()[2]) task_update_status("ERROR") if _last_word_table: _last_word_table.put_into_db() sys.exit(1) wordTable.report_on_table_consistency() _last_word_table = None return True ### okay, here we go: if __name__ == '__main__': main() diff --git a/modules/websubmit/web/admin/referees.py b/modules/websubmit/web/admin/referees.py index d08d81077..6882e031a 100644 --- a/modules/websubmit/web/admin/referees.py +++ b/modules/websubmit/web/admin/referees.py @@ -1,244 +1,244 @@ ## $Id$ ## ## This file is part of CDS Invenio. 
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """WebSubmit interface for the management of referees.""" __revision__ = "$Id$" ## import interesting modules: import types import re from invenio.config import \ CFG_SITE_LANG, \ CFG_SITE_NAME, \ images, \ weburl from invenio.dbquery import run_sql, Error from invenio.access_control_engine import acc_authorize_action from invenio.access_control_admin import \ acc_delete_user_role, \ acc_get_role_id, \ acc_add_role, \ acc_add_action,\ acc_add_role_action_arguments, \ acc_add_argument, \ acc_get_user_roles, \ acc_add_user_role, \ acc_get_action_id, \ acc_get_all_roles, \ acc_get_role_users from invenio.webpage import page, create_error_box from invenio.webuser import getUid, get_email, list_registered_users, page_not_authorized from invenio.messages import wash_language def index(req, c=CFG_SITE_NAME, ln=CFG_SITE_LANG, todo="", id="", doctype="", categ="", addusers="", warningText="", role=""): """Main entry point for the management of referees.""" ln = wash_language(ln) # get user ID: try: uid = getUid(req) except Error, e: - return errorMsg(e.value, req) + return errorMsg(str(e), req, ln=ln) (auth_code, auth_message) = acc_authorize_action(req, "cfgwebsubmit", verbose=0) if auth_code != 0: ## user is not authorised to use WebSubmit Admin: return page_not_authorized(req=req, text=auth_message) # request for deleting a user if todo == "deleteuser": acc_delete_user_role(id, name_role=role) # request for adding user(s) if todo == "adduser": role = "referee_%s_%s" % (doctype, categ[1]) roleId = acc_get_role_id(role) # if the role does not exists, we create it if roleId == 0: if acc_add_role(role, "referees for document type %s category %s" % (doctype, categ[1])) == 0: return errorMsg("Cannot create referee role", req) else: roleId = acc_get_role_id(role) # if the action does not exist, we create it actionId = acc_get_action_id("referee") if actionId == 0: if acc_add_action("referee", "", "no", ("doctype","categ")) == 0: return errorMsg("Cannot create action 'referee'", req) else: actionId = acc_get_action_id("referee") #create arguments arg1Id = acc_add_argument("doctype", doctype) arg2Id = acc_add_argument("categ", categ[1]) # then link the role with the action if acc_add_role_action_arguments(roleId, actionId, -1, 0, 0, [arg1Id, arg2Id]) == 0: return errorMsg("Cannot link role with action", req) roleId = acc_get_role_id(role) # For each id in the array if isinstance(addusers, types.ListType): for adduser in addusers: # First check whether this id is not already associated with this rule myRoles = acc_get_user_roles(adduser) if not roleId in myRoles: # Actually add the role to the user acc_add_user_role(adduser, roleId) else: warningText = 'Sorry... This user is already a referee for this category.' 
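# Editor's sketch of the naming and membership checks used just above.  A
# referee role is created per (doctype, category short name) and a user is
# only enrolled when the role id is not already returned by
# acc_get_user_roles().  `categ_sname` and the 'DEMOTEST'/'CAT1' values are
# illustrative only; the caller above passes categ[1].
def _referee_role_name(doctype, categ_sname):
    """e.g. _referee_role_name('DEMOTEST', 'CAT1') == 'referee_DEMOTEST_CAT1'"""
    return "referee_%s_%s" % (doctype, categ_sname)

def _already_referee(user_id, role_id):
    """True when USER_ID already holds ROLE_ID, so acc_add_user_role() is skipped."""
    return role_id in acc_get_user_roles(user_id)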
else: # First check whether this id is not already associated with this rule myRoles = acc_get_user_roles(addusers) if not roleId in myRoles: # Actually add the role to the user acc_add_user_role(addusers, roleId) else: warningText = 'Sorry... This user is already a referee for this category.' return page(title="websubmit admin - referee selection", body=displayRefereesPage(doctype, warningText), description="", keywords="", uid=uid, language=ln, req=req) def displayRefereesPage(doctype, warningText): """Output the list of refeeres as well as the controls to add/remove them""" t = "" if doctype in ['', '*']: doctype = '*' docname = "all catalogues" else: res = run_sql("SELECT * FROM sbmDOCTYPE WHERE sdocname=%s", (doctype,)) docname = res[0][0] t += warningText t += """
""" % doctype # call the function to display the table containing the list of associated emails t += displayUserTable(doctype) t += """ """ # call the function to display the form allowing the manager to add new users t += displayAddUser(doctype) end_url = "%s/admin/websubmit/websubmitadmin.py/doctypeconfigure?doctype=%s" % (weburl, doctype) if doctype in ['', '*']: end_url = "%s/admin/websubmit/websubmitadmin.py/" % weburl t += """
Finished
""" % end_url return t def displayUserTable(doctype): """Display the list of referees for the given doctype, as well as the control to remove them""" t = "" # start displaying the table which will contain the list of email addresses. t += """ """ roles = acc_get_all_roles() referees = {} for role in roles: role_name = role[1] role_id = role[0] if re.match("^referee_%s_" % doctype, role_name): # Try to retrieve the referee's email from the referee's database if acc_get_role_users(role_id) is not None: referees[role_name] = acc_get_role_users(role_id) if len(referees) == 0: t += '' % images i = 0 for role in referees.keys(): categ = re.match("referee_%s_(.*)" % doctype, role).group(1) res = run_sql("SELECT lname FROM sbmCATEGORIES WHERE sname=%s and doctype=%s", (categ, doctype,)) if len(res) > 0: categname = "Referee(s) for category: %s" % res[0][0] else: categname = "General Referee(s)" t += '' % categname for referee in referees[role]: if int(i/2) == i/2: bgcolor = "#eeeeee" else: bgcolor = "#dddddd" t += '' % bgcolor t += '' t += '''' t += '' i += 1 # close table t += "" return t def displayAddUser(doctype): """Display controls for adding users""" t = "" # start displaying the table which will contain the add form t += """ ' return t def errorMsg(title, req, uid, c=CFG_SITE_NAME, ln=CFG_SITE_LANG): """Prints the error page.""" return page(title="error", body = create_error_box(req, title=title,verbose=0, ln=ln), description="%s - Internal Error" % c, keywords="%s, Internal Error" % c, language=ln, uid=uid, req=req) diff --git a/modules/websubmit/web/publiline.py b/modules/websubmit/web/publiline.py index 8520a85b5..766297969 100644 --- a/modules/websubmit/web/publiline.py +++ b/modules/websubmit/web/publiline.py @@ -1,1592 +1,1592 @@ ## $Id$ ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ publiline_complex.py -- implementes ... actors in this process are: 1. author -- subilmts ... 2. edi 3; ref Il ne faut pas oublier de definir les roles... 
""" __revision__ = "$Id$" ## import interesting modules: import string import os import sys import time import types import re import shutil from invenio.config import \ CFG_ACCESS_CONTROL_LEVEL_SITE, \ accessurl, \ CFG_SITE_ADMIN_EMAIL, \ CFG_SITE_LANG, \ CFG_SITE_NAME, \ images, \ CFG_PYLIBDIR, \ CFG_WEBSUBMIT_STORAGEDIR, \ supportemail, \ sweburl, \ urlpath, \ CFG_VERSION from invenio.dbquery import run_sql, Error from invenio.access_control_engine import acc_authorize_action from invenio.access_control_admin import * from invenio.webpage import page, create_error_box from invenio.webuser import getUid, get_email, list_registered_users, page_not_authorized from invenio.messages import gettext_set_language, wash_language from invenio.websubmit_config import * from invenio.search_engine import search_pattern, get_fieldvalues from invenio.websubmit_functions.Retrieve_Data import Get_Field from invenio.mailutils import send_email from invenio.urlutils import wash_url_argument from invenio.webgroup_dblayer import get_group_infos, insert_new_group, insert_new_member, delete_member from invenio.webaccessadmin_lib import cleanstring_email from invenio.access_control_config import MAXSELECTUSERS from invenio.access_control_admin import acc_get_user_email from invenio.webmessage import perform_request_send, perform_request_write_with_search import invenio.webbasket_dblayer as basketdb from invenio.webbasket_config import CFG_WEBBASKET_SHARE_LEVELS, CFG_WEBBASKET_CATEGORIES, CFG_WEBBASKET_SHARE_LEVELS_ORDERED from invenio.webbasket import perform_request_display_item, perform_request_save_comment from invenio.websubmit_functions.Retrieve_Data import Get_Field from invenio.errorlib import register_exception from invenio.bibrecord import create_records, record_get_field_value, record_get_field_values execfile("%s/invenio/websubmit_functions/Retrieve_Data.py" % CFG_PYLIBDIR) import invenio.template websubmit_templates = invenio.template.load('websubmit') CFG_WEBSUBMIT_PENDING_DIR = "%s/pending" % CFG_WEBSUBMIT_STORAGEDIR CFG_WEBSUBMIT_DUMMY_MARC_XML_REC = "dummy_marcxml_rec" CFG_WEBSUBMIT_MARC_XML_REC = "recmysql" def index(req,c=CFG_SITE_NAME,ln=CFG_SITE_LANG,doctype="",categ="",RN="",send="",flow="",apptype="", action="", email_user_pattern="", id_user="", id_user_remove="", validate="", id_user_val="", msg_subject="", msg_body=""): global uid ln = wash_language(ln) categ = wash_url_argument(categ, 'str') RN = wash_url_argument(RN, 'str') send = wash_url_argument(send, 'str') flow = wash_url_argument(flow, 'str') apptype = wash_url_argument(apptype, 'str') action = wash_url_argument(action, 'str') email_user_pattern = wash_url_argument(email_user_pattern, 'str') id_user = wash_url_argument(id_user, 'int') id_user_remove = wash_url_argument(id_user_remove, 'int') validate = wash_url_argument(validate, 'str') id_user_val = wash_url_argument(id_user_val, 'int') msg_subject = wash_url_argument(msg_subject, 'str') msg_body = wash_url_argument(msg_body, 'str') # load the right message language _ = gettext_set_language(ln) t="" # get user ID: try: uid = getUid(req) if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1: return page_not_authorized(req, "../publiline.py/index", navmenuid='yourapprovals') uid_email = get_email(uid) except Error, e: - return errorMsg(e.value,req, ln = ln) + return errorMsg(str(e),req, ln = ln) if flow == "cplx": if doctype == "": t = selectCplxDoctype(ln) elif (categ == "") or (apptype == ""): t = selectCplxCateg(doctype, ln) elif RN == "": t = selectCplxDocument(doctype, categ, 
apptype, ln) elif action == "": t = displayCplxDocument(req, doctype, categ, RN, apptype, ln) else: t = doCplxAction(req, doctype, categ, RN, apptype, action, email_user_pattern, id_user, id_user_remove, validate, id_user_val, msg_subject, msg_body, ln) return page(title="specific publication line", navtrail= """%(account)s""" % { 'sweburl' : sweburl, 'account' : _("Your Account"), }, body=t, description="", keywords="", uid=uid, language=ln, req=req, navmenuid='yourapprovals') else: if doctype == "": t = selectDoctype(ln) elif categ == "": t = selectCateg(doctype, ln) elif RN == "": t = selectDocument(doctype, categ, ln) else: t = displayDocument(req, doctype, categ, RN, send, ln) return page(title="publication line", navtrail= """%(account)s""" % { 'sweburl' : sweburl, 'account' : _("Your Account"), }, body=t, description="", keywords="", uid=uid, language=ln, req=req, navmenuid='yourapprovals') def selectDoctype(ln = CFG_SITE_LANG): res = run_sql("select DISTINCT doctype from sbmAPPROVAL") docs = [] for row in res: res2 = run_sql("select ldocname from sbmDOCTYPE where sdocname=%s", (row[0],)) docs.append({ 'doctype' : row[0], 'docname' : res2[0][0], }) t = websubmit_templates.tmpl_publiline_selectdoctype( ln = ln, docs = docs, ) return t def selectCplxDoctype(ln = CFG_SITE_LANG): res = run_sql("select DISTINCT doctype from sbmCPLXAPPROVAL") docs = [] for row in res: res2 = run_sql("select ldocname from sbmDOCTYPE where sdocname=%s", (row[0],)) docs.append({ 'doctype' : row[0], 'docname' : res2[0][0], }) t = websubmit_templates.tmpl_publiline_selectcplxdoctype( ln = ln, docs = docs, ) return t def selectCateg(doctype, ln = CFG_SITE_LANG): t="" res = run_sql("select ldocname from sbmDOCTYPE where sdocname=%s",(doctype,)) title = res[0][0] sth = run_sql("select * from sbmCATEGORIES where doctype=%s order by lname",(doctype,)) if len(sth) == 0: categ = "unknown" return selectDocument(doctype,categ, ln = ln) categories = [] for arr in sth: waiting = 0 rejected = 0 approved = 0 sth2 = run_sql("select COUNT(*) from sbmAPPROVAL where doctype=%s and categ=%s and status='waiting'", (doctype,arr[1],)) waiting = sth2[0][0] sth2 = run_sql("select COUNT(*) from sbmAPPROVAL where doctype=%s and categ=%s and status='approved'",(doctype,arr[1],)) approved = sth2[0][0] sth2 = run_sql("select COUNT(*) from sbmAPPROVAL where doctype=%s and categ=%s and status='rejected'",(doctype,arr[1],)) rejected = sth2[0][0] categories.append({ 'waiting' : waiting, 'approved' : approved, 'rejected' : rejected, 'id' : arr[1], }) t = websubmit_templates.tmpl_publiline_selectcateg( ln = ln, categories = categories, doctype = doctype, title = title, images = images, ) return t def selectCplxCateg(doctype, ln = CFG_SITE_LANG): t="" res = run_sql("SELECT ldocname FROM sbmDOCTYPE WHERE sdocname=%s",(doctype,)) title = res[0][0] sth = run_sql("SELECT * FROM sbmCATEGORIES WHERE doctype=%s ORDER BY lname",(doctype,)) if len(sth) == 0: categ = "unknown" return selectCplxDocument(doctype,categ, "", ln = ln) types = {} for apptype in ('RRP', 'RPB', 'RDA'): for arr in sth: info = {'id' : arr[1], 'desc' : arr[2],} for status in ('waiting', 'rejected', 'approved', 'cancelled'): info[status] = __db_count_doc (doctype, arr[1], status, apptype) types.setdefault (apptype, []).append(info) t = websubmit_templates.tmpl_publiline_selectcplxcateg( ln = ln, types = types, doctype = doctype, title = title, images = images, ) return t def selectDocument(doctype,categ, ln = CFG_SITE_LANG): t="" res = run_sql("select ldocname from sbmDOCTYPE where 
sdocname=%s", (doctype,)) title = res[0][0] if categ == "": categ = "unknown" docs = [] sth = run_sql("select rn,status from sbmAPPROVAL where doctype=%s and categ=%s order by status DESC,rn DESC",(doctype,categ)) for arr in sth: docs.append({ 'RN' : arr[0], 'status' : arr[1], }) t = websubmit_templates.tmpl_publiline_selectdocument( ln = ln, doctype = doctype, title = title, categ = categ, images = images, docs = docs, ) return t def selectCplxDocument(doctype,categ,apptype, ln = CFG_SITE_LANG): t="" res = run_sql("select ldocname from sbmDOCTYPE where sdocname=%s", (doctype,)) title = res[0][0] sth = run_sql("select lname from sbmCATEGORIES where doctype=%s and sname=%s order by lname",(doctype,categ,)) if len(sth) != 0: categname = sth[0][0] else: categname = "Unknown" docs = [] sth = run_sql("select rn,status from sbmCPLXAPPROVAL where doctype=%s and categ=%s and type=%s order by status DESC,rn DESC",(doctype,categ,apptype)) for arr in sth: docs.append({ 'RN' : arr[0], 'status' : arr[1], }) t = websubmit_templates.tmpl_publiline_selectcplxdocument( ln = ln, doctype = doctype, title = title, categ = categ, categname = categname, images = images, docs = docs, apptype = apptype, ) return t def displayDocument(req, doctype,categ,RN,send, ln = CFG_SITE_LANG): # load the right message language _ = gettext_set_language(ln) t="" res = run_sql("select ldocname from sbmDOCTYPE where sdocname=%s", (doctype,)) docname = res[0][0] if categ == "": categ = "unknown" sth = run_sql("select rn,status,dFirstReq,dLastReq,dAction,access from sbmAPPROVAL where rn=%s",(RN,)) if len(sth) > 0: arr = sth[0] rn = arr[0] status = arr[1] dFirstReq = arr[2] dLastReq = arr[3] dAction = arr[4] access = arr[5] else: return _("Approval has never been requested for this document.") + "
 " ## Get the details of the pending item: item_details = get_pending_item_details(doctype, RN) ## get_pending_item_details has returned either None or a dictionary ## with the following structure: ## { 'title' : '-', ## String - the item's title ## 'recid' : '', ## String - recid ## 'report-number' : '', ## String - the item's report number ## 'authors' : [], ## List - the item's authors ## } if item_details is not None: authors = ", ".join(item_details['authors']) newrn = item_details['report-number'] title = item_details['title'] sysno = item_details['recid'] else: ## FIXME! ## For backward compatibility reasons, it we failed to find the item's ## details, we will try the old way, which includes searching for files ## like TI, TIF in the submission's working directory. ## This is not nice and should be removed. try: (authors,title,sysno,newrn) = getInfo(doctype,categ,RN) except TypeError: return _("Unable to display document.") confirm_send = 0 if send == _("Send Again"): if authors == "unknown" or title == "unknown": SendWarning(doctype,categ,RN,title,authors,access, ln = ln) else: # @todo - send in different languages SendEnglish(doctype,categ,RN,title,authors,access,sysno) run_sql("update sbmAPPROVAL set dLastReq=NOW() where rn=%s",(RN,)) confirm_send = 1 if status == "waiting": (auth_code, auth_message) = acc_authorize_action(req, "referee",verbose=0,doctype=doctype, categ=categ) else: (auth_code, auth_message) = (None, None) t = websubmit_templates.tmpl_publiline_displaydoc( ln = ln, docname = docname, doctype = doctype, categ = categ, rn = rn, status = status, dFirstReq = dFirstReq, dLastReq = dLastReq, dAction = dAction, access = access, images = images, accessurl = accessurl, confirm_send = confirm_send, auth_code = auth_code, auth_message = auth_message, authors = authors, title = title, sysno = sysno, newrn = newrn, ) return t def displayCplxDocument(req, doctype,categ,RN,apptype, ln = CFG_SITE_LANG): # load the right message language _ = gettext_set_language(ln) t="" uid = getUid(req) res = run_sql("select ldocname from sbmDOCTYPE where sdocname=%s", (doctype,)) docname = res[0][0] if categ == "": categ = "unknown" key = (RN, apptype) infos = __db_get_infos (key) if len(infos) > 0: (status, id_group, id_bskBASKET, id_EdBoardGroup, dFirstReq,dLastReq,dEdBoardSel, dRefereeSel, dRefereeRecom, dEdBoardRecom, dPubComRecom, dProjectLeaderAction) = infos[0] dates = {'dFirstReq' : dFirstReq, 'dLastReq' : dLastReq, 'dEdBoardSel' : dEdBoardSel, 'dRefereeSel' : dRefereeSel, 'dRefereeRecom' : dRefereeRecom, 'dEdBoardRecom' : dEdBoardRecom, 'dPubComRecom' : dPubComRecom, 'dProjectLeaderAction' : dProjectLeaderAction, } else: return _("Approval has never been requested for this document.") + "
 " ## Removing call to deprecated "getInAlice" function and replacing it with ## a call to the newer "get_brief_doc_details_from_repository" function: ## try: ## (authors,title,sysno,newrn) = getInAlice(doctype,categ,RN) ## except TypeError: ## return _("Unable to display document.") item_details = get_brief_doc_details_from_repository(RN) ## get_brief_doc_details_from_repository has returned either None ## or a dictionary with the following structure: ## { 'title' : '-', ## String - the item's title ## 'recid' : '', ## String - recid ## 'report-number' : '', ## String - the item's report number ## 'authors' : [], ## List - the item's authors ## } if item_details is not None: ## Details of the item were found in the CDS Invenio repository authors = ", ".join(item_details['authors']) newrn = item_details['report-number'] title = item_details['title'] sysno = item_details['recid'] else: ## Can't find any document details. return _("Unable to display document.") if status == "waiting": isPubCom = __is_PubCom (req, doctype) isEdBoard = __is_EdBoard (uid, id_EdBoardGroup) isReferee = __is_Referee (uid, id_bskBASKET) isProjectLeader = __is_ProjectLeader (req, doctype, categ) isAuthor = __is_Author (uid, sysno) else: isPubCom = None isEdBoard = None isReferee = None isProjectLeader = None isAuthor = None t += websubmit_templates.tmpl_publiline_displaycplxdoc( ln = ln, docname = docname, doctype = doctype, categ = categ, rn = RN, apptype = apptype, status = status, dates = dates, images = images, accessurl = accessurl, isPubCom = isPubCom, isEdBoard = isEdBoard, isReferee = isReferee, isProjectLeader = isProjectLeader, isAuthor = isAuthor, authors = authors, title = title, sysno = sysno, newrn = newrn, ) if id_bskBASKET > 0: rights = basketdb.get_max_user_rights_on_basket(uid, id_bskBASKET) if not(__check_basket_sufficient_rights(rights, CFG_WEBBASKET_SHARE_LEVELS['READITM'])): return t comments = basketdb.get_comments(id_bskBASKET, sysno) if dProjectLeaderAction != None: user_can_add_comment = 0 else: user_can_add_comment = __check_basket_sufficient_rights(rights, CFG_WEBBASKET_SHARE_LEVELS['ADDCMT']) t += websubmit_templates.tmpl_publiline_displaycplxdocitem( doctype, categ, RN, apptype, "AddComment", comments, (__check_basket_sufficient_rights(rights, CFG_WEBBASKET_SHARE_LEVELS['READCMT']), user_can_add_comment, __check_basket_sufficient_rights(rights, CFG_WEBBASKET_SHARE_LEVELS['DELCMT'])), selected_category=CFG_WEBBASKET_CATEGORIES['GROUP'], selected_topic=0, selected_group_id=id_group, ln=ln) return t def __check_basket_sufficient_rights(rights_user_has, rights_needed): """Private function, check if the rights are sufficient.""" try: out = CFG_WEBBASKET_SHARE_LEVELS_ORDERED.index(rights_user_has) >= \ CFG_WEBBASKET_SHARE_LEVELS_ORDERED.index(rights_needed) except ValueError: out = 0 return out def __is_PubCom (req,doctype): (isPubCom, auth_message) = acc_authorize_action(req, "pubcomchair",verbose=0,doctype=doctype) return isPubCom def __is_EdBoard (uid, id_EdBoardGroup): isEdBoard = None if id_EdBoardGroup > 0: edBoard = run_sql("""SELECT u.id FROM user u LEFT JOIN user_usergroup ug ON u.id = ug.id_user WHERE ug.id_usergroup = '%s' and user_status != 'A' AND user_status != 'P'""" % (id_EdBoardGroup, )) for uid_scan in edBoard: if uid == uid_scan[0]: isEdBoard = 0 break return isEdBoard def __is_Referee (uid, id_bskBASKET): isReferee = None if id_bskBASKET > 0: if basketdb.check_user_owns_baskets (uid, id_bskBASKET) == 1: isReferee = 0 return isReferee def __is_ProjectLeader (req, doctype, 
categ): (isProjectLeader, auth_message) = acc_authorize_action(req, "projectleader",verbose=0,doctype=doctype,categ=categ) return isProjectLeader def __is_Author (uid, sysno): email = Get_Field("8560_f",sysno) email = re.sub("[\n\r ]+","",email) uid_email = re.sub("[\n\r ]+","", acc_get_user_email(uid)) isAuthor = None if (re.search(uid_email,email,re.IGNORECASE) != None) and (uid_email != ""): isAuthor = 0 return isAuthor def __db_count_doc (doctype, categ, status, apptype): return run_sql("SELECT COUNT(*) FROM sbmCPLXAPPROVAL WHERE doctype=%s AND categ=%s AND status=%s AND type=%s",(doctype,categ,status,apptype,))[0][0] def __db_get_infos (key): return run_sql("SELECT status,id_group,id_bskBASKET,id_EdBoardGroup,dFirstReq,dLastReq,dEdBoardSel,dRefereeSel,dRefereeRecom,dEdBoardRecom,dPubComRecom,dProjectLeaderAction FROM sbmCPLXAPPROVAL WHERE rn=%s and type=%s", key) def __db_set_EdBoardSel_time (key): run_sql("UPDATE sbmCPLXAPPROVAL SET dEdBoardSel=NOW() WHERE rn=%s and type=%s", key) def __db_check_EdBoardGroup ((RN,apptype), id_EdBoardGroup, uid, group_descr): res = get_group_infos (id_EdBoardGroup) if len(res) == 0: id_EdBoardGroup = insert_new_group (uid, RN, group_descr % RN, "VM") run_sql("UPDATE sbmCPLXAPPROVAL SET id_EdBoardGroup=%s WHERE rn=%s and type=%s", (id_EdBoardGroup,RN,apptype,)) return id_EdBoardGroup def __db_set_basket ((RN,apptype), id_bsk): run_sql("UPDATE sbmCPLXAPPROVAL SET id_bskBASKET=%s, dRefereeSel=NOW() WHERE rn=%s and type=%s", (id_bsk,RN,apptype,)) def __db_set_RefereeRecom_time (key): run_sql("UPDATE sbmCPLXAPPROVAL SET dRefereeRecom=NOW() WHERE rn=%s and type=%s", key) def __db_set_EdBoardRecom_time (key): run_sql("UPDATE sbmCPLXAPPROVAL SET dEdBoardRecom=NOW() WHERE rn=%s and type=%s", key) def __db_set_PubComRecom_time (key): run_sql("UPDATE sbmCPLXAPPROVAL SET dPubComRecom=NOW() WHERE rn=%s and type=%s", key) def __db_set_status ((RN,apptype), status): run_sql("UPDATE sbmCPLXAPPROVAL SET status=%s, dProjectLeaderAction=NOW() WHERE rn=%s and type=%s", (status,RN,apptype,)) def doCplxAction(req, doctype, categ, RN, apptype, action, email_user_pattern, id_user, id_user_remove, validate, id_user_val, msg_subject, msg_body, ln=CFG_SITE_LANG): """ Perform complex action. Note: all arguments are supposed to be washed already. Return HTML body for the page. In case of errors, deletes hard drive.
;-) """ # load the right message language _ = gettext_set_language(ln) TEXT_RRP_RefereeSel_BASKET_DESCR = "Requests for refereeing process" TEXT_RRP_RefereeSel_MSG_REFEREE_SUBJECT = "Referee selection" TEXT_RRP_RefereeSel_MSG_REFEREE_BODY = "You have been named as a referee for this document :" TEXT_RRP_RefereeSel_MSG_GROUP_SUBJECT = "Please, review this publication" TEXT_RRP_RefereeSel_MSG_GROUP_BODY = "Please, review the following publication" TEXT_RRP_RefereeRecom_MSG_PUBCOM_SUBJECT = "Final recommendation from the referee" TEXT_RRP_PubComRecom_MSG_PRJLEADER_SUBJECT = "Final recommendation from the publication board" TEXT_RRP_ProjectLeaderDecision_MSG_SUBJECT = "Final decision from the project leader" TEXT_RPB_EdBoardSel_MSG_EDBOARD_SUBJECT = "You have been selected in a editorial board" TEXT_RPB_EdBoardSel_MSG_EDBOARD_BODY = "You have been selected as a member of the editorial board of this document :" TEXT_RPB_EdBoardSel_EDBOARD_GROUP_DESCR = "Editorial board for %s" TEXT_RPB_RefereeSel_BASKET_DESCR = "Requests for publication" TEXT_RPB_RefereeSel_MSG_REFEREE_SUBJECT = "Referee selection" TEXT_RPB_RefereeSel_MSG_REFEREE_BODY = "You have been named as a referee for this document :" TEXT_RPB_RefereeSel_MSG_GROUP_SUBJECT = "Please, review this publication" TEXT_RPB_RefereeSel_MSG_GROUP_BODY = "Please, review the following publication" TEXT_RPB_RefereeRecom_MSG_EDBOARD_SUBJECT = "Final recommendation from the referee" TEXT_RPB_EdBoardRecom_MSG_PUBCOM_SUBJECT = "Final recommendation from the editorial board" TEXT_RPB_PubComRecom_MSG_PRJLEADER_SUBJECT = "Final recommendation from the publication board" TEXT_RPB_ProjectLeaderDecision_MSG_SUBJECT = "Final decision from the project leader" t="" uid = getUid(req) if categ == "": categ = "unknown" key = (RN, apptype) infos = __db_get_infos (key) if len(infos) > 0: (status, id_group, id_bskBASKET, id_EdBoardGroup, dummy, dummy, dEdBoardSel, dRefereeSel, dRefereeRecom, dEdBoardRecom, dPubComRecom, dProjectLeaderAction) = infos[0] else: return _("Approval has never been requested for this document.") + "
 " ## Removing call to deprecated "getInAlice" function and replacing it with ## a call to the newer "get_brief_doc_details_from_repository" function: ## try: ## (authors,title,sysno,newrn) = getInAlice(doctype,categ,RN) ## except TypeError: ## return _("Unable to display document.") item_details = get_brief_doc_details_from_repository(RN) ## get_brief_doc_details_from_repository has returned either None ## or a dictionary with the following structure: ## { 'title' : '-', ## String - the item's title ## 'recid' : '', ## String - recid ## 'report-number' : '', ## String - the item's report number ## 'authors' : [], ## List - the item's authors ## } if item_details is not None: ## Details of the item were found in the CDS Invenio repository authors = ", ".join(item_details['authors']) newrn = item_details['report-number'] title = item_details['title'] sysno = item_details['recid'] else: ## Can't find any document details. return _("Unable to display document.") if (action == "EdBoardSel") and (apptype == "RPB"): if __is_PubCom (req, doctype) != 0: return _("Action unauthorized for this document.") + "
 " if status == "cancelled": return _("Action unavailable for this document.") + "
 " if validate == "go": if dEdBoardSel == None: __db_set_EdBoardSel_time (key) perform_request_send (uid, "", RN, TEXT_RPB_EdBoardSel_MSG_EDBOARD_SUBJECT, TEXT_RPB_EdBoardSel_MSG_EDBOARD_BODY) return displayCplxDocument(req, doctype,categ,RN,apptype, ln) id_EdBoardGroup = __db_check_EdBoardGroup (key, id_EdBoardGroup, uid, TEXT_RPB_EdBoardSel_EDBOARD_GROUP_DESCR) subtitle1 = _('Adding users to the editorial board') # remove letters not allowed in an email email_user_pattern = cleanstring_email(email_user_pattern) stopon1 = "" stopon2 = "" stopon3 = "" users = [] extrausers = [] # pattern is entered if email_user_pattern: # users with matching email-address users1 = run_sql("""SELECT id, email FROM user WHERE email RLIKE %s ORDER BY email """, (email_user_pattern, )) # users that are connected users2 = run_sql("""SELECT DISTINCT u.id, u.email FROM user u LEFT JOIN user_usergroup ug ON u.id = ug.id_user WHERE ug.id_usergroup = %s AND u.email RLIKE %s ORDER BY u.email """, (id_EdBoardGroup, email_user_pattern)) # no users that match the pattern if not (users1 or users2): stopon1 = '

%s

' % _("no qualified users, try new search.") elif len(users1) > MAXSELECTUSERS: stopon1 = '

%s %s, %s (%s %s)

' % (len(users1), _("hits"), _("too many qualified users, specify more narrow search."), _("limit"), MAXSELECTUSERS) # show matching users else: users = [] extrausers = [] for (user_id, email) in users1: if (user_id, email) not in users2: users.append([user_id,email,'']) for (user_id, email) in users2: extrausers.append([-user_id, email,'']) try: id_user = int(id_user) except ValueError: pass # user selected already connected to role email_out = acc_get_user_email(id_user) if id_user < 0: stopon2 = '

%s

' % _("users in brackets are already attached to the role, try another one...") # a user is selected elif email_out: result = insert_new_member(id_user, id_EdBoardGroup, "M") stopon2 = '

confirm: user %s added to the editorial board.

' % (email_out, ) subtitle2 = _('Removing users from the editorial board') usersremove = run_sql("""SELECT DISTINCT u.id, u.email FROM user u LEFT JOIN user_usergroup ug ON u.id = ug.id_user WHERE ug.id_usergroup = %s and user_status != 'A' AND user_status != 'P' ORDER BY u.email """, (id_EdBoardGroup, )) try: id_user_remove = int(id_user_remove) except ValueError: pass # user selected already connected to role email_out = acc_get_user_email(id_user_remove) # a user is selected if email_out: result = delete_member(id_EdBoardGroup, id_user_remove) stopon3 = '

confirm: user %s removed from the editorial board.

' % (email_out, ) t = websubmit_templates.tmpl_publiline_displaydocplxaction ( ln = ln, doctype = doctype, categ = categ, rn = RN, apptype = apptype, action = action, status = status, images = images, authors = authors, title = title, sysno = sysno, subtitle1 = subtitle1, email_user_pattern = email_user_pattern, stopon1 = stopon1, users = users, extrausers = extrausers, stopon2 = stopon2, subtitle2 = subtitle2, usersremove = usersremove, stopon3 = stopon3, validate_btn = _("Validate the editorial board selection"), ) return t elif (action == "RefereeSel") and ((apptype == "RRP") or (apptype == "RPB")): if apptype == "RRP": to_check = __is_PubCom (req, doctype) TEXT_RefereeSel_BASKET_DESCR = TEXT_RRP_RefereeSel_BASKET_DESCR TEXT_RefereeSel_MSG_REFEREE_SUBJECT = TEXT_RRP_RefereeSel_MSG_REFEREE_SUBJECT TEXT_RefereeSel_MSG_REFEREE_BODY = TEXT_RRP_RefereeSel_MSG_REFEREE_BODY TEXT_RefereeSel_MSG_GROUP_SUBJECT = TEXT_RRP_RefereeSel_MSG_GROUP_SUBJECT TEXT_RefereeSel_MSG_GROUP_BODY = TEXT_RRP_RefereeSel_MSG_GROUP_BODY elif apptype == "RPB": to_check = __is_EdBoard (uid, id_EdBoardGroup) TEXT_RefereeSel_BASKET_DESCR = TEXT_RRP_RefereeSel_BASKET_DESCR TEXT_RefereeSel_MSG_REFEREE_SUBJECT = TEXT_RRP_RefereeSel_MSG_REFEREE_SUBJECT TEXT_RefereeSel_MSG_REFEREE_BODY = TEXT_RRP_RefereeSel_MSG_REFEREE_BODY TEXT_RefereeSel_MSG_GROUP_SUBJECT = TEXT_RRP_RefereeSel_MSG_GROUP_SUBJECT TEXT_RefereeSel_MSG_GROUP_BODY = TEXT_RRP_RefereeSel_MSG_GROUP_BODY else: to_check = None if to_check != 0: return _("Action unauthorized for this document.") + "
 " if status == "cancelled": return _("Action unavailable for this document.") + "
 " if validate == "go": if dRefereeSel == None: id_bsk = basketdb.create_basket (int(id_user_val), RN, TEXT_RefereeSel_BASKET_DESCR) basketdb.share_basket_with_group (id_bsk, id_group, CFG_WEBBASKET_SHARE_LEVELS['ADDCMT']) basketdb.add_to_basket (int(id_user_val), (sysno, ), (id_bsk, )) __db_set_basket (key, id_bsk) referee_name = run_sql("""SELECT nickname FROM user WHERE id = %s """, (id_user_val, ))[0][0] perform_request_send (uid, referee_name, "", TEXT_RefereeSel_MSG_REFEREE_SUBJECT, TEXT_RefereeSel_MSG_REFEREE_BODY) group_name = run_sql("""SELECT name FROM usergroup WHERE id = %s""", (id_group, ))[0][0] perform_request_send (int(id_user_val), "", group_name, TEXT_RefereeSel_MSG_GROUP_SUBJECT, TEXT_RefereeSel_MSG_GROUP_BODY) return displayCplxDocument(req, doctype,categ,RN,apptype, ln) subtitle1 = _('Referee selection') # remove letters not allowed in an email email_user_pattern = cleanstring_email(email_user_pattern) stopon1 = "" stopon2 = "" users = [] extrausers = [] # pattern is entered if email_user_pattern: # users with matching email-address users1 = run_sql("""SELECT id, email FROM user WHERE email RLIKE %s ORDER BY email """, (email_user_pattern, )) # no users that match the pattern if not users1: stopon1 = '

%s

' % _("no qualified users, try new search.") elif len(users1) > MAXSELECTUSERS: stopon1 = '

%s %s, %s (%s %s)

' % (len(users1), _("hits"), _("too many qualified users, specify more narrow search."), _("limit"), MAXSELECTUSERS) # show matching users else: users = [] for (user_id, email) in users1: users.append([user_id,email,'']) try: id_user = int(id_user) except ValueError: pass # user selected already connected to role email_out = acc_get_user_email(id_user) # a user is selected if email_out: stopon2 = """

user %s will be the referee?

""" % (email_out, id_user) t = websubmit_templates.tmpl_publiline_displaydocplxaction ( ln = ln, doctype = doctype, categ = categ, rn = RN, apptype = apptype, action = action, status = status, images = images, authors = authors, title = title, sysno = sysno, subtitle1 = subtitle1, email_user_pattern = email_user_pattern, stopon1 = stopon1, users = users, extrausers = [], stopon2 = stopon2, subtitle2 = "", usersremove = [], stopon3 = "", validate_btn = "", ) return t elif (action == "AddAuthorList") and (apptype == "RPB"): return "" elif (action == "AddComment") and ((apptype == "RRP") or (apptype == "RPB")): t = "" if validate == "go": (errors, infos) = perform_request_save_comment (uid, id_bskBASKET, sysno, msg_subject, msg_body, ln) t += "%(infos)s

" % {'infos' : infos[0]} t += """
""" % {'doctype' : doctype, 'categ' : categ, 'rn' : RN, 'apptype' : apptype, 'button_label' : _("Come back to the document"), } return t elif (action == "RefereeRecom") and ((apptype == "RRP") or (apptype == "RPB")): if __is_Referee (uid, id_bskBASKET) != 0: return _("Action unauthorized for this document.") + "
 " if status == "cancelled": return _("Action unavailable for this document.") + "
 " if apptype == "RRP": # Build publication committee chair's email address user_addr = "" # Try to retrieve the publication committee chair's email from the role database for user in acc_get_role_users(acc_get_role_id("pubcomchair_%s_%s" % (doctype,categ))): user_addr += run_sql("""SELECT nickname FROM user WHERE id = %s """, (user[0], ))[0][0] + "," # And if there are general publication committee chair's for user in acc_get_role_users(acc_get_role_id("pubcomchair_%s_*" % doctype)): user_addr += run_sql("""SELECT nickname FROM user WHERE id = %s """, (user[0], ))[0][0] + "," user_addr = re.sub(",$","",user_addr) group_addr = "" TEXT_RefereeRecom_MSG_SUBJECT = TEXT_RRP_RefereeRecom_MSG_PUBCOM_SUBJECT elif apptype == "RPB": user_addr = "" group_addr = RN TEXT_RefereeRecom_MSG_SUBJECT = TEXT_RPB_RefereeRecom_MSG_EDBOARD_SUBJECT else: user_addr = "" group_addr = "" TEXT_RefereeRecom_MSG_SUBJECT = "" if validate == "go": if dRefereeRecom == None: perform_request_send (uid, user_addr, group_addr, msg_subject, msg_body) __db_set_RefereeRecom_time (key) return displayCplxDocument(req, doctype,categ,RN,apptype, ln) t = websubmit_templates.tmpl_publiline_displaycplxrecom ( ln = ln, doctype = doctype, categ = categ, rn = RN, apptype = apptype, action = action, status = status, images = images, authors = authors, title = title, sysno = sysno, msg_to = user_addr, msg_to_group = group_addr, msg_subject = TEXT_RefereeRecom_MSG_SUBJECT, ) return t elif (action == "EdBoardRecom") and (apptype == "RPB"): if __is_EdBoard (uid, id_EdBoardGroup) != 0: return _("Action unauthorized for this document.") + "
 " if status == "cancelled": return _("Action unavailable for this document.") + "
 " # Build publication committee chair's email address user_addr = "" # Try to retrieve the publication committee chair's email from the role database for user in acc_get_role_users(acc_get_role_id("pubcomchair_%s_%s" % (doctype,categ))): user_addr += run_sql("""SELECT nickname FROM user WHERE id = %s """, (user[0], ))[0][0] + "," # And if there are general publication committee chair's for user in acc_get_role_users(acc_get_role_id("pubcomchair_%s_*" % doctype)): user_addr += run_sql("""SELECT nickname FROM user WHERE id = %s """, (user[0], ))[0][0] + "," user_addr = re.sub(",$","",user_addr) if validate == "go": if dEdBoardRecom == None: perform_request_send (uid, user_addr, "", msg_subject, msg_body) __db_set_EdBoardRecom_time (key) return displayCplxDocument(req, doctype,categ,RN,apptype, ln) t = websubmit_templates.tmpl_publiline_displaycplxrecom ( ln = ln, doctype = doctype, categ = categ, rn = RN, apptype = apptype, action = action, status = status, images = images, authors = authors, title = title, sysno = sysno, msg_to = user_addr, msg_to_group = "", msg_subject = TEXT_RPB_EdBoardRecom_MSG_PUBCOM_SUBJECT, ) return t elif (action == "PubComRecom") and ((apptype == "RRP") or (apptype == "RPB")): if __is_PubCom (req, doctype) != 0: return _("Action unauthorized for this document.") + "
 " if status == "cancelled": return _("Action unavailable for this document.") + "
 " # Build project leader's email address user_addr = "" # Try to retrieve the project leader's email from the role database for user in acc_get_role_users(acc_get_role_id("projectleader_%s_%s" % (doctype,categ))): user_addr += run_sql("""SELECT nickname FROM user WHERE id = %s """, (user[0], ))[0][0] + "," # And if there are general project leader's for user in acc_get_role_users(acc_get_role_id("projectleader_%s_*" % doctype)): user_addr += run_sql("""SELECT nickname FROM user WHERE id = %s """, (user[0], ))[0][0] + "," user_addr = re.sub(",$","",user_addr) if apptype == "RRP": TEXT_PubComRecom_MSG_SUBJECT = TEXT_RRP_PubComRecom_MSG_PRJLEADER_SUBJECT elif apptype == "RPB": group_addr = RN TEXT_PubComRecom_MSG_SUBJECT = TEXT_RPB_PubComRecom_MSG_PRJLEADER_SUBJECT else: TEXT_PubComRecom_MSG_SUBJECT = "" if validate == "go": if dPubComRecom == None: perform_request_send (uid, user_addr, "", msg_subject, msg_body) __db_set_PubComRecom_time (key) return displayCplxDocument(req, doctype,categ,RN,apptype, ln) t = websubmit_templates.tmpl_publiline_displaycplxrecom ( ln = ln, doctype = doctype, categ = categ, rn = RN, apptype = apptype, action = action, status = status, images = images, authors = authors, title = title, sysno = sysno, msg_to = user_addr, msg_to_group = "", msg_subject = TEXT_PubComRecom_MSG_SUBJECT, ) return t elif (action == "ProjectLeaderDecision") and ((apptype == "RRP") or (apptype == "RPB")): if __is_ProjectLeader (req, doctype, categ) != 0: return _("Action unauthorized for this document.") + "
 " if status == "cancelled": return _("Action unavailable for this document.") + "
 " t += """
""" % {'doctype' : doctype, 'categ' : categ, 'rn' : RN, 'apptype' : apptype, 'button_label' : _("Come back to the document"), } if validate == "approve": if dProjectLeaderAction == None: (errors, infos) = perform_request_save_comment (uid, id_bskBASKET, sysno, msg_subject, msg_body, ln) out = "%(infos)s

" % {'infos' : infos[0]} __db_set_status (key, 'approved') return out + t elif validate == "reject": if dProjectLeaderAction == None: (errors, infos) = perform_request_save_comment (uid, id_bskBASKET, sysno, msg_subject, msg_body, ln) out = "%(infos)s

" % {'infos' : infos[0]} __db_set_status (key, 'rejected') return out + t validation = """ """ % {'select' : _('Select:'), 'approve' : _('Approve'), 'reject' : _('Reject'), 'button_label' : _('Take a decision'), } if apptype == "RRP": TEXT_ProjectLeaderDecision_MSG_SUBJECT = TEXT_RRP_ProjectLeaderDecision_MSG_SUBJECT elif apptype == "RPB": TEXT_ProjectLeaderDecision_MSG_SUBJECT = TEXT_RPB_ProjectLeaderDecision_MSG_SUBJECT else: TEXT_ProjectLeaderDecision_MSG_SUBJECT = "" t = websubmit_templates.tmpl_publiline_displaywritecomment(doctype, categ, RN, apptype, action, _("Take a decision"), TEXT_ProjectLeaderDecision_MSG_SUBJECT, validation, ln) return t elif (action == "ProjectLeaderDecision") and (apptype == "RDA"): if __is_ProjectLeader (req, doctype, categ) != 0: return _("Action unauthorized for this document.") + "
 " if status == "cancelled": return _("Action unavailable for this document.") + "
 " if validate == "approve": if dProjectLeaderAction == None: __db_set_status (key, 'approved') return displayCplxDocument(req, doctype,categ,RN,apptype, ln) elif validate == "reject": if dProjectLeaderAction == None: __db_set_status (key, 'rejected') return displayCplxDocument(req, doctype,categ,RN,apptype, ln) t = """

""" % { 'rn' : RN, 'categ' : categ, 'doctype' : doctype, 'apptype' : apptype, 'action' : action, 'approve' : _('Approve'), 'reject' : _('Reject'), } return t elif (action == "AuthorCancel") and ((apptype == "RRP") or (apptype == "RPB") or (apptype == "RDA")): if __is_Author (uid, sysno) != 0: return _("Action unauthorized for this document.") + "
 " if (status == "cancelled") or (dProjectLeaderAction != None): return _("Action unavailable for this document.") + "
 " if validate == "go": __db_set_status (key, 'cancelled') return displayCplxDocument(req, doctype,categ,RN,apptype, ln) t = """

""" % { 'rn' : RN, 'categ' : categ, 'doctype' : doctype, 'apptype' : apptype, 'action' : action, 'cancel' : _('Cancel'), } return t else: return _("Wrong action for this document.") + "
 " return t def get_pending_item_details(doctype, reportnumber): """Given a doctype and reference number, try to retrieve an item's details. The first place to search for them should be the WebSubmit pending directory. If nothing is retrieved from there, and attempt is made to retrieve them from the CDS Invenio repository itself. @param doctype: (string) - the doctype of the item for which brief details are to be retrieved. @param reportnumber: (string) - the report number of the item for which details are to be retrieved. @return: (dictionary or None) - If details are found for the item, they will be returned in a dictionary structured as follows: { 'title' : '-', ## String - the item's title 'recid' : '', ## String - recid taken from the SN file 'report-number' : '', ## String - the item's report number 'authors' : [], ## List - the item's authors } If no details were found a NoneType is returned. """ ## First try to get the details of a document from the pending dir: item_details = get_brief_doc_details_from_pending(doctype, \ reportnumber) if item_details is None: item_details = get_brief_doc_details_from_repository(reportnumber) ## Return the item details: return item_details def get_brief_doc_details_from_pending(doctype, reportnumber): """Try to get some brief details about the submission that is awaiting the referee's decision. Details sought are: + title + Authors + recid (why?) + report-number (why?) This function searches for a MARC XML record in the pending submission's working directory. It prefers the so-called 'dummy' record, but will search for the final MARC XML record that would usually be passed to bibupload (i.e. recmysql) if that is not present. If neither of these records are present, no details will be found. @param doctype: (string) - the WebSubmit document type of the item to be refereed. It is used in order to locate the submission's working directory in the WebSubmit pending directory. @param reportnumber: (string) - the report number of the item for which details are to be recovered. It is used in order to locate the submission's working directory in the WebSubmit pending directory. @return: (dictionary or None) - If details are found for the item, they will be returned in a dictionary structured as follows: { 'title' : '-', ## String - the item's title 'recid' : '', ## String - recid taken from the SN file 'report-number' : '', ## String - the item's report number 'authors' : [], ## List - the item's authors } If no details were found (i.e. no MARC XML files in the submission's working directory), a NoneType is returned. """ pending_doc_details = None marcxml_rec_name = None ## Check for a MARC XML record in the pending dir. ## If it's there, we will use it to obtain certain bibliographic ## information such as title, author(s), etc, which we will then ## display to the referee. ## We favour the "dummy" record (created with the WebSubmit function ## "Make_Dummy_MARC_XML_Record"), because it was made for this ## purpose. If it's not there though, we'll take the normal ## (final) recmysql record that would generally be passed to bibupload. if os.access("%s/%s/%s/%s" % (CFG_WEBSUBMIT_PENDING_DIR, \ doctype, \ reportnumber, \ CFG_WEBSUBMIT_DUMMY_MARC_XML_REC), \ os.F_OK|os.R_OK): ## Found the "dummy" marc xml record in the submission dir. 
## Use it: marcxml_rec_name = CFG_WEBSUBMIT_DUMMY_MARC_XML_REC elif os.access("%s/%s/%s/%s" % (CFG_WEBSUBMIT_PENDING_DIR, \ doctype, \ reportnumber, \ CFG_WEBSUBMIT_MARC_XML_REC), \ os.F_OK|os.R_OK): ## Although we didn't find the "dummy" marc xml record in the ## submission dir, we did find the "real" one (that which would ## normally be passed to bibupload). Use it: marcxml_rec_name = CFG_WEBSUBMIT_MARC_XML_REC ## If we have a MARC XML record in the pending submission's ## working directory, go ahead and use it: if marcxml_rec_name is not None: try: fh_marcxml_record = open("%s/%s/%s/%s" \ % (CFG_WEBSUBMIT_PENDING_DIR, \ doctype, \ reportnumber, \ marcxml_rec_name), "r") xmltext = fh_marcxml_record.read() fh_marcxml_record.close() except IOError: ## Unfortunately, it wasn't possible to read the details of the ## MARC XML record. Register the exception. exception_prefix = "Error: Publiline was unable to read the " \ "MARC XML record [%s/%s/%s/%s] when trying to " \ "use it to recover details about a pending " \ "submission." % (CFG_WEBSUBMIT_PENDING_DIR, \ doctype, \ reportnumber, \ marcxml_rec_name) register_exception(prefix=exception_prefix) else: ## Attempt to use bibrecord to create an internal representation ## of the record, from which we can extract certain bibliographic ## information: records = create_records(xmltext, 1, 1) try: record = records[0][0] except IndexError: ## Bibrecord couldn't successfully represent the record ## contained in the xmltext string. The record must have ## been empty or badly formed (or something). pass else: ## Dictionary to hold the interesting details of the ## pending item: pending_doc_details = { 'title' : '-', 'recid' : '', 'report-number' : '', 'authors' : [], } ## Get the recid: ## Note - the old "getInPending" function reads the "SN" ## file from the submission's working directory and since ## the "SN" file is currently "magic" and hardcoded ## throughout WebSubmit, I'm going to stick to this model. ## I could, however, have tried to get it from the MARC XML ## record as so: ## recid = record_get_field_value(rec=record, tag="001") try: fh_recid = open("%s/%s/%s/SN" \ % (CFG_WEBSUBMIT_PENDING_DIR, \ doctype, \ reportnumber), "r") recid = fh_recid.read() fh_recid.close() except IOError: ## Probably, there was no "SN" file in the submission's ## working directory. pending_doc_details['recid'] = "" else: pending_doc_details['recid'] = recid.strip() ## Item report number (from record): ## Note: I don't know what purpose this serves. It appears ## to be used in the email that is sent to the author, but ## it seems funny to me, since we already have the report ## number (which is indeed used to find the submission's ## working directory in pending). Perhaps it's used for ## cases when the reportnumber is changed after approval? 
## To investigate when time allows: finalrn = record_get_field_value(rec=record, \ tag="037", \ code="a") if finalrn != "": pending_doc_details['report-number'] = finalrn ## Item title: title = record_get_field_value(rec=record, \ tag="245", \ code="a") if title != "": pending_doc_details['title'] = title else: ## Alternative title: alt_title = record_get_field_value(rec=record, \ tag="246", \ ind1="1", \ code="a") if alt_title != "": pending_doc_details['title'] = alt_title ## Item first author: first_author = record_get_field_value(rec=record, \ tag="100", \ code="a") if first_author != "": pending_doc_details['authors'].append(first_author) ## Other Authors: other_authors = record_get_field_values(rec=record, \ tag="700", \ code="a") for author in other_authors: pending_doc_details['authors'].append(author) ## Return the details discovered about the pending document: return pending_doc_details def get_brief_doc_details_from_repository(reportnumber): """Try to get some brief details about the submission that is awaiting the referee's decision. Details sought are: + title + Authors + recid (why?) + report-number (why?) This function searches in the CDS Invenio repository, based on "reportnumber" for a record and then pulls the interesting fields from it. @param reportnumber: (string) - the report number of the item for which details are to be recovered. It is used in the search. @return: (dictionary or None) - If details are found for the item, they will be returned in a dictionary structured as follows: { 'title' : '-', ## String - the item's title 'recid' : '', ## String - recid taken from the SN file 'report-number' : '', ## String - the item's report number 'authors' : [], ## List - the item's authors } If no details were found a NoneType is returned. """ ## Details of the pending document, as found in the repository: pending_doc_details = None ## Search for records matching this "report number" found_record_ids = list(search_pattern(req=None, \ p=reportnumber, \ f="reportnumber", \ m="e")) ## How many records were found? if len(found_record_ids) == 1: ## Found only 1 record. Get the fields of interest: pending_doc_details = { 'title' : '-', 'recid' : '', 'report-number' : '', 'authors' : [], } recid = found_record_ids[0] ## Authors: first_author = get_fieldvalues(recid, "100__a") for author in first_author: pending_doc_details['authors'].append(author) other_authors = get_fieldvalues(recid, "700__a") for author in other_authors: pending_doc_details['authors'].append(author) ## Title: title = get_fieldvalues(recid, "245__a") if len(title) > 0: pending_doc_details['title'] = title[0] else: ## There was no value for title - check for an alternative title: alt_title = get_fieldvalues(recid, "2641_a") if len(alt_title) > 0: pending_doc_details['title'] = alt_title[0] ## Record ID: pending_doc_details['recid'] = recid ## Report Number: reptnum = get_fieldvalues(recid, "037__a") if len(reptnum) > 0: pending_doc_details['report-number'] = reptnum[0] elif len(found_record_ids) > 1: ## Oops. This is unexpected - there shouldn't be me multiple matches ## for this item. The old "getInAlice" function would have simply ## taken the first record in the list. That's not very nice though. ## Some kind of warning or error should be raised here. FIXME. 
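The FIXME above notes that several repository hits for one report number are currently ignored without a trace. One hedged way to make the ambiguity visible is sketched below (with a made-up report number in the demo); the real code could call register_exception(), already imported in this module, at the marked spot instead of the standard warnings module:

import warnings

def pick_single_recid(found_record_ids, reportnumber):
    """Return the only matching recid, or None.  Unlike the current code,
    an ambiguous search result is reported rather than silently dropped."""
    if len(found_record_ids) == 1:
        return found_record_ids[0]
    if len(found_record_ids) > 1:
        # here the real code could use register_exception() instead
        warnings.warn("%d records match report number %s; refusing to guess."
                      % (len(found_record_ids), reportnumber))
    return None

if __name__ == "__main__":
    print(pick_single_recid([42], "DEMO-REPORT-2008-001"))      # -> 42
    print(pick_single_recid([42, 43], "DEMO-REPORT-2008-001"))  # -> None, with a warning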
pass return pending_doc_details # Retrieve info about document def getInfo(doctype,categ,RN): """FIXME: DEPRECATED!""" result = getInPending(doctype,categ,RN) if not result: result = getInAlice(doctype,categ,RN) return result #seek info in pending directory def getInPending(doctype,categ,RN): """FIXME: DEPRECATED!""" PENDIR="%s/pending" % CFG_WEBSUBMIT_STORAGEDIR if os.path.exists("%s/%s/%s/AU" % (PENDIR,doctype,RN)): fp = open("%s/%s/%s/AU" % (PENDIR,doctype,RN),"r") authors=fp.read() fp.close() else: authors = "" if os.path.exists("%s/%s/%s/TI" % (PENDIR,doctype,RN)): fp = open("%s/%s/%s/TI" % (PENDIR,doctype,RN),"r") title=fp.read() fp.close() else: title = "" if os.path.exists("%s/%s/%s/SN" % (PENDIR,doctype,RN)): fp = open("%s/%s/%s/SN" % (PENDIR,doctype,RN),"r") sysno=fp.read() fp.close() else: sysno = "" if title == "" and os.path.exists("%s/%s/%s/TIF" % (PENDIR,doctype,RN)): fp = open("%s/%s/%s/TIF" % (PENDIR,doctype,RN),"r") title=fp.read() fp.close() if title == "": return 0 else: return (authors,title,sysno,"") #seek info in Alice database def getInAlice(doctype,categ,RN): """FIXME: DEPRECATED!""" # initialize sysno variable sysno = "" searchresults = list(search_pattern(req=None, p=RN, f="reportnumber")) if len(searchresults) == 0: return 0 sysno = searchresults[0] if sysno != "": title = Get_Field('245__a',sysno) emailvalue = Get_Field('8560_f',sysno) authors = Get_Field('100__a',sysno) authors += "\n%s" % Get_Field('700__a',sysno) newrn = Get_Field('037__a',sysno) return (authors,title,sysno,newrn) else: return 0 def SendEnglish(doctype,categ,RN,title,authors,access,sysno): FROMADDR = '%s Submission Engine <%s>' % (CFG_SITE_NAME,supportemail) # retrieve useful information from webSubmit configuration res = run_sql("select value from sbmPARAMETERS where name='categformatDAM' and doctype=%s", (doctype,)) categformat = res[0][0] categformat = re.sub("","([^-]*)",categformat) categs = re.match(categformat,RN) if categs is not None: categ = categs.group(1) else: categ = "unknown" res = run_sql("select value from sbmPARAMETERS where name='addressesDAM' and doctype=%s",(doctype,)) if len(res) > 0: otheraddresses = res[0][0] otheraddresses = otheraddresses.replace("",categ) else: otheraddresses = "" # Build referee's email address refereeaddress = "" # Try to retrieve the referee's email from the referee's database - for user in acc_get_role_users(acc_getRoleId("referee_%s_%s" % (doctype,categ))): + for user in acc_get_role_users(acc_get_role_id("referee_%s_%s" % (doctype,categ))): refereeaddress += user[1] + "," # And if there are general referees - for user in acc_get_role_users(acc_getRoleId("referee_%s_*" % doctype)): + for user in acc_get_role_users(acc_get_role_id("referee_%s_*" % doctype)): refereeaddress += user[1] + "," refereeaddress = re.sub(",$","",refereeaddress) # Creation of the mail for the referee addresses = "" if refereeaddress != "": addresses = refereeaddress + "," if otheraddresses != "": addresses += otheraddresses else: addresses = re.sub(",$","",addresses) if addresses=="": SendWarning(doctype,categ,RN,title,authors,access) return 0 if authors == "": authors = "-" res = run_sql("select value from sbmPARAMETERS where name='directory' and doctype=%s", (doctype,)) directory = res[0][0] message = """ The document %s has been published as a Communication. Your approval is requested for it to become an official Note. 
Title: %s Author(s): %s To access the document(s), select the file(s) from the location: <%s/record/%s/files/> To approve/reject the document, you should go to this URL: <%s/approve.py?%s> --------------------------------------------- Best regards. The submission team.""" % (RN,title,authors,urlpath,sysno,urlpath,access) # send the mail send_email(FROMADDR,addresses,"Request for Approval of %s" % RN, message,footer="") return "" def SendWarning(doctype,categ,RN,title,authors,access): FROMADDR = '%s Submission Engine <%s>' % (CFG_SITE_NAME,supportemail) message = "Failed sending approval email request for %s" % RN # send the mail send_email(FROMADDR,CFG_SITE_ADMIN_EMAIL,"Failed sending approval email request",message) return "" def errorMsg(title,req,c=CFG_SITE_NAME,ln=CFG_SITE_LANG): return page(title="error", body = create_error_box(req, title=title,verbose=0, ln=ln), description="%s - Internal Error" % c, keywords="%s, Internal Error" % c, uid = getUid(req), language=ln, req=req, navmenuid='yourapprovals') def warningMsg(title,req,c=CFG_SITE_NAME,ln=CFG_SITE_LANG): return page(title="warning", body = title, description="%s - Internal Error" % c, keywords="%s, Internal Error" % c, uid = getUid(req), language=ln, req=req, navmenuid='yourapprovals') diff --git a/modules/websubmit/web/yourapprovals.py b/modules/websubmit/web/yourapprovals.py index 4dbbfea96..54eb20819 100644 --- a/modules/websubmit/web/yourapprovals.py +++ b/modules/websubmit/web/yourapprovals.py @@ -1,122 +1,122 @@ ## $Id$ ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
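For reference, SendEnglish() above assembles its approval-request recipient list from the category-specific referee role, the general referee role and the addressesDAM parameter, strips the trailing comma, and falls back to SendWarning() when nothing is left. A minimal sketch of that assembly, with hypothetical addresses used only for the demo:

def build_recipient_list(category_referees, general_referees, other_addresses=""):
    """Join recipients the way SendEnglish() does: category referees first,
    then general referees, then any extra addresses configured for the
    document type.  An empty result means the caller should fall back to
    the SendWarning() path."""
    parts = [addr for addr in list(category_referees) + list(general_referees) if addr]
    if other_addresses:
        parts.append(other_addresses)
    return ",".join(parts)

if __name__ == "__main__":
    print(build_recipient_list(["ref-phys@example.org"], [], "board@example.org"))
    print(build_recipient_list([], [], ""))  # -> "" : nobody to notify, warn instead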
__revision__ = "$Id$" ## import interesting modules: import os import sys from invenio.config import \ CFG_ACCESS_CONTROL_LEVEL_SITE, \ CFG_SITE_LANG, \ CFG_SITE_NAME, \ sweburl, \ CFG_VERSION from invenio.dbquery import run_sql, Error from invenio.access_control_engine import acc_authorize_action from invenio.access_control_admin import * from invenio.webpage import page, create_error_box from invenio.webuser import getUid, get_email, list_registered_users, page_not_authorized from invenio.messages import gettext_set_language, wash_language from invenio.websubmit_config import * from invenio.search_engine import search_pattern import invenio.template websubmit_templates = invenio.template.load('websubmit') def index(req,c=CFG_SITE_NAME,ln=CFG_SITE_LANG,order="",doctype="",deletedId="",deletedAction="",deletedDoctype=""): global uid ln = wash_language(ln) # load the right message language _ = gettext_set_language(ln) t="" # get user ID: try: uid = getUid(req) if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1: return page_not_authorized(req, "../yourapprovals.py/index", navmenuid='yourapprovals') u_email = get_email(uid) except Error, e: - return errorMsg(e.value,req, ln = ln) + return errorMsg(str(e), req, ln = ln) res = run_sql("select sdocname,ldocname from sbmDOCTYPE") referees = [] for row in res: doctype = row[0] docname = row[1] reftext = "" if isReferee(req,doctype,"*"): referees.append ({'doctype': doctype, 'docname': docname, 'categories': None}) else: res2 = run_sql("select sname,lname from sbmCATEGORIES where doctype=%s",(doctype,)) categories = [] for row2 in res2: category = row2[0] categname = row2[1] if isReferee(req,doctype,category): categories.append({ 'id' : category, 'name' : categname, }) referees.append({ 'doctype' : doctype, 'docname' : docname, 'categories' : categories }) t = websubmit_templates.tmpl_yourapprovals( ln = ln, referees = referees ) return page(title=_("Your Approvals"), navtrail= """%(account)s""" % { 'sweburl' : sweburl, 'account' : _("Your Account"), }, body=t, description="", keywords="", uid=uid, language=ln, req=req, navmenuid='yourapprovals') def isReferee(req,doctype="",categ=""): (auth_code, auth_message) = acc_authorize_action(req, "referee",verbose=0,doctype=doctype, categ=categ) if auth_code == 0: return 1 else: return 0 def errorMsg(title,req,c=CFG_SITE_NAME,ln=CFG_SITE_LANG): return page(title="error", body = create_error_box(req, title=title,verbose=0, ln=ln), description="%s - Internal Error" % c, keywords="%s, Internal Error" % c, language=ln, req=req, navmenuid='yourapprovals') diff --git a/modules/websubmit/web/yoursubmissions.py b/modules/websubmit/web/yoursubmissions.py index 7c18b4aea..1f536be1a 100644 --- a/modules/websubmit/web/yoursubmissions.py +++ b/modules/websubmit/web/yoursubmissions.py @@ -1,218 +1,218 @@ ## $Id$ ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. 
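isReferee() above is a thin wrapper that maps an auth_code of 0 from acc_authorize_action() onto 1, and index() uses it to build the structure handed to tmpl_yourapprovals: a general ('*') referee gets categories=None, otherwise only the categories the user may referee are listed. A small sketch of that menu-building step, with the authorization check injected as a callable so the snippet stays self-contained (the demo data is hypothetical):

def build_referee_menu(doctypes, categories_of, is_referee):
    """Build the per-doctype referee menu as in index() above.
    `doctypes` is a list of (sdocname, ldocname) pairs, `categories_of`
    maps a doctype to (sname, lname) pairs, and `is_referee(doctype, categ)`
    stands in for the acc_authorize_action()-based check."""
    menu = []
    for doctype, docname in doctypes:
        if is_referee(doctype, "*"):
            # general referee: no per-category breakdown needed
            menu.append({'doctype': doctype, 'docname': docname, 'categories': None})
        else:
            cats = [{'id': sname, 'name': lname}
                    for (sname, lname) in categories_of(doctype)
                    if is_referee(doctype, sname)]
            menu.append({'doctype': doctype, 'docname': docname, 'categories': cats})
    return menu

if __name__ == "__main__":
    demo = build_referee_menu([("TEXT", "Text documents")],
                              lambda d: [("PHYS", "Physics")],
                              lambda d, c: c == "PHYS")
    print(demo)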
diff --git a/modules/websubmit/web/yoursubmissions.py b/modules/websubmit/web/yoursubmissions.py
index 7c18b4aea..1f536be1a 100644
--- a/modules/websubmit/web/yoursubmissions.py
+++ b/modules/websubmit/web/yoursubmissions.py
@@ -1,218 +1,218 @@
## $Id$
## This file is part of CDS Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
##
## CDS Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## CDS Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

__revision__ = "$Id$"

## import interesting modules:
import string
import os
import sys
import time
import types
import re
import shutil
import operator

from invenio.config import \
     CFG_ACCESS_CONTROL_LEVEL_SITE, \
     CFG_SITE_LANG, \
     CFG_SITE_NAME, \
     images, \
     CFG_WEBSUBMIT_STORAGEDIR, \
     sweburl, \
     CFG_VERSION, \
     weburl
from invenio.dbquery import run_sql, Error
from invenio.access_control_engine import acc_authorize_action
from invenio.access_control_admin import *
from invenio.webpage import page, create_error_box
from invenio.webuser import getUid, get_email, list_registered_users, page_not_authorized
from invenio.messages import gettext_set_language, wash_language
from invenio.websubmit_config import *
from invenio.search_engine import search_pattern

import invenio.template
websubmit_templates = invenio.template.load('websubmit')

def index(req,c=CFG_SITE_NAME,ln=CFG_SITE_LANG,order="",doctype="",deletedId="",deletedAction="",deletedDoctype=""):
    global uid
    ln = wash_language(ln)

    # load the right message language
    _ = gettext_set_language(ln)

    t=""
    # get user ID:
    try:
        uid = getUid(req)
        if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
            return page_not_authorized(req, "../yoursubmissions.py/index",
                                       navmenuid='yoursubmissions')
        u_email = get_email(uid)
    except Error, e:
-        return errorMsg(e.value, req, ln)
+        return errorMsg(str(e), req, ln=ln)

    if u_email == "guest" or u_email == "":
        return warningMsg(websubmit_templates.tmpl_warning_message(
                               ln = ln,
                               msg = _("Sorry, you must log in to perform this action."),
                          ),req, ln = ln)

    if deletedId != "":
        t += deleteSubmission(deletedId,deletedAction,deletedDoctype,u_email)

    # doctypes
    res = run_sql("select ldocname,sdocname from sbmDOCTYPE order by ldocname")
    doctypes = []
    for row in res:
        doctypes.append({
                          'id' : row[1],
                          'name' : row[0],
                          'selected' : (doctype == row[1]),
                        })

    # submissions
    # request order default value
    reqorder = "sbmSUBMISSIONS.md DESC, lactname"
    # requested value
    if order == "actiondown":
        reqorder = "lactname ASC, sbmSUBMISSIONS.md DESC"
    elif order == "actionup":
        reqorder = "lactname DESC, sbmSUBMISSIONS.md DESC"
    elif order == "refdown":
        reqorder = "reference ASC, sbmSUBMISSIONS.md DESC, lactname DESC"
    elif order == "refup":
        reqorder = "reference DESC, sbmSUBMISSIONS.md DESC, lactname DESC"
    elif order == "cddown":
        reqorder = "sbmSUBMISSIONS.cd DESC, lactname"
    elif order == "cdup":
        reqorder = "sbmSUBMISSIONS.cd ASC, lactname"
    elif order == "mddown":
        reqorder = "sbmSUBMISSIONS.md DESC, lactname"
    elif order == "mdup":
        reqorder = "sbmSUBMISSIONS.md ASC, lactname"
    elif order == "statusdown":
        reqorder = "sbmSUBMISSIONS.status DESC, lactname"
    elif order == "statusup":
        reqorder = "sbmSUBMISSIONS.status ASC, lactname"

    if doctype != "":
        docselect = " and doctype='%s' " % doctype
    else:
        docselect = ""

    res = run_sql("SELECT sbmSUBMISSIONS.* FROM sbmSUBMISSIONS,sbmACTION WHERE sactname=action and email=%s and id!='' "+docselect+" ORDER BY doctype,"+reqorder,(u_email,))

    currentdoctype = ""
    currentaction = ""
    currentstatus = ""
    submissions = []
    for row in res:
        if currentdoctype != row[1]:
            currentdoctype = row[1]
            currentaction = ""
            currentstatus = ""
            res2 = run_sql("SELECT ldocname FROM sbmDOCTYPE WHERE sdocname=%s",(currentdoctype,))
            if res2:
                ldocname = res2[0][0]
            else:
                ldocname = """***Unknown Document Type - (%s)""" % (currentdoctype,)
        if currentaction != row[2]:
            currentaction = row[2]
            res2 = run_sql("SELECT lactname FROM sbmACTION WHERE sactname=%s",(currentaction,))
            if res2:
                lactname = res2[0][0]
            else:
                lactname = "\""
        else:
            lactname = "\""

        if currentstatus != row[3]:
            currentstatus = row[3]
            status=row[3]
        else:
            status = "\""

        submissions.append({
                             'docname' : ldocname,
                             'actname' : lactname,
                             'status' : status,
                             'cdate' : row[6],
                             'mdate' : row[7],
                             'reference' : row[5],
                             'id' : row[4],
                             'act' : currentaction,
                             'doctype' : currentdoctype,
                             'pending' : (row[3] == "pending")
                           })

    # display
    t += websubmit_templates.tmpl_yoursubmissions(
           ln = ln,
           weburl = weburl,
           images = images,
           order = order,
           doctypes = doctypes,
           submissions = submissions,
         )

    return page(title=_("Your Submissions"),
                navtrail= """<a class="navtrail" href="%(sweburl)s/youraccount/display">%(account)s</a>""" % {
                             'sweburl' : sweburl,
                             'account' : _("Your Account"),
                          },
                body=t,
                description="",
                keywords="",
                uid=uid,
                language=ln,
                req=req,
                navmenuid='yoursubmissions')

def deleteSubmission(id, action, doctype, u_email):
    global CFG_WEBSUBMIT_STORAGEDIR
    run_sql("delete from sbmSUBMISSIONS WHERE doctype=%s and action=%s and email=%s and status='pending' and id=%s",(doctype,action,u_email,id,))
    res = run_sql("select dir from sbmACTION where sactname=%s",(action,))
    dir = res[0][0]
    if not ('..' in doctype or '..' in id) and id != "":
        full = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, dir, doctype, id)
        if os.path.isdir(full):
            shutil.rmtree(full)
    return ""

def warningMsg(title,req,c=CFG_SITE_NAME,ln=CFG_SITE_LANG):
    return page(title="warning",
                body = title,
                description="%s - Internal Error" % c,
                keywords="%s, Internal Error" % c,
                uid = getUid(req),
                language=ln,
                req=req,
                navmenuid='yoursubmissions')

def errorMsg(title,req,c=CFG_SITE_NAME,ln=CFG_SITE_LANG):
    return page(title="error",
                body = create_error_box(req, title=title,verbose=0, ln=ln),
                description="%s - Internal Error" % c,
                keywords="%s, Internal Error" % c,
                uid = getUid(req),
                language=ln,
                req=req,
                navmenuid='yoursubmissions')
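As a closing illustration, the long if/elif chain in yoursubmissions.py's index() that maps the order request parameter to an SQL ORDER BY clause can also be read as a lookup table; this dict-based sketch is purely explanatory and is not part of the patch.

# Table-driven restatement of the order -> reqorder mapping in index() above;
# illustrative only, not code from this patch.
SORT_CLAUSES = {
    'actiondown': "lactname ASC, sbmSUBMISSIONS.md DESC",
    'actionup':   "lactname DESC, sbmSUBMISSIONS.md DESC",
    'refdown':    "reference ASC, sbmSUBMISSIONS.md DESC, lactname DESC",
    'refup':      "reference DESC, sbmSUBMISSIONS.md DESC, lactname DESC",
    'cddown':     "sbmSUBMISSIONS.cd DESC, lactname",
    'cdup':       "sbmSUBMISSIONS.cd ASC, lactname",
    'mddown':     "sbmSUBMISSIONS.md DESC, lactname",
    'mdup':       "sbmSUBMISSIONS.md ASC, lactname",
    'statusdown': "sbmSUBMISSIONS.status DESC, lactname",
    'statusup':   "sbmSUBMISSIONS.status ASC, lactname",
}
# Fall back to the same default ordering as the original code.
reqorder = SORT_CLAUSES.get(order, "sbmSUBMISSIONS.md DESC, lactname")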