diff --git a/modules/websearch/bin/webcoll.in b/modules/websearch/bin/webcoll.in index c0fad6716..c5a05bb4e 100644 --- a/modules/websearch/bin/webcoll.in +++ b/modules/websearch/bin/webcoll.in @@ -1,1302 +1,1300 @@ ## $Id$ ## Script that creates collection pages, starting from the collection ## passed to the script as an argument. ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. ## read config variables: #include "config.wml" #include "configbis.wml" #include "cdswmllib.wml" ## start Python: #! ## $Id$ ## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES. """Creates CDSware collection specific pages, using WML and MySQL configuration tables.""" __version__ = "<: print generate_pretty_version_string('$Id$'); :>" ## fill config variables: pylibdir = "/python" ### okay, rest of the Python code goes below ### ## import modules: try: import copy import getopt import getpass import marshal import signal import sys import cgi import sre import os import math import string import urllib import zlib import MySQLdb import Numeric import time import traceback except ImportError, e: print "Error: %s" % e import sys sys.exit(1) try: sys.path.append('%s' % pylibdir) from cdsware.config import * from cdsware.messages import * from cdsware.search_engine import HitSet, search_pattern, get_creation_date, nice_number, get_field_i18nname from cdsware.search_engine_config import cfg_author_et_al_threshold, cfg_instant_browse, cfg_max_recID, cfg_narrow_search_show_grandsons from cdsware.dbquery import run_sql from cdsware.access_control_engine import acc_authorize_action from cdsware.bibrank_record_sorter import get_bibrank_methods except ImportError, e: print "Error: %s" % e import sys sys.exit(1) ## global vars collection_house = {} # will hold collections we treat in this run of the program; a dict of {collname2, collobject1}, ... options = {} # will hold task options def get_collection(colname): """Return collection object from the collection house for given colname. If does not exist, then create it.""" if not collection_house.has_key(colname): colobject = Collection(colname) collection_house[colname] = colobject return collection_house[colname] ## auxiliary functions: def mymkdir(newdir, mode=0777): """works the way a good mkdir should :) - already exists, silently complete - regular file in the way, raise an exception - parent directory(ies) does not exist, make them as well """ if os.path.isdir(newdir): pass elif os.path.isfile(newdir): raise OSError("a file with the same name as the desired " \ "dir, '%s', already exists." % newdir) else: head, tail = os.path.split(newdir) if head and not os.path.isdir(head): mymkdir(head, mode) if tail: os.umask(022) os.mkdir(newdir, mode) def escape_string(s): "Escapes special chars in string. For MySQL queries." s = MySQLdb.escape_string(s) return s def is_selected(var, fld): "Checks if the two are equal, and if yes, returns ' selected'. Useful for select boxes." if var == fld: return " selected" else: return "" def write_message(msg, stream=sys.stdout): """Write message and flush output stream (may be sys.stdout or sys.stderr). Useful for debugging stuff.""" if stream == sys.stdout or stream == sys.stderr: stream.write(time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime())) stream.write("%s\n" % msg) stream.flush() else: sys.stderr.write("Unknown stream %s. [must be sys.stdout or sys.stderr]\n" % stream) return def create_andornot_box(name='op', value='', ln='en'): "Returns HTML code for the AND/OR/NOT selection box." out = """ """ % (name, is_selected('a', value), msg_and[ln], is_selected('o', value), msg_or[ln], is_selected('n', value), msg_and_not[ln]) return out def create_matchtype_box(name='m', value='', ln='en'): "Returns HTML code for the 'match type' selection box." out = """ """ % (name, is_selected('a', value), msg_all_of_the_words[ln], is_selected('o', value), msg_any_of_the_words[ln], is_selected('e', value), msg_exact_phrase[ln], is_selected('p', value), msg_partial_phrase[ln], is_selected('r', value), msg_regular_expression[ln]) return out def get_field(recID, tag): "Gets list of field 'tag' for the record with 'recID' system number." out = [] digit = tag[0:2] bx = "bib%sx" % digit bibx = "bibrec_bib%sx" % digit query = "SELECT bx.value FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec='%s' AND bx.id=bibx.id_bibxxx AND bx.tag='%s'" \ % (bx, bibx, recID, tag) res = run_sql(query) for row in res: out.append(row[0]) return out def print_record(recID, format='hb', ln=cdslang): "Prints record 'recID' formatted accoding to 'format'." out = "" # HTML brief format by default query = "SELECT value FROM bibfmt WHERE id_bibrec='%s' AND format='%s'" % (recID, format) res = run_sql(query, None, 1) if res: # record 'recID' is formatted in 'format', so print it out += "%s" % zlib.decompress(res[0][0]) else: # record 'recID' does not exist in format 'format', so print some default format: # firstly, title: titles = get_field(recID, "245__a") for title in titles: out += "%s " % cgi.escape(title) # secondly, authors: authors = get_field(recID, "100__a") + get_field(recID, "700__a") if authors: out += " / " for i in range (0,cfg_author_et_al_threshold): if i < len(authors): out += """%s ;""" \ % (weburl, urllib.quote(authors[i]), cgi.escape(authors[i])) if len(authors) > cfg_author_et_al_threshold: out += " et al." # thirdly, date of creation: dates = get_field(recID, "260__c") for date in dates: out += " %s." % cgi.escape(date) # thirdly bis, report numbers: rns = get_field(recID, "037__a") for rn in rns: out += """ [%s]""" % cgi.escape(rn) rns = get_field(recID, "088__a") for rn in rns: out += """ [%s]""" % cgi.escape(rn) # fourthly, beginning of abstract: abstracts = get_field(recID, "520__a") for abstract in abstracts: out += "
%s [...]" % cgi.escape(abstract[:1+string.find(abstract, '.')]) # fifthly, fulltext link: urls_z = get_field(recID, "8564_z") urls_u = get_field(recID, "8564_u") for idx in range(0,len(urls_u)): out += """
%s""" % (urls_u[idx], urls_u[idx]) # at the end of HTML mode, print "Detailed record" and "Mark record" functions: out += """
%s""" \ % (weburl, recID, ln, msg_detailed_record[ln]) out += """ - %s\n""" % \ (weburl, recID, ln, msg_similar_records[ln]) #out += """<- Mark record""" % recID out += "" return out class Collection: "Holds the information on collections (id,name,dbquery)." def __init__(self, name=""): "Creates collection instance by querying the MySQL configuration database about 'name'." self.calculate_reclist_run_already = 0 # to speed things up wihtout much refactoring self.update_reclist_run_already = 0 # to speed things up wihtout much refactoring self.reclist_with_nonpublic_subcolls = HitSet() if not name: self.name = cdsname # by default we are working on the home page self.id = 1 self.dbquery = None self.nbrecs = None self.reclist = HitSet() else: self.name = name query = "SELECT id,name,dbquery,nbrecs,reclist FROM collection WHERE name='%s'" % escape_string(name) try: res = run_sql(query, None, 1) if res: self.id = res[0][0] self.name = res[0][1] self.dbquery = res[0][2] self.nbrecs = res[0][3] try: self.reclist = HitSet(Numeric.loads(zlib.decompress(res[0][5]))) except: self.reclist = HitSet() else: # collection does not exist! self.id = None self.dbquery = None self.nbrecs = None self.reclist = HitSet() except MySQLdb.Error, e: print "Error %d: %s" % (e.args[0], e.args[1]) sys.exit(1) def get_name(self, ln=cdslang, name_type="ln", prolog="", epilog="", prolog_suffix=" ", epilog_suffix=""): """Return nicely formatted collection name for language LN. The NAME_TYPE may be 'ln' (=long name), 'sn' (=short name), etc.""" out = prolog i18name = "" res = run_sql("SELECT value FROM collectionname WHERE id_collection=%s AND ln=%s AND type=%s", (self.id, ln, name_type)) try: i18name += res[0][0] except IndexError: pass if i18name: out += i18name else: out += self.name out += epilog return out def get_ancestors(self): "Returns list of ancestors of the current collection." ancestors = [] id_son = self.id while 1: query = "SELECT cc.id_dad,c.name FROM collection_collection AS cc, collection AS c "\ "WHERE cc.id_son=%d AND c.id=cc.id_dad" % int(id_son) res = run_sql(query, None, 1) if res: col_ancestor = get_collection(res[0][1]) ancestors.append(col_ancestor) id_son = res[0][0] else: break ancestors.reverse() return ancestors def restricted_p(self): """Predicate to test if the collection is restricted or not. Return the contect of the `restrited' column of the collection table (typically Apache group). Otherwise return None if the collection is public.""" out = None query = "SELECT restricted FROM collection WHERE id=%d" % self.id res = run_sql(query, None, 1) try: out = res[0][0] except: pass return out def get_sons(self, type='r'): "Returns list of direct sons of type 'type' for the current collection." sons = [] id_dad = self.id query = "SELECT cc.id_son,c.name FROM collection_collection AS cc, collection AS c "\ "WHERE cc.id_dad=%d AND cc.type='%s' AND c.id=cc.id_son ORDER BY score DESC, c.name ASC" % (int(id_dad), type) res = run_sql(query) for row in res: sons.append(get_collection(row[1])) return sons def get_descendants(self, type='r'): "Returns list of all descendants of type 'type' for the current collection." descendants = [] id_dad = self.id query = "SELECT cc.id_son,c.name FROM collection_collection AS cc, collection AS c "\ "WHERE cc.id_dad=%d AND cc.type='%s' AND c.id=cc.id_son ORDER BY score DESC" % (int(id_dad), type) res = run_sql(query) for row in res: col_desc = get_collection(row[1]) descendants.append(col_desc) descendants += col_desc.get_descendants() return descendants def write_cache_file(self, filename='', filebody=''): "Write a file inside collection cache." # open file: dirname = "%s/collections/%d" % (cachedir, self.id) mymkdir(dirname) fullfilename = dirname + "/%s.html" % filename try: os.umask(022) f = open(fullfilename, "w") except IOError, v: try: (code, message) = v except: code = 0 message = v print "I/O Error: " + str(message) + " (" + str(code) + ")" sys.exit(1) # print user info: if options["verbose"] >= 3: write_message("... creating %s" % fullfilename) sys.stdout.flush() # print page body: f.write(filebody) # close file: f.close() def update_webpage_cache(self): """Create collection page header, navtrail, body (including left and right stripes) and footer, and call write_cache_file() afterwards to update the collection webpage cache.""" ## do this for each language: for lang in <: print generate_language_list_for_python(); :>: ## first, update navtrail: for as in range(0,2): self.write_cache_file("navtrail-as=%s-ln=%s" % (as, lang), self.create_navtrail_links(as, lang)) ## second, update page body: for as in range(0,2): # do both simple search and advanced search pages: body = "" body += """
""" % weburl - body += self.create_portalbox(lang, 'te') body += "" + self.create_searchfor(as, lang) body += self.create_portalbox(lang, 'np') body += """""" body += """""" body += """" body_focuson = self.create_narrowsearch(as, lang, msg_focus_on[lang], "v") if body_focuson: body += """""" body += "
""" + self.create_narrowsearch(as, lang, msg_narrow_search[lang]) + """" + body_focuson + """
" body += self.create_portalbox(lang, 'ne') body += "
" self.write_cache_file("body-as=%s-ln=%s" % (as, lang), body) ## third, write portalboxes: + self.write_cache_file("portalbox-tp-ln=%s" % lang, self.create_portalbox(lang, "tp")) + self.write_cache_file("portalbox-te-ln=%s" % lang, self.create_portalbox(lang, "te")) self.write_cache_file("portalbox-lt-ln=%s" % lang, self.create_portalbox(lang, "lt")) - self.write_cache_file("portalbox-lb-ln=%s" % lang, self.create_portalbox(lang, "lb")) self.write_cache_file("portalbox-rt-ln=%s" % lang, self.create_portalbox(lang, "rt")) - self.write_cache_file("portalbox-rb-ln=%s" % lang, self.create_portalbox(lang, "rb")) ## fourth, write 'last updated' information: self.write_cache_file("last-updated-ln=%s" % lang, time.strftime("%02d %b %04Y %02H:%02M:%02S %Z", time.localtime())) return def create_navtrail_links(self, \ as=0, ln=cdslang, separator=" > "): """Creates navigation trail links, i.e. links to collection ancestors (except Home collection). If as==1, then links to Advanced Search interfaces; otherwise Simple Search. """ out = "" for dad in self.get_ancestors(): if dad.name != cdsname: # exclude Home collection if out: out += separator out += """%s""" % \ (weburl, urllib.quote_plus(dad.name), as, ln, dad.get_name(ln)) return out def create_nbrecs_info(self, ln=cdslang, prolog=""" (""", epilog=""")"""): "Return information on the number of records." out = "" if self.nbrecs: out = prolog + nice_number(self.nbrecs, ln) + epilog return out def create_portalbox(self, lang=cdslang, position="rt"): """Creates portalboxes of language CDSLANG of the position POSITION by consulting MySQL configuration database. - The position may be: 'lt'='left top', 'lb'='left bottom', 'rt'='right top', 'rb'='right bottom', - 'tl'='top left', 'tr'='top right', 'bl'='bottom left', 'br='bottom right'.""" + The position may be: 'lt'='left top', 'rt'='right top', etc.""" out = "" query = "SELECT p.title,p.body FROM portalbox AS p, collection_portalbox AS cp "\ " WHERE cp.id_collection=%d AND p.id=cp.id_portalbox AND cp.ln='%s' AND cp.position='%s' "\ " ORDER BY cp.score DESC" % (self.id, lang, position) res = run_sql(query) for row in res: title, body = row[0], row[1] if title: out += """
%s
%s
""" % (title, body) else: # no title specified, so print body ``as is'' only: out += body return out def create_narrowsearch(self, as=0, ln=cdslang, title="Narrow search", type="r"): """Creates list of collection descendants of type 'type' under title 'title'. If as==1, then links to Advanced Search interfaces; otherwise Simple Search. Suitable for 'Narrow search' and 'Focus on' boxes.""" narrowsearch="" # return nothing for type 'v' (virtual collection) if there are no sons: if type == 'v' and not self.get_sons(type): return "" # firstly write silent 'cc' (=current collection) argument: if type == 'r': # but not for virtual collections narrowsearch += """""" % self.name # then get list of sons and analyse it: sons = self.get_sons(type) # decide upon writing style: if there are grandchildren, then print in bold descendants = self.get_descendants(type) if len(descendants)>len(sons): style_prolog = "" style_epilog = "" else: style_prolog = "" style_epilog = "" # are there some sons? if len(sons): narrowsearch += """""" % title # iterate through sons: for son in sons: narrowsearch += """""" % son.name else: narrowsearch += """ """ % son.name narrowsearch += """""" narrowsearch += "
%s
""" if type=='r': if son.restricted_p() and son.restricted_p() != self.restricted_p(): narrowsearch += """ %s%s%s%s """ % \ (weburl, urllib.quote_plus(son.name), as, ln, style_prolog, son.get_name(ln), style_epilog, son.create_nbrecs_info(ln)) if son.restricted_p(): narrowsearch += """ [%s]""" % msg_restricted[ln] if cfg_narrow_search_show_grandsons: # iterate trough grandsons: grandsons = son.get_sons() nb_grandsons = len(grandsons) if nb_grandsons: narrowsearch += """
""" for i in range(0,nb_grandsons): narrowsearch += """%s%s """ % \ (weburl, urllib.quote_plus(grandsons[i].name), as, ln, grandsons[i].get_name(ln), \ grandsons[i].create_nbrecs_info(ln)) narrowsearch += """
" else: if type == 'r': # no sons, and type 'r', so print info on collection content: narrowsearch += """
%s
%s
""" % (msg_latest_additions[ln], self.create_instant_browse(ln=ln)) return narrowsearch def create_instant_browse(self, rg=cfg_instant_browse, ln=cdslang): "Searches database and produces list of last 'rg' records." box = "" if self.restricted_p(): box += msg_collection_restricted_content[ln] else: url = "%s/search.py?cc=%s&jrec=%d" % (weburl, urllib.quote_plus(self.name), rg+1) if self.nbrecs and self.reclist: # firstly, get last 'rg' records: box += """""" recIDs = Numeric.nonzero(self.reclist._set) for idx in range(self.nbrecs-1, self.nbrecs-rg-1, -1): if idx>=0: box += """""" % (get_creation_date(recIDs[idx],fmt="%Y-%m-%d
%H:%i"), print_record(recIDs[idx], ln=ln)) box += "
%s %s
" if self.nbrecs > rg: box += """
[>> %s]
""" % (url, ln, msg_more[ln]) else: box += msg_collection_contains_no_records[ln] return box def create_searchoptions(self): "Produces 'Search options' portal box." box="" query = """SELECT DISTINCT(cff.id_field),f.code,f.name FROM collection_field_fieldvalue AS cff, field AS f WHERE cff.id_collection=%d AND cff.id_fieldvalue IS NOT NULL AND cff.id_field=f.id ORDER BY cff.score DESC""" % self.id res = run_sql(query) if res: for row in res: field_id = row[0] field_code = row[1] field_name = row[2] query_bis = """SELECT fv.value,fv.name FROM fieldvalue AS fv, collection_field_fieldvalue AS cff WHERE cff.id_collection=%d AND cff.type='seo' AND cff.id_field=%d AND fv.id=cff.id_fieldvalue ORDER BY cff.score_fieldvalue DESC, cff.score DESC, fv.name ASC""" % (self.id, field_id) res_bis = run_sql(query_bis) if res_bis: box += """""" return box def create_inputdate(self, name="d1", ln=cdslang): "Produces 'From Date', 'Until Date' kind of selection box. Suitable for search options." box = "" # day box += """""" # month box += """""" # year box += """""" return box def create_publishedin(self, title="From date", name="d1"): """Produces 'Published in' selection box, if collection name contains the text 'Articles'. Suitable for advanced search option.""" out = "" if string.find(self.name, "Article") == -1: return out # journal title: cell_1a = "Published in:
" cell_1a += """""" # volume: cell_1b = "Volume:
" cell_1b += """""" # year: cell_1c = "Year:
" cell_1c += """""" # page: cell_1d = "Page:
" cell_1d += """""" out += """
""" % \ (cell_1a, cell_1b, cell_1c, cell_1d) return out def create_sortoptions(self, ln=cdslang): "Produces 'Sort options' portal box." box="" query = """SELECT f.code,f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE id_collection=%d AND cff.type='soo' AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""" % self.id box += """""" box += """""" % (msg_ascending[ln], msg_descending[ln]) return box def create_rankoptions(self, ln=cdslang): "Produces 'Rank options' portal box." box="" box += """""" return box def create_displayoptions(self, ln=cdslang): "Produces 'Display options' portal box." box="" # firstly, display hits per collection selection box: box += """""" % (msg_results[ln], msg_results[ln], msg_results[ln], msg_results[ln], msg_results[ln], msg_results[ln]) # secondly, display split by collection selection box: if self.get_sons(): box += """""" % (msg_split_by_collection[ln], msg_single_list[ln]) return box def create_formatoptions(self, ln=cdslang): "Produces 'Output format options' portal box." box = "" box += """""" return box def create_searchwithin_selection_box(self, fieldname='f', value='', ln='en'): "Produces 'search within' selection box for the current collection." out = "" out += """""" return out def create_searchexample(self): "Produces search example(s) for the current collection." out = "$collSearchExamples = getSearchExample(%d, $se);" % self.id return out def create_searchfor(self, as=0, ln=cdslang): "Produces either Simple or Advanced 'Search for' box for the current collection." if as == 1: return self.create_searchfor_advanced(ln) else: return self.create_searchfor_simple(ln) def create_searchfor_simple(self, ln=cdslang): "Produces simple 'Search for' box for the current collection." # print commentary start: out = "" out += """""" out += """""" % ln # define URL add-ons for simple and advanced search boxen: if self.name != cdsname: ssearchurl = "?c=%s&as=0&ln=%s" % (urllib.quote_plus(self.name), ln) asearchurl = "?c=%s&as=1&ln=%s" % (urllib.quote_plus(self.name), ln) else: # hide cdsname for aesthetical reasons ssearchurl = "?as=0&ln=%s" % ln asearchurl = "?as=1&ln=%s" % ln # define search box elements: header = msg_search_records_for[ln] % self.create_nbrecs_info(ln, "","") cell_1_left = """""" cell_1_middle = "%s" % self.create_searchwithin_selection_box(ln=ln) cell_1_right = """""" % (msg_search[ln], msg_browse[ln]) cell_2 = """%s :: %s""" \ % (weburl, ln, msg_search_tips[ln], asearchurl, msg_advanced_search[ln]) # merge them: out += """""" % \ (header, cell_1_left, cell_1_middle, cell_1_right, cell_2) # print commentary end: out += "" return out def create_searchfor_advanced(self, ln=cdslang): "Produces simple 'Search for' box for the current collection." # print commentary start: out = "" out += """""" out += """""" % ln # define URL add-ons for simple and advanced search boxen: if self.name != cdsname: ssearchurl = "?c=%s&as=0&ln=%s" % (urllib.quote_plus(self.name), ln) asearchurl = "?c=%s&as=1&ln=%s" % (urllib.quote_plus(self.name), ln) else: # hide cdsname for aesthetical reasons ssearchurl = "?as=0&ln=%s" % ln asearchurl = "?as=1&ln=%s" % ln # define search box elements: header = msg_search_records_for[ln] % self.create_nbrecs_info(ln, "", "") cell_1_left = create_matchtype_box('m1', ln=ln) + """""" cell_1_middle = self.create_searchwithin_selection_box('f1', ln=ln) cell_1_right = create_andornot_box('op1', ln=ln) cell_2_left = create_matchtype_box('m2', ln=ln) + """""" cell_2_middle = self.create_searchwithin_selection_box('f2', ln=ln) cell_2_right = create_andornot_box('op2', ln=ln) cell_3_left = create_matchtype_box('m3', ln=ln) + """""" cell_3_middle = self.create_searchwithin_selection_box('f3', ln=ln) cell_3_right = """ """ % (msg_search[ln], msg_browse[ln]) cell_4 = """%s :: %s""" \ % (weburl, ln, msg_search_tips[ln], ssearchurl, msg_simple_search[ln]) # merge them: out += """""" % \ (header, cell_1_left, cell_1_middle, cell_1_right, \ cell_2_left, cell_2_middle, cell_2_right, \ cell_3_left, cell_3_middle, cell_3_right, \ cell_4) # create more search options, if any: if self.create_searchoptions(): out += """""" % (msg_search_options[ln], self.create_searchoptions()) if 0: # FIXME: create published in, if appropriate: if self.create_publishedin(): out += self.create_publishedin() # arrival date: from/until: cell_6_a = self.create_inputdate("d1", ln=ln) cell_6_b = self.create_inputdate("d2", ln=ln) out += """""" % \ (msg_added_since[ln], msg_until[ln], cell_6_a, cell_6_b) # sort/rank and display/format options: cell_7_a = self.create_sortoptions(ln) + self.create_rankoptions(ln) cell_7_b = self.create_displayoptions(ln) cell_7_c = self.create_formatoptions(ln) out += """""" % \ (msg_sort_by[ln], msg_display_results[ln], msg_output_format[ln], cell_7_a, cell_7_b, cell_7_c) # print commentary end: out += "" return out def calculate_reclist(self): """Calculate, set and return the (reclist, reclist_with_nonpublic_subcolls) tuple for given collection.""" if self.calculate_reclist_run_already: # do we have to recalculate? return (self.reclist, self.reclist_with_nonpublic_subcolls) if options["verbose"] >= 3: write_message("... calculating reclist of %s" % self.name) reclist = HitSet() # will hold results for public sons only; good for storing into DB reclist_with_nonpublic_subcolls = HitSet() # will hold results for both public and nonpublic sons; good for deducing total # number of documents if not self.dbquery: # A - collection does not have dbquery, so query recursively all its sons # that are either non-restricted or that have the same restriction rules for coll in self.get_sons(): coll_reclist, coll_reclist_with_nonpublic_subcolls = coll.calculate_reclist() if ((coll.restricted_p() is None) or (coll.restricted_p() == self.restricted_p())): # add this reclist ``for real'' only if it is public reclist.union(coll_reclist) reclist_with_nonpublic_subcolls.union(coll_reclist_with_nonpublic_subcolls) else: # B - collection does have dbquery, so compute it: reclist = search_pattern(None,self.dbquery) reclist_with_nonpublic_subcolls = copy.deepcopy(reclist) # deduce the number of records: reclist.calculate_nbhits() reclist_with_nonpublic_subcolls.calculate_nbhits() # store the results: self.nbrecs = reclist_with_nonpublic_subcolls._nbhits self.reclist = reclist self.reclist_with_nonpublic_subcolls = reclist_with_nonpublic_subcolls # last but not least, update the speed-up flag: self.calculate_reclist_run_already = 1 # return the two sets: return (self.reclist, self.reclist_with_nonpublic_subcolls) def update_reclist(self): "Update the record universe for given collection; nbrecs, reclist of the collection table." if self.update_reclist_run_already: # do we have to reupdate? return 0 if options["verbose"] >= 3: write_message("... updating reclist of %s (%s recs)" % (self.name, self.nbrecs)) sys.stdout.flush() try: query = "UPDATE collection SET nbrecs=%d, reclist='%s' WHERE id=%d" % \ (self.nbrecs, escape_string(zlib.compress(Numeric.dumps(self.reclist._set))), self.id) res = run_sql(query) self.reclist_updated_since_start = 1 except MySQLdb.Error, e: print "Database Query Error %d: %s." % (e.args[0], e.args[1]) sys.exit(1) # last but not least, update the speed-up flag: self.update_reclist_run_already = 1 return 0 def usage(code, msg=''): "Prints usage info." if msg: sys.stderr.write("Error: %s.\n" % msg) sys.stderr.write("Usage: %s [collection][+]\n" % sys.argv[0]) sys.stderr.write("""Description: %s updates the collection cache (record universe for a given collection plus web page elements) based on WML and MySQL configuration parameters. If the collection name is passed as the second argument, it'll update this collection only. If the collection name is immediately followed by a plus sign, it will also update all its desdendants. The top-level collection name may be entered as the void string.\n""" % sys.argv[0]) sys.stderr.write("Example: %s update-reclist\n" % sys.argv[0]) sys.stderr.write("Example: %s update-webpage\n" % sys.argv[0]) sys.stderr.write("Example: %s update-webpage \"Articles & Preprints\"\n" % sys.argv[0]) sys.stderr.write("Example: %s update-webpage \"Articles & Preprints\"+\n" % sys.argv[0]) sys.stderr.write("Example: %s update-webpage \"\"\n" % sys.argv[0]) sys.stderr.write("Example: %s update-reclist \"\"+\n" % sys.argv[0]) sys.exit(code) def get_datetime(var, format_string="%Y-%m-%d %H:%M:%S"): """Returns a date string according to the format string. It can handle normal date strings and shifts with respect to now.""" date = time.time() shift_re=sre.compile("([-\+]{0,1})([\d]+)([dhms])") factors = {"d":24*3600, "h":3600, "m":60, "s":1} m = shift_re.match(var) if m: sign = m.groups()[0] == "-" and -1 or 1 factor = factors[m.groups()[2]] value = float(m.groups()[1]) date = time.localtime(date + sign * factor * value) date = time.strftime(format_string, date) else: date = time.strptime(var, format_string) date = time.strftime(format_string, date) return date def write_message(msg, stream=sys.stdout): """Prints message and flush output stream (may be sys.stdout or sys.stderr).""" if stream == sys.stdout or stream == sys.stderr: stream.write(time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime())) stream.write("%s\n" % msg) stream.flush() else: sys.stderr.write("Unknown stream %s. [must be sys.stdout or sys.stderr]\n" % stream) def task_sig_sleep(sig, frame): """Signal handler for the 'sleep' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("got signal %d" % sig) write_message("sleeping...") task_update_status("SLEEPING") signal.pause() # wait for wake-up signal def task_sig_wakeup(sig, frame): """Signal handler for the 'wakeup' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("got signal %d" % sig) write_message("continuing...") task_update_status("CONTINUING") def task_sig_stop(sig, frame): """Signal handler for the 'stop' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("got signal %d" % sig) write_message("stopping...") task_update_status("STOPPING") pass # FIXME: is there anything to be done? task_update_status("STOPPED") sys.exit(0) def task_sig_suicide(sig, frame): """Signal handler for the 'suicide' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("got signal %d" % sig) write_message("suiciding myself now...") task_update_status("SUICIDING") write_message("suicided") task_update_status("SUICIDED") sys.exit(0) def task_sig_unknown(sig, frame): """Signal handler for the other unknown signals sent by shell or user.""" write_message("unknown signal %d ignored" % sig) # do nothing for other signals def authenticate(user, header="WebColl Task Submission", action="runwebcoll"): """Authenticate the user against the user database. Check for its password, if it exists. Check for action access rights. Return user name upon authorization success, do system exit upon authorization failure. """ print header print "=" * len(header) if user == "": print >> sys.stdout, "\rUsername: ", user = string.strip(string.lower(sys.stdin.readline())) else: print >> sys.stdout, "\rUsername: ", user ## first check user pw: res = run_sql("select id,password from user where email=%s", (user,), 1) if not res: print "Sorry, %s does not exist." % user sys.exit(1) else: (uid_db, password_db) = res[0] if password_db: password_entered = getpass.getpass() if password_db == password_entered: pass else: print "Sorry, wrong credentials for %s." % user sys.exit(1) ## secondly check authorization for the action: (auth_code, auth_message) = acc_authorize_action(uid_db, action) if auth_code != 0: print auth_message sys.exit(1) return user def task_submit(options): """Submits task to the BibSched task queue. This is what people will be invoking via command line.""" ## sanity check: remove eventual "task" option: if options.has_key("task"): del options["task"] ## authenticate user: user = authenticate(options.get("user", "")) ## submit task: if options["verbose"] >= 9: print "" write_message("storing task options %s\n" % options) task_id = run_sql("""INSERT INTO schTASK (id,proc,user,runtime,sleeptime,status,arguments) VALUES (NULL,'webcoll',%s,%s,%s,'WAITING',%s)""", (user, options["runtime"], options["sleeptime"], marshal.dumps(options))) ## update task number: options["task"] = task_id run_sql("""UPDATE schTASK SET arguments=%s WHERE id=%s""", (marshal.dumps(options),task_id)) write_message("Task #%d submitted." % task_id) return task_id def task_update_progress(msg): """Updates progress information in the BibSched task table.""" global task_id return run_sql("UPDATE schTASK SET progress=%s where id=%s", (msg, task_id)) def task_update_status(val): """Updates status information in the BibSched task table.""" global task_id return run_sql("UPDATE schTASK SET status=%s where id=%s", (val, task_id)) def task_read_status(task_id): """Read status information in the BibSched task table.""" res = run_sql("SELECT status FROM schTASK where id=%s", (task_id,), 1) try: out = res[0][0] except: out = 'UNKNOWN' return out def task_get_options(id): """Returns options for the task 'id' read from the BibSched task queue table.""" out = {} res = run_sql("SELECT arguments FROM schTASK WHERE id=%s AND proc='webcoll'", (id,)) try: out = marshal.loads(res[0][0]) except: write_message("Error: WebColl task %d does not seem to exist." % id) sys.exit(1) return out def task_run(): """Run the WebColl task by fetching arguments from the BibSched task queue. This is what BibSched will be invoking via daemon call. The task will update collection reclist cache and collection web pages for given collection. (default is all). Arguments described in usage() function. Return 1 in case of success and 0 in case of failure.""" global task_id, options options = task_get_options(task_id) # get options from BibSched task table ## check task id: if not options.has_key("task"): write_message("Error: The task #%d does not seem to be a WebColl task." % task_id) return 0 ## check task status: task_status = task_read_status(task_id) if task_status != "WAITING": write_message("Error: The task #%d is %s. I expected WAITING." % (task_id, task_status)) return 0 ## we can run the task now: if options["verbose"]: write_message("Task #%d started." % task_id) task_update_status("RUNNING") ## initialize signal handler: signal.signal(signal.SIGUSR1, task_sig_sleep) signal.signal(signal.SIGTERM, task_sig_stop) signal.signal(signal.SIGABRT, task_sig_suicide) signal.signal(signal.SIGCONT, task_sig_wakeup) signal.signal(signal.SIGINT, task_sig_unknown) colls = [] # firstly, decide which collections to do: if options.has_key("collection"): coll = get_collection(options["collection"]) if coll.id == None: usage(1, 'Collection %s does not exist' % coll.name) colls.append(coll) else: res = run_sql("SELECT name FROM collection ORDER BY id") for row in res: colls.append(get_collection(row[0])) # secondly, update collection reclist cache: i = 0 for coll in colls: i += 1 if options["verbose"]: write_message("%s / reclist cache update" % coll.name) coll.calculate_reclist() coll.update_reclist() task_update_progress("Part 1/2: done %d/%d" % (i,len(colls))) # thirdly, update collection webpage cache: i = 0 for coll in colls: i += 1 if options["verbose"]: write_message("%s / web cache update" % coll.name) coll.update_webpage_cache() task_update_progress("Part 2/2: done %d/%d" % (i,len(colls))) ## we are done: task_update_progress("Done.") task_update_status("DONE") if options["verbose"]: write_message("Task #%d finished." % task_id) return 1 def usage(exitcode=1, msg=""): """Prints usage info.""" if msg: sys.stderr.write("Error: %s.\n" % msg) sys.stderr.write("Usage: %s [options]\n" % sys.argv[0]) sys.stderr.write("Command options:\n") sys.stderr.write(" -c, --collection\t Update only given collection. [all]\n") sys.stderr.write("Scheduling options:\n") sys.stderr.write(" -u, --user=USER \t User name to submit the task as, password needed.\n") sys.stderr.write(" -t, --runtime=TIME \t Time to execute the task (now), e.g.: +15s, 5m, 3h, 2002-10-27 13:57:26\n") sys.stderr.write(" -s, --sleeptime=SLEEP \t Sleeping frequency after which to repeat task (no), e.g.: 30m, 2h, 1d\n") sys.stderr.write("General options:\n") sys.stderr.write(" -h, --help \t\t Print this help.\n") sys.stderr.write(" -V, --version \t\t Print version information.\n") sys.stderr.write(" -v, --verbose=LEVEL \t Verbose level (from 0 to 9, default 1).\n") sys.stderr.write("""Description: %s updates the collection cache (record universe for a given collection plus web page elements) based on WML and MySQL configuration parameters. If the collection name is passed as the second argument, it'll update this collection only. If the collection name is immediately followed by a plus sign, it will also update all its desdendants. The top-level collection name may be entered as the void string.\n""" % sys.argv[0]) sys.exit(exitcode) def main(): """Main function that analyzes command line input and calls whatever is appropriate. Useful for learning on how to write BibSched tasks.""" global task_id ## parse command line: if len(sys.argv) == 2 and sys.argv[1].isdigit(): ## A - run the task task_id = int(sys.argv[1]) try: if not task_run(): write_message("Error occurred. Exiting.", sys.stderr) except StandardError, e: write_message("Unexpected error occurred: %s." % e, sys.stderr) write_message("Traceback is:", sys.stderr) traceback.print_tb(sys.exc_info()[2]) write_message("Exiting.", sys.stderr) task_update_status("ERROR") else: ## B - submit the task # set default values: options["runtime"] = time.strftime("%Y-%m-%d %H:%M:%S") options["verbose"] = 1 options["sleeptime"] = "" # set user-defined options: try: opts, args = getopt.getopt(sys.argv[1:], "hVv:u:s:t:c:", ["help", "version", "verbose=","user=","sleep=","time=","collection="]) except getopt.GetoptError, err: usage(1, err) try: for opt in opts: if opt[0] in ["-h", "--help"]: usage(0) elif opt[0] in ["-V", "--version"]: print __version__ sys.exit(0) elif opt[0] in [ "-u", "--user"]: options["user"] = opt[1] elif opt[0] in ["-v", "--verbose"]: options["verbose"] = int(opt[1]) elif opt[0] in [ "-s", "--sleeptime" ]: get_datetime(opt[1]) # see if it is a valid shift options["sleeptime"] = opt[1] elif opt[0] in [ "-t", "--runtime" ]: options["runtime"] = get_datetime(opt[1]) elif opt[0] in [ "-c", "--collection"]: options["collection"] = opt[1] else: usage(1) except StandardError, e: usage(e) task_submit(options) return ### okay, here we go: if __name__ == '__main__': main() diff --git a/modules/websearch/bin/webcoll.wml b/modules/websearch/bin/webcoll.wml index c0fad6716..c5a05bb4e 100644 --- a/modules/websearch/bin/webcoll.wml +++ b/modules/websearch/bin/webcoll.wml @@ -1,1302 +1,1300 @@ ## $Id$ ## Script that creates collection pages, starting from the collection ## passed to the script as an argument. ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. ## read config variables: #include "config.wml" #include "configbis.wml" #include "cdswmllib.wml" ## start Python: #! ## $Id$ ## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES. """Creates CDSware collection specific pages, using WML and MySQL configuration tables.""" __version__ = "<: print generate_pretty_version_string('$Id$'); :>" ## fill config variables: pylibdir = "/python" ### okay, rest of the Python code goes below ### ## import modules: try: import copy import getopt import getpass import marshal import signal import sys import cgi import sre import os import math import string import urllib import zlib import MySQLdb import Numeric import time import traceback except ImportError, e: print "Error: %s" % e import sys sys.exit(1) try: sys.path.append('%s' % pylibdir) from cdsware.config import * from cdsware.messages import * from cdsware.search_engine import HitSet, search_pattern, get_creation_date, nice_number, get_field_i18nname from cdsware.search_engine_config import cfg_author_et_al_threshold, cfg_instant_browse, cfg_max_recID, cfg_narrow_search_show_grandsons from cdsware.dbquery import run_sql from cdsware.access_control_engine import acc_authorize_action from cdsware.bibrank_record_sorter import get_bibrank_methods except ImportError, e: print "Error: %s" % e import sys sys.exit(1) ## global vars collection_house = {} # will hold collections we treat in this run of the program; a dict of {collname2, collobject1}, ... options = {} # will hold task options def get_collection(colname): """Return collection object from the collection house for given colname. If does not exist, then create it.""" if not collection_house.has_key(colname): colobject = Collection(colname) collection_house[colname] = colobject return collection_house[colname] ## auxiliary functions: def mymkdir(newdir, mode=0777): """works the way a good mkdir should :) - already exists, silently complete - regular file in the way, raise an exception - parent directory(ies) does not exist, make them as well """ if os.path.isdir(newdir): pass elif os.path.isfile(newdir): raise OSError("a file with the same name as the desired " \ "dir, '%s', already exists." % newdir) else: head, tail = os.path.split(newdir) if head and not os.path.isdir(head): mymkdir(head, mode) if tail: os.umask(022) os.mkdir(newdir, mode) def escape_string(s): "Escapes special chars in string. For MySQL queries." s = MySQLdb.escape_string(s) return s def is_selected(var, fld): "Checks if the two are equal, and if yes, returns ' selected'. Useful for select boxes." if var == fld: return " selected" else: return "" def write_message(msg, stream=sys.stdout): """Write message and flush output stream (may be sys.stdout or sys.stderr). Useful for debugging stuff.""" if stream == sys.stdout or stream == sys.stderr: stream.write(time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime())) stream.write("%s\n" % msg) stream.flush() else: sys.stderr.write("Unknown stream %s. [must be sys.stdout or sys.stderr]\n" % stream) return def create_andornot_box(name='op', value='', ln='en'): "Returns HTML code for the AND/OR/NOT selection box." out = """ """ % (name, is_selected('a', value), msg_and[ln], is_selected('o', value), msg_or[ln], is_selected('n', value), msg_and_not[ln]) return out def create_matchtype_box(name='m', value='', ln='en'): "Returns HTML code for the 'match type' selection box." out = """ """ % (name, is_selected('a', value), msg_all_of_the_words[ln], is_selected('o', value), msg_any_of_the_words[ln], is_selected('e', value), msg_exact_phrase[ln], is_selected('p', value), msg_partial_phrase[ln], is_selected('r', value), msg_regular_expression[ln]) return out def get_field(recID, tag): "Gets list of field 'tag' for the record with 'recID' system number." out = [] digit = tag[0:2] bx = "bib%sx" % digit bibx = "bibrec_bib%sx" % digit query = "SELECT bx.value FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec='%s' AND bx.id=bibx.id_bibxxx AND bx.tag='%s'" \ % (bx, bibx, recID, tag) res = run_sql(query) for row in res: out.append(row[0]) return out def print_record(recID, format='hb', ln=cdslang): "Prints record 'recID' formatted accoding to 'format'." out = "" # HTML brief format by default query = "SELECT value FROM bibfmt WHERE id_bibrec='%s' AND format='%s'" % (recID, format) res = run_sql(query, None, 1) if res: # record 'recID' is formatted in 'format', so print it out += "%s" % zlib.decompress(res[0][0]) else: # record 'recID' does not exist in format 'format', so print some default format: # firstly, title: titles = get_field(recID, "245__a") for title in titles: out += "%s " % cgi.escape(title) # secondly, authors: authors = get_field(recID, "100__a") + get_field(recID, "700__a") if authors: out += " / " for i in range (0,cfg_author_et_al_threshold): if i < len(authors): out += """%s ;""" \ % (weburl, urllib.quote(authors[i]), cgi.escape(authors[i])) if len(authors) > cfg_author_et_al_threshold: out += " et al." # thirdly, date of creation: dates = get_field(recID, "260__c") for date in dates: out += " %s." % cgi.escape(date) # thirdly bis, report numbers: rns = get_field(recID, "037__a") for rn in rns: out += """ [%s]""" % cgi.escape(rn) rns = get_field(recID, "088__a") for rn in rns: out += """ [%s]""" % cgi.escape(rn) # fourthly, beginning of abstract: abstracts = get_field(recID, "520__a") for abstract in abstracts: out += "
%s [...]" % cgi.escape(abstract[:1+string.find(abstract, '.')]) # fifthly, fulltext link: urls_z = get_field(recID, "8564_z") urls_u = get_field(recID, "8564_u") for idx in range(0,len(urls_u)): out += """
%s""" % (urls_u[idx], urls_u[idx]) # at the end of HTML mode, print "Detailed record" and "Mark record" functions: out += """
%s""" \ % (weburl, recID, ln, msg_detailed_record[ln]) out += """ - %s\n""" % \ (weburl, recID, ln, msg_similar_records[ln]) #out += """<- Mark record""" % recID out += "" return out class Collection: "Holds the information on collections (id,name,dbquery)." def __init__(self, name=""): "Creates collection instance by querying the MySQL configuration database about 'name'." self.calculate_reclist_run_already = 0 # to speed things up wihtout much refactoring self.update_reclist_run_already = 0 # to speed things up wihtout much refactoring self.reclist_with_nonpublic_subcolls = HitSet() if not name: self.name = cdsname # by default we are working on the home page self.id = 1 self.dbquery = None self.nbrecs = None self.reclist = HitSet() else: self.name = name query = "SELECT id,name,dbquery,nbrecs,reclist FROM collection WHERE name='%s'" % escape_string(name) try: res = run_sql(query, None, 1) if res: self.id = res[0][0] self.name = res[0][1] self.dbquery = res[0][2] self.nbrecs = res[0][3] try: self.reclist = HitSet(Numeric.loads(zlib.decompress(res[0][5]))) except: self.reclist = HitSet() else: # collection does not exist! self.id = None self.dbquery = None self.nbrecs = None self.reclist = HitSet() except MySQLdb.Error, e: print "Error %d: %s" % (e.args[0], e.args[1]) sys.exit(1) def get_name(self, ln=cdslang, name_type="ln", prolog="", epilog="", prolog_suffix=" ", epilog_suffix=""): """Return nicely formatted collection name for language LN. The NAME_TYPE may be 'ln' (=long name), 'sn' (=short name), etc.""" out = prolog i18name = "" res = run_sql("SELECT value FROM collectionname WHERE id_collection=%s AND ln=%s AND type=%s", (self.id, ln, name_type)) try: i18name += res[0][0] except IndexError: pass if i18name: out += i18name else: out += self.name out += epilog return out def get_ancestors(self): "Returns list of ancestors of the current collection." ancestors = [] id_son = self.id while 1: query = "SELECT cc.id_dad,c.name FROM collection_collection AS cc, collection AS c "\ "WHERE cc.id_son=%d AND c.id=cc.id_dad" % int(id_son) res = run_sql(query, None, 1) if res: col_ancestor = get_collection(res[0][1]) ancestors.append(col_ancestor) id_son = res[0][0] else: break ancestors.reverse() return ancestors def restricted_p(self): """Predicate to test if the collection is restricted or not. Return the contect of the `restrited' column of the collection table (typically Apache group). Otherwise return None if the collection is public.""" out = None query = "SELECT restricted FROM collection WHERE id=%d" % self.id res = run_sql(query, None, 1) try: out = res[0][0] except: pass return out def get_sons(self, type='r'): "Returns list of direct sons of type 'type' for the current collection." sons = [] id_dad = self.id query = "SELECT cc.id_son,c.name FROM collection_collection AS cc, collection AS c "\ "WHERE cc.id_dad=%d AND cc.type='%s' AND c.id=cc.id_son ORDER BY score DESC, c.name ASC" % (int(id_dad), type) res = run_sql(query) for row in res: sons.append(get_collection(row[1])) return sons def get_descendants(self, type='r'): "Returns list of all descendants of type 'type' for the current collection." descendants = [] id_dad = self.id query = "SELECT cc.id_son,c.name FROM collection_collection AS cc, collection AS c "\ "WHERE cc.id_dad=%d AND cc.type='%s' AND c.id=cc.id_son ORDER BY score DESC" % (int(id_dad), type) res = run_sql(query) for row in res: col_desc = get_collection(row[1]) descendants.append(col_desc) descendants += col_desc.get_descendants() return descendants def write_cache_file(self, filename='', filebody=''): "Write a file inside collection cache." # open file: dirname = "%s/collections/%d" % (cachedir, self.id) mymkdir(dirname) fullfilename = dirname + "/%s.html" % filename try: os.umask(022) f = open(fullfilename, "w") except IOError, v: try: (code, message) = v except: code = 0 message = v print "I/O Error: " + str(message) + " (" + str(code) + ")" sys.exit(1) # print user info: if options["verbose"] >= 3: write_message("... creating %s" % fullfilename) sys.stdout.flush() # print page body: f.write(filebody) # close file: f.close() def update_webpage_cache(self): """Create collection page header, navtrail, body (including left and right stripes) and footer, and call write_cache_file() afterwards to update the collection webpage cache.""" ## do this for each language: for lang in <: print generate_language_list_for_python(); :>: ## first, update navtrail: for as in range(0,2): self.write_cache_file("navtrail-as=%s-ln=%s" % (as, lang), self.create_navtrail_links(as, lang)) ## second, update page body: for as in range(0,2): # do both simple search and advanced search pages: body = "" body += """
""" % weburl - body += self.create_portalbox(lang, 'te') body += "" + self.create_searchfor(as, lang) body += self.create_portalbox(lang, 'np') body += """""" body += """""" body += """" body_focuson = self.create_narrowsearch(as, lang, msg_focus_on[lang], "v") if body_focuson: body += """""" body += "
""" + self.create_narrowsearch(as, lang, msg_narrow_search[lang]) + """" + body_focuson + """
" body += self.create_portalbox(lang, 'ne') body += "
" self.write_cache_file("body-as=%s-ln=%s" % (as, lang), body) ## third, write portalboxes: + self.write_cache_file("portalbox-tp-ln=%s" % lang, self.create_portalbox(lang, "tp")) + self.write_cache_file("portalbox-te-ln=%s" % lang, self.create_portalbox(lang, "te")) self.write_cache_file("portalbox-lt-ln=%s" % lang, self.create_portalbox(lang, "lt")) - self.write_cache_file("portalbox-lb-ln=%s" % lang, self.create_portalbox(lang, "lb")) self.write_cache_file("portalbox-rt-ln=%s" % lang, self.create_portalbox(lang, "rt")) - self.write_cache_file("portalbox-rb-ln=%s" % lang, self.create_portalbox(lang, "rb")) ## fourth, write 'last updated' information: self.write_cache_file("last-updated-ln=%s" % lang, time.strftime("%02d %b %04Y %02H:%02M:%02S %Z", time.localtime())) return def create_navtrail_links(self, \ as=0, ln=cdslang, separator=" > "): """Creates navigation trail links, i.e. links to collection ancestors (except Home collection). If as==1, then links to Advanced Search interfaces; otherwise Simple Search. """ out = "" for dad in self.get_ancestors(): if dad.name != cdsname: # exclude Home collection if out: out += separator out += """%s""" % \ (weburl, urllib.quote_plus(dad.name), as, ln, dad.get_name(ln)) return out def create_nbrecs_info(self, ln=cdslang, prolog=""" (""", epilog=""")"""): "Return information on the number of records." out = "" if self.nbrecs: out = prolog + nice_number(self.nbrecs, ln) + epilog return out def create_portalbox(self, lang=cdslang, position="rt"): """Creates portalboxes of language CDSLANG of the position POSITION by consulting MySQL configuration database. - The position may be: 'lt'='left top', 'lb'='left bottom', 'rt'='right top', 'rb'='right bottom', - 'tl'='top left', 'tr'='top right', 'bl'='bottom left', 'br='bottom right'.""" + The position may be: 'lt'='left top', 'rt'='right top', etc.""" out = "" query = "SELECT p.title,p.body FROM portalbox AS p, collection_portalbox AS cp "\ " WHERE cp.id_collection=%d AND p.id=cp.id_portalbox AND cp.ln='%s' AND cp.position='%s' "\ " ORDER BY cp.score DESC" % (self.id, lang, position) res = run_sql(query) for row in res: title, body = row[0], row[1] if title: out += """
%s
%s
""" % (title, body) else: # no title specified, so print body ``as is'' only: out += body return out def create_narrowsearch(self, as=0, ln=cdslang, title="Narrow search", type="r"): """Creates list of collection descendants of type 'type' under title 'title'. If as==1, then links to Advanced Search interfaces; otherwise Simple Search. Suitable for 'Narrow search' and 'Focus on' boxes.""" narrowsearch="" # return nothing for type 'v' (virtual collection) if there are no sons: if type == 'v' and not self.get_sons(type): return "" # firstly write silent 'cc' (=current collection) argument: if type == 'r': # but not for virtual collections narrowsearch += """""" % self.name # then get list of sons and analyse it: sons = self.get_sons(type) # decide upon writing style: if there are grandchildren, then print in bold descendants = self.get_descendants(type) if len(descendants)>len(sons): style_prolog = "" style_epilog = "" else: style_prolog = "" style_epilog = "" # are there some sons? if len(sons): narrowsearch += """""" % title # iterate through sons: for son in sons: narrowsearch += """""" % son.name else: narrowsearch += """ """ % son.name narrowsearch += """""" narrowsearch += "
%s
""" if type=='r': if son.restricted_p() and son.restricted_p() != self.restricted_p(): narrowsearch += """ %s%s%s%s """ % \ (weburl, urllib.quote_plus(son.name), as, ln, style_prolog, son.get_name(ln), style_epilog, son.create_nbrecs_info(ln)) if son.restricted_p(): narrowsearch += """ [%s]""" % msg_restricted[ln] if cfg_narrow_search_show_grandsons: # iterate trough grandsons: grandsons = son.get_sons() nb_grandsons = len(grandsons) if nb_grandsons: narrowsearch += """
""" for i in range(0,nb_grandsons): narrowsearch += """%s%s """ % \ (weburl, urllib.quote_plus(grandsons[i].name), as, ln, grandsons[i].get_name(ln), \ grandsons[i].create_nbrecs_info(ln)) narrowsearch += """
" else: if type == 'r': # no sons, and type 'r', so print info on collection content: narrowsearch += """
%s
%s
""" % (msg_latest_additions[ln], self.create_instant_browse(ln=ln)) return narrowsearch def create_instant_browse(self, rg=cfg_instant_browse, ln=cdslang): "Searches database and produces list of last 'rg' records." box = "" if self.restricted_p(): box += msg_collection_restricted_content[ln] else: url = "%s/search.py?cc=%s&jrec=%d" % (weburl, urllib.quote_plus(self.name), rg+1) if self.nbrecs and self.reclist: # firstly, get last 'rg' records: box += """""" recIDs = Numeric.nonzero(self.reclist._set) for idx in range(self.nbrecs-1, self.nbrecs-rg-1, -1): if idx>=0: box += """""" % (get_creation_date(recIDs[idx],fmt="%Y-%m-%d
%H:%i"), print_record(recIDs[idx], ln=ln)) box += "
%s %s
" if self.nbrecs > rg: box += """""" % (url, ln, msg_more[ln]) else: box += msg_collection_contains_no_records[ln] return box def create_searchoptions(self): "Produces 'Search options' portal box." box="" query = """SELECT DISTINCT(cff.id_field),f.code,f.name FROM collection_field_fieldvalue AS cff, field AS f WHERE cff.id_collection=%d AND cff.id_fieldvalue IS NOT NULL AND cff.id_field=f.id ORDER BY cff.score DESC""" % self.id res = run_sql(query) if res: for row in res: field_id = row[0] field_code = row[1] field_name = row[2] query_bis = """SELECT fv.value,fv.name FROM fieldvalue AS fv, collection_field_fieldvalue AS cff WHERE cff.id_collection=%d AND cff.type='seo' AND cff.id_field=%d AND fv.id=cff.id_fieldvalue ORDER BY cff.score_fieldvalue DESC, cff.score DESC, fv.name ASC""" % (self.id, field_id) res_bis = run_sql(query_bis) if res_bis: box += """""" return box def create_inputdate(self, name="d1", ln=cdslang): "Produces 'From Date', 'Until Date' kind of selection box. Suitable for search options." box = "" # day box += """""" # month box += """""" # year box += """""" return box def create_publishedin(self, title="From date", name="d1"): """Produces 'Published in' selection box, if collection name contains the text 'Articles'. Suitable for advanced search option.""" out = "" if string.find(self.name, "Article") == -1: return out # journal title: cell_1a = "Published in:
" cell_1a += """""" # volume: cell_1b = "Volume:
" cell_1b += """""" # year: cell_1c = "Year:
" cell_1c += """""" # page: cell_1d = "Page:
" cell_1d += """""" out += """
""" % \ (cell_1a, cell_1b, cell_1c, cell_1d) return out def create_sortoptions(self, ln=cdslang): "Produces 'Sort options' portal box." box="" query = """SELECT f.code,f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE id_collection=%d AND cff.type='soo' AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""" % self.id box += """""" box += """""" % (msg_ascending[ln], msg_descending[ln]) return box def create_rankoptions(self, ln=cdslang): "Produces 'Rank options' portal box." box="" box += """""" return box def create_displayoptions(self, ln=cdslang): "Produces 'Display options' portal box." box="" # firstly, display hits per collection selection box: box += """""" % (msg_results[ln], msg_results[ln], msg_results[ln], msg_results[ln], msg_results[ln], msg_results[ln]) # secondly, display split by collection selection box: if self.get_sons(): box += """""" % (msg_split_by_collection[ln], msg_single_list[ln]) return box def create_formatoptions(self, ln=cdslang): "Produces 'Output format options' portal box." box = "" box += """""" return box def create_searchwithin_selection_box(self, fieldname='f', value='', ln='en'): "Produces 'search within' selection box for the current collection." out = "" out += """""" return out def create_searchexample(self): "Produces search example(s) for the current collection." out = "$collSearchExamples = getSearchExample(%d, $se);" % self.id return out def create_searchfor(self, as=0, ln=cdslang): "Produces either Simple or Advanced 'Search for' box for the current collection." if as == 1: return self.create_searchfor_advanced(ln) else: return self.create_searchfor_simple(ln) def create_searchfor_simple(self, ln=cdslang): "Produces simple 'Search for' box for the current collection." # print commentary start: out = "" out += """""" out += """""" % ln # define URL add-ons for simple and advanced search boxen: if self.name != cdsname: ssearchurl = "?c=%s&as=0&ln=%s" % (urllib.quote_plus(self.name), ln) asearchurl = "?c=%s&as=1&ln=%s" % (urllib.quote_plus(self.name), ln) else: # hide cdsname for aesthetical reasons ssearchurl = "?as=0&ln=%s" % ln asearchurl = "?as=1&ln=%s" % ln # define search box elements: header = msg_search_records_for[ln] % self.create_nbrecs_info(ln, "","") cell_1_left = """""" cell_1_middle = "%s" % self.create_searchwithin_selection_box(ln=ln) cell_1_right = """""" % (msg_search[ln], msg_browse[ln]) cell_2 = """%s :: %s""" \ % (weburl, ln, msg_search_tips[ln], asearchurl, msg_advanced_search[ln]) # merge them: out += """""" % \ (header, cell_1_left, cell_1_middle, cell_1_right, cell_2) # print commentary end: out += "" return out def create_searchfor_advanced(self, ln=cdslang): "Produces simple 'Search for' box for the current collection." # print commentary start: out = "" out += """""" out += """""" % ln # define URL add-ons for simple and advanced search boxen: if self.name != cdsname: ssearchurl = "?c=%s&as=0&ln=%s" % (urllib.quote_plus(self.name), ln) asearchurl = "?c=%s&as=1&ln=%s" % (urllib.quote_plus(self.name), ln) else: # hide cdsname for aesthetical reasons ssearchurl = "?as=0&ln=%s" % ln asearchurl = "?as=1&ln=%s" % ln # define search box elements: header = msg_search_records_for[ln] % self.create_nbrecs_info(ln, "", "") cell_1_left = create_matchtype_box('m1', ln=ln) + """""" cell_1_middle = self.create_searchwithin_selection_box('f1', ln=ln) cell_1_right = create_andornot_box('op1', ln=ln) cell_2_left = create_matchtype_box('m2', ln=ln) + """""" cell_2_middle = self.create_searchwithin_selection_box('f2', ln=ln) cell_2_right = create_andornot_box('op2', ln=ln) cell_3_left = create_matchtype_box('m3', ln=ln) + """""" cell_3_middle = self.create_searchwithin_selection_box('f3', ln=ln) cell_3_right = """ """ % (msg_search[ln], msg_browse[ln]) cell_4 = """%s :: %s""" \ % (weburl, ln, msg_search_tips[ln], ssearchurl, msg_simple_search[ln]) # merge them: out += """""" % \ (header, cell_1_left, cell_1_middle, cell_1_right, \ cell_2_left, cell_2_middle, cell_2_right, \ cell_3_left, cell_3_middle, cell_3_right, \ cell_4) # create more search options, if any: if self.create_searchoptions(): out += """""" % (msg_search_options[ln], self.create_searchoptions()) if 0: # FIXME: create published in, if appropriate: if self.create_publishedin(): out += self.create_publishedin() # arrival date: from/until: cell_6_a = self.create_inputdate("d1", ln=ln) cell_6_b = self.create_inputdate("d2", ln=ln) out += """""" % \ (msg_added_since[ln], msg_until[ln], cell_6_a, cell_6_b) # sort/rank and display/format options: cell_7_a = self.create_sortoptions(ln) + self.create_rankoptions(ln) cell_7_b = self.create_displayoptions(ln) cell_7_c = self.create_formatoptions(ln) out += """""" % \ (msg_sort_by[ln], msg_display_results[ln], msg_output_format[ln], cell_7_a, cell_7_b, cell_7_c) # print commentary end: out += "" return out def calculate_reclist(self): """Calculate, set and return the (reclist, reclist_with_nonpublic_subcolls) tuple for given collection.""" if self.calculate_reclist_run_already: # do we have to recalculate? return (self.reclist, self.reclist_with_nonpublic_subcolls) if options["verbose"] >= 3: write_message("... calculating reclist of %s" % self.name) reclist = HitSet() # will hold results for public sons only; good for storing into DB reclist_with_nonpublic_subcolls = HitSet() # will hold results for both public and nonpublic sons; good for deducing total # number of documents if not self.dbquery: # A - collection does not have dbquery, so query recursively all its sons # that are either non-restricted or that have the same restriction rules for coll in self.get_sons(): coll_reclist, coll_reclist_with_nonpublic_subcolls = coll.calculate_reclist() if ((coll.restricted_p() is None) or (coll.restricted_p() == self.restricted_p())): # add this reclist ``for real'' only if it is public reclist.union(coll_reclist) reclist_with_nonpublic_subcolls.union(coll_reclist_with_nonpublic_subcolls) else: # B - collection does have dbquery, so compute it: reclist = search_pattern(None,self.dbquery) reclist_with_nonpublic_subcolls = copy.deepcopy(reclist) # deduce the number of records: reclist.calculate_nbhits() reclist_with_nonpublic_subcolls.calculate_nbhits() # store the results: self.nbrecs = reclist_with_nonpublic_subcolls._nbhits self.reclist = reclist self.reclist_with_nonpublic_subcolls = reclist_with_nonpublic_subcolls # last but not least, update the speed-up flag: self.calculate_reclist_run_already = 1 # return the two sets: return (self.reclist, self.reclist_with_nonpublic_subcolls) def update_reclist(self): "Update the record universe for given collection; nbrecs, reclist of the collection table." if self.update_reclist_run_already: # do we have to reupdate? return 0 if options["verbose"] >= 3: write_message("... updating reclist of %s (%s recs)" % (self.name, self.nbrecs)) sys.stdout.flush() try: query = "UPDATE collection SET nbrecs=%d, reclist='%s' WHERE id=%d" % \ (self.nbrecs, escape_string(zlib.compress(Numeric.dumps(self.reclist._set))), self.id) res = run_sql(query) self.reclist_updated_since_start = 1 except MySQLdb.Error, e: print "Database Query Error %d: %s." % (e.args[0], e.args[1]) sys.exit(1) # last but not least, update the speed-up flag: self.update_reclist_run_already = 1 return 0 def usage(code, msg=''): "Prints usage info." if msg: sys.stderr.write("Error: %s.\n" % msg) sys.stderr.write("Usage: %s [collection][+]\n" % sys.argv[0]) sys.stderr.write("""Description: %s updates the collection cache (record universe for a given collection plus web page elements) based on WML and MySQL configuration parameters. If the collection name is passed as the second argument, it'll update this collection only. If the collection name is immediately followed by a plus sign, it will also update all its desdendants. The top-level collection name may be entered as the void string.\n""" % sys.argv[0]) sys.stderr.write("Example: %s update-reclist\n" % sys.argv[0]) sys.stderr.write("Example: %s update-webpage\n" % sys.argv[0]) sys.stderr.write("Example: %s update-webpage \"Articles & Preprints\"\n" % sys.argv[0]) sys.stderr.write("Example: %s update-webpage \"Articles & Preprints\"+\n" % sys.argv[0]) sys.stderr.write("Example: %s update-webpage \"\"\n" % sys.argv[0]) sys.stderr.write("Example: %s update-reclist \"\"+\n" % sys.argv[0]) sys.exit(code) def get_datetime(var, format_string="%Y-%m-%d %H:%M:%S"): """Returns a date string according to the format string. It can handle normal date strings and shifts with respect to now.""" date = time.time() shift_re=sre.compile("([-\+]{0,1})([\d]+)([dhms])") factors = {"d":24*3600, "h":3600, "m":60, "s":1} m = shift_re.match(var) if m: sign = m.groups()[0] == "-" and -1 or 1 factor = factors[m.groups()[2]] value = float(m.groups()[1]) date = time.localtime(date + sign * factor * value) date = time.strftime(format_string, date) else: date = time.strptime(var, format_string) date = time.strftime(format_string, date) return date def write_message(msg, stream=sys.stdout): """Prints message and flush output stream (may be sys.stdout or sys.stderr).""" if stream == sys.stdout or stream == sys.stderr: stream.write(time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime())) stream.write("%s\n" % msg) stream.flush() else: sys.stderr.write("Unknown stream %s. [must be sys.stdout or sys.stderr]\n" % stream) def task_sig_sleep(sig, frame): """Signal handler for the 'sleep' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("got signal %d" % sig) write_message("sleeping...") task_update_status("SLEEPING") signal.pause() # wait for wake-up signal def task_sig_wakeup(sig, frame): """Signal handler for the 'wakeup' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("got signal %d" % sig) write_message("continuing...") task_update_status("CONTINUING") def task_sig_stop(sig, frame): """Signal handler for the 'stop' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("got signal %d" % sig) write_message("stopping...") task_update_status("STOPPING") pass # FIXME: is there anything to be done? task_update_status("STOPPED") sys.exit(0) def task_sig_suicide(sig, frame): """Signal handler for the 'suicide' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("got signal %d" % sig) write_message("suiciding myself now...") task_update_status("SUICIDING") write_message("suicided") task_update_status("SUICIDED") sys.exit(0) def task_sig_unknown(sig, frame): """Signal handler for the other unknown signals sent by shell or user.""" write_message("unknown signal %d ignored" % sig) # do nothing for other signals def authenticate(user, header="WebColl Task Submission", action="runwebcoll"): """Authenticate the user against the user database. Check for its password, if it exists. Check for action access rights. Return user name upon authorization success, do system exit upon authorization failure. """ print header print "=" * len(header) if user == "": print >> sys.stdout, "\rUsername: ", user = string.strip(string.lower(sys.stdin.readline())) else: print >> sys.stdout, "\rUsername: ", user ## first check user pw: res = run_sql("select id,password from user where email=%s", (user,), 1) if not res: print "Sorry, %s does not exist." % user sys.exit(1) else: (uid_db, password_db) = res[0] if password_db: password_entered = getpass.getpass() if password_db == password_entered: pass else: print "Sorry, wrong credentials for %s." % user sys.exit(1) ## secondly check authorization for the action: (auth_code, auth_message) = acc_authorize_action(uid_db, action) if auth_code != 0: print auth_message sys.exit(1) return user def task_submit(options): """Submits task to the BibSched task queue. This is what people will be invoking via command line.""" ## sanity check: remove eventual "task" option: if options.has_key("task"): del options["task"] ## authenticate user: user = authenticate(options.get("user", "")) ## submit task: if options["verbose"] >= 9: print "" write_message("storing task options %s\n" % options) task_id = run_sql("""INSERT INTO schTASK (id,proc,user,runtime,sleeptime,status,arguments) VALUES (NULL,'webcoll',%s,%s,%s,'WAITING',%s)""", (user, options["runtime"], options["sleeptime"], marshal.dumps(options))) ## update task number: options["task"] = task_id run_sql("""UPDATE schTASK SET arguments=%s WHERE id=%s""", (marshal.dumps(options),task_id)) write_message("Task #%d submitted." % task_id) return task_id def task_update_progress(msg): """Updates progress information in the BibSched task table.""" global task_id return run_sql("UPDATE schTASK SET progress=%s where id=%s", (msg, task_id)) def task_update_status(val): """Updates status information in the BibSched task table.""" global task_id return run_sql("UPDATE schTASK SET status=%s where id=%s", (val, task_id)) def task_read_status(task_id): """Read status information in the BibSched task table.""" res = run_sql("SELECT status FROM schTASK where id=%s", (task_id,), 1) try: out = res[0][0] except: out = 'UNKNOWN' return out def task_get_options(id): """Returns options for the task 'id' read from the BibSched task queue table.""" out = {} res = run_sql("SELECT arguments FROM schTASK WHERE id=%s AND proc='webcoll'", (id,)) try: out = marshal.loads(res[0][0]) except: write_message("Error: WebColl task %d does not seem to exist." % id) sys.exit(1) return out def task_run(): """Run the WebColl task by fetching arguments from the BibSched task queue. This is what BibSched will be invoking via daemon call. The task will update collection reclist cache and collection web pages for given collection. (default is all). Arguments described in usage() function. Return 1 in case of success and 0 in case of failure.""" global task_id, options options = task_get_options(task_id) # get options from BibSched task table ## check task id: if not options.has_key("task"): write_message("Error: The task #%d does not seem to be a WebColl task." % task_id) return 0 ## check task status: task_status = task_read_status(task_id) if task_status != "WAITING": write_message("Error: The task #%d is %s. I expected WAITING." % (task_id, task_status)) return 0 ## we can run the task now: if options["verbose"]: write_message("Task #%d started." % task_id) task_update_status("RUNNING") ## initialize signal handler: signal.signal(signal.SIGUSR1, task_sig_sleep) signal.signal(signal.SIGTERM, task_sig_stop) signal.signal(signal.SIGABRT, task_sig_suicide) signal.signal(signal.SIGCONT, task_sig_wakeup) signal.signal(signal.SIGINT, task_sig_unknown) colls = [] # firstly, decide which collections to do: if options.has_key("collection"): coll = get_collection(options["collection"]) if coll.id == None: usage(1, 'Collection %s does not exist' % coll.name) colls.append(coll) else: res = run_sql("SELECT name FROM collection ORDER BY id") for row in res: colls.append(get_collection(row[0])) # secondly, update collection reclist cache: i = 0 for coll in colls: i += 1 if options["verbose"]: write_message("%s / reclist cache update" % coll.name) coll.calculate_reclist() coll.update_reclist() task_update_progress("Part 1/2: done %d/%d" % (i,len(colls))) # thirdly, update collection webpage cache: i = 0 for coll in colls: i += 1 if options["verbose"]: write_message("%s / web cache update" % coll.name) coll.update_webpage_cache() task_update_progress("Part 2/2: done %d/%d" % (i,len(colls))) ## we are done: task_update_progress("Done.") task_update_status("DONE") if options["verbose"]: write_message("Task #%d finished." % task_id) return 1 def usage(exitcode=1, msg=""): """Prints usage info.""" if msg: sys.stderr.write("Error: %s.\n" % msg) sys.stderr.write("Usage: %s [options]\n" % sys.argv[0]) sys.stderr.write("Command options:\n") sys.stderr.write(" -c, --collection\t Update only given collection. [all]\n") sys.stderr.write("Scheduling options:\n") sys.stderr.write(" -u, --user=USER \t User name to submit the task as, password needed.\n") sys.stderr.write(" -t, --runtime=TIME \t Time to execute the task (now), e.g.: +15s, 5m, 3h, 2002-10-27 13:57:26\n") sys.stderr.write(" -s, --sleeptime=SLEEP \t Sleeping frequency after which to repeat task (no), e.g.: 30m, 2h, 1d\n") sys.stderr.write("General options:\n") sys.stderr.write(" -h, --help \t\t Print this help.\n") sys.stderr.write(" -V, --version \t\t Print version information.\n") sys.stderr.write(" -v, --verbose=LEVEL \t Verbose level (from 0 to 9, default 1).\n") sys.stderr.write("""Description: %s updates the collection cache (record universe for a given collection plus web page elements) based on WML and MySQL configuration parameters. If the collection name is passed as the second argument, it'll update this collection only. If the collection name is immediately followed by a plus sign, it will also update all its desdendants. The top-level collection name may be entered as the void string.\n""" % sys.argv[0]) sys.exit(exitcode) def main(): """Main function that analyzes command line input and calls whatever is appropriate. Useful for learning on how to write BibSched tasks.""" global task_id ## parse command line: if len(sys.argv) == 2 and sys.argv[1].isdigit(): ## A - run the task task_id = int(sys.argv[1]) try: if not task_run(): write_message("Error occurred. Exiting.", sys.stderr) except StandardError, e: write_message("Unexpected error occurred: %s." % e, sys.stderr) write_message("Traceback is:", sys.stderr) traceback.print_tb(sys.exc_info()[2]) write_message("Exiting.", sys.stderr) task_update_status("ERROR") else: ## B - submit the task # set default values: options["runtime"] = time.strftime("%Y-%m-%d %H:%M:%S") options["verbose"] = 1 options["sleeptime"] = "" # set user-defined options: try: opts, args = getopt.getopt(sys.argv[1:], "hVv:u:s:t:c:", ["help", "version", "verbose=","user=","sleep=","time=","collection="]) except getopt.GetoptError, err: usage(1, err) try: for opt in opts: if opt[0] in ["-h", "--help"]: usage(0) elif opt[0] in ["-V", "--version"]: print __version__ sys.exit(0) elif opt[0] in [ "-u", "--user"]: options["user"] = opt[1] elif opt[0] in ["-v", "--verbose"]: options["verbose"] = int(opt[1]) elif opt[0] in [ "-s", "--sleeptime" ]: get_datetime(opt[1]) # see if it is a valid shift options["sleeptime"] = opt[1] elif opt[0] in [ "-t", "--runtime" ]: options["runtime"] = get_datetime(opt[1]) elif opt[0] in [ "-c", "--collection"]: options["collection"] = opt[1] else: usage(1) except StandardError, e: usage(e) task_submit(options) return ### okay, here we go: if __name__ == '__main__': main()