diff --git a/modules/webjournal/lib/webjournal_utils.py b/modules/webjournal/lib/webjournal_utils.py index 101a0b560..728880c72 100644 --- a/modules/webjournal/lib/webjournal_utils.py +++ b/modules/webjournal/lib/webjournal_utils.py @@ -1,1197 +1,1197 @@ # -*- coding: utf-8 -*- ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ Various utilities for WebJournal, e.g. config parser, etc. 
""" import time import datetime import re import os import cPickle import urllib from MySQLdb import OperationalError from xml.dom import minidom from invenio.config import \ CFG_ETCDIR, \ CFG_SITE_URL, \ CFG_CACHEDIR, \ CFG_SITE_LANG, \ CFG_ACCESS_CONTROL_LEVEL_SITE from invenio.dbquery import run_sql from invenio.bibformat_engine import BibFormatObject from invenio.search_engine import search_pattern ########################### REGULAR EXPRESSIONS ############################### header_pattern = re.compile('<p\s*(align=justify)??>\s*<strong>(?P<header>.*?)</strong>\s*</p>') para_pattern = re.compile('<p.*?>(?P<paragraph>.+?)</p>', re.DOTALL) image_pattern = re.compile(r''' (<a\s*href=["']?(?P<hyperlink>\S*)["']?>)?# get the link location for the image \s*# after each tag we can have arbitrary whitespaces <center># the image is always centered \s* <img\s*(class=["']imageScale["'])*?\s*src=(?P<image>\S*)\s*border=1\s*(/)?># getting the image itself \s* </center> \s* (</a>)? (<br />|<br />|<br/>)*# the caption can be separated by any nr of line breaks ( <b> \s* <i> \s* <center>(?P<caption>.*?)</center># getting the caption \s* </i> \s* </b> )?''', re.DOTALL | re.VERBOSE | re.IGNORECASE ) #''',re.DOTALL | re.IGNORECASE | re.VERBOSE | re.MULTILINE) ############################ MAPPING FUNCTIONS ################################ def get_featured_records(journal_name): """ Returns the 'featured' records i.e. records chosen to be displayed with an image on the main page, in the widgets section, for the given journal. 
parameter: journal_name - (str) the name of the journal for which we want to get the featured records returns: list of tuples (recid, img_url) """ try: feature_file = open('%s/webjournal/%s/featured_record' % \ (CFG_ETCDIR, journal_name)) except: return [] records = feature_file.readlines() return [(record.split('---', 1)[0], record.split('---', 1)[1]) \ for record in records if "---" in record] def add_featured_record(journal_name, recid, img_url): """ Adds the given record to the list of featured records of the given journal. parameters: journal_name - (str) the name of the journal to which the record should be added. recid - (int) the record id of the record to be featured. img_url - (str) a url to an image icon displayed along the featured record. returns: 0 if everything went ok 1 if record is already in the list 2 if other problems """ # Check that record is not already there featured_records = get_featured_records(journal_name) for featured_recid, featured_img in featured_records: if featured_recid == str(recid): return 1 try: fptr = open('%s/webjournal/%s/featured_record' % (CFG_ETCDIR, journal_name), "a") fptr.write(str(recid) + '---' + img_url + '\n') fptr.close() except: return 2 return 0 def remove_featured_record(journal_name, recid): """ Removes the given record from the list of featured records of the given journal. parameters: journal_name - (str) the name of the journal to which the record should be added. recid - (int) the record id of the record to be featured. """ featured_records = get_featured_records(journal_name) try: fptr = open('%s/webjournal/%s/featured_record' % (CFG_ETCDIR, journal_name), "w") for featured_recid, featured_img in featured_records: if str(featured_recid) != str(recid): fptr.write(str(featured_recid) + '---' + featured_img + \ '\n') fptr.close() except: return 1 return 0 def get_order_dict_from_recid_list(recids, issue_number): """ Returns the ordered list of input recids, for given 'issue_number'. 
def get_order_dict_from_recid_list(recids, issue_number):
    """
    Order the given recids as they appear in the given issue.

    Since there might be several articles at the same position, the
    returned structure is a dictionary with keys being the order number
    indicated in the record metadata (MARC 773__c) and values being the
    list of recids for this order number (recids for one position are
    ordered from highest to lowest recid).
    Eg: {'1': [2390, 2386, 2385], '3': [2388], '2': [2389], '4': [2387]}

    Records of the issue that carry no 773__c are appended at integer
    positions after the highest existing numeric order.

    :param recids: list of all recids that should be brought into order
    :param issue_number: the issue number for which we derive the order
                         (a single issue string, e.g. '02/2008')
    :return: dict mapping order -> list of recids
    """
    ordered_records = {}
    records_without_defined_order = []
    for record in recids:
        temp_rec = BibFormatObject(record)
        articles_info = temp_rec.fields('773__')
        for article_info in articles_info:
            if article_info.get('n', '') == issue_number:
                if 'c' in article_info:
                    order_number = article_info.get('c', '')
                    ordered_records.setdefault(order_number, []).append(record)
                else:
                    # No order? No problem! Append it at the end.
                    records_without_defined_order.append(record)
    # BUGFIX: the original did ``max(ordered_records.keys()) + 1`` --
    # 773__c values are *strings*, so str + int raised TypeError, and an
    # empty dict made max() raise ValueError.  It also stored a bare recid
    # where every other value is a list (the .reverse() below would crash).
    numeric_orders = [int(order) for order in ordered_records.keys()
                      if str(order).isdigit()]
    next_order = (numeric_orders and max(numeric_orders) or 0) + 1
    for record in records_without_defined_order:
        ordered_records[next_order] = [record]
        next_order += 1
    for records in ordered_records.values():
        # Reverse so that if there are several articles at same
        # positon, newest appear first
        records.reverse()
    return ordered_records
def get_records_in_same_issue_in_order(recid):
    """
    Obsolete placeholder -- not implemented (TODO: Remove?).

    :raises NotImplementedError: always
    """
    # BUGFIX: was ``raise ("Not implemented yet.")`` -- raising a plain
    # string is deprecated/invalid; raise a proper exception type.
    raise NotImplementedError("Not implemented yet.")

def get_rule_string_from_rule_list(rule_list, category):
    """
    Return the rule string whose leading category token matches
    ``category`` (case-insensitively).

    If several rules match, the last match wins (historical behaviour).
    If no rule matches, the first rule is returned; if ``rule_list`` is
    empty, "" is returned.
    """
    matched_index = 0
    for position, rule_string in enumerate(rule_list):
        category_from_config = rule_string.split(",")[0]
        if category_from_config.lower() == category.lower():
            matched_index = position
    try:
        return rule_list[matched_index]
    except IndexError:
        # Empty rule_list: keep the historical "" fallback.
        return ""

def get_categories_from_rule_list(rule_list):
    """
    Return the list of categories (the leading comma-separated token of
    each rule) defined for this configuration.
    """
    return [rule_string.split(',')[0] for rule_string in rule_list]

def get_category_from_rule_string(rule_string):
    """
    TODO: Remove? (kept for API compatibility; does nothing)
    """
    pass

def get_rule_string_from_category(category):
    """
    TODO: Remove? (kept for API compatibility; does nothing)
    """
    pass

######################## TIME / ISSUE FUNCTIONS ###############################

def get_monday_of_the_week(week_number, year):
    """
    CERN Bulletin specific function that returns a string indicating the
    Monday of the given week as: Monday <dd> <Month> <Year>
    """
    timetuple = issue_week_strings_to_times(['%s/%s' %
                                             (week_number, year), ])[0]
    return time.strftime("%A %d %B %Y", timetuple)

def get_issue_number_display(issue_number, journal_name, ln=None):
    """
    Return the display string for a given issue number, read from the DB.

    ``ln`` defaults to CFG_SITE_LANG; the default is resolved at call
    time so the module can be imported without the config loaded.
    """
    if ln is None:
        ln = CFG_SITE_LANG
    journal_id = get_journal_id(journal_name, ln)
    issue_display = run_sql("SELECT issue_display FROM jrnISSUE \
                             WHERE issue_number=%s AND id_jrnJOURNAL=%s",
                            (issue_number, journal_id))[0][0]
    return issue_display
""" current_issue = get_current_issue(ln, journal_name) week_number = current_issue.split("/")[0] year = current_issue.split("/")[1] current_issue_time = issue_week_strings_to_times(['%s/%s' % (week_number, year), ])[0] return current_issue_time def get_all_issue_weeks(issue_time, journal_name, ln): """ Function that takes an issue_number, checks the DB for the issue_display which can contain the other (update) weeks involved with this issue and returns all issues in a list of timetuples (always for Monday of each week). """ from invenio.webjournal_config import InvenioWebJournalIssueNotFoundDBError journal_id = get_journal_id(journal_name) issue_string = issue_times_to_week_strings([issue_time])[0] try: issue_display = run_sql( "SELECT issue_display FROM jrnISSUE WHERE issue_number=%s \ AND id_jrnJOURNAL=%s", (issue_string, journal_id))[0][0] except: raise InvenioWebJournalIssueNotFoundDBError(ln, journal_name, issue_string) issue_bounds = issue_display.split("/")[0].split("-") year = issue_display.split("/")[1] all_issue_weeks = [] if len(issue_bounds) == 2: # is the year changing? 
-> "52-02/2008" if int(issue_bounds[0]) > int(issue_bounds[1]): # get everything from the old year old_year_issues = [] low_bound_time = issue_week_strings_to_times(['%s/%s' % (issue_bounds[0], str(int(year)-1)), ])[0] # if the year changes over the week we always take the higher year low_bound_date = datetime.date(int(time.strftime("%Y", low_bound_time)), int(time.strftime("%m", low_bound_time)), int(time.strftime("%d", low_bound_time))) week_counter = datetime.timedelta(weeks=1) date = low_bound_date # count up the weeks until you get to the new year while date.year != int(year): old_year_issues.append(date.timetuple()) #format = time.strftime("%W/%Y", date.timetuple()) date = date + week_counter # get everything from the new year new_year_issues = [] for i in range(1, int(issue_bounds[1])+1): to_append = issue_week_strings_to_times(['%s/%s' % (i, year)])[0] new_year_issues.append(to_append) all_issue_weeks += old_year_issues all_issue_weeks += new_year_issues else: for i in range(int(issue_bounds[0]), int(issue_bounds[1])+1): to_append = issue_week_strings_to_times(['%s/%s' % (i, year)])[0] all_issue_weeks.append(to_append) elif len(issue_bounds) == 1: to_append = issue_week_strings_to_times(['%s/%s' % (issue_bounds[0], year)])[0] all_issue_weeks.append(to_append) else: return False return all_issue_weeks def count_down_to_monday(current_time): """ Takes a timetuple and counts it down to the next monday and returns this time. """ next_monday = datetime.date(int(time.strftime("%Y", current_time)), int(time.strftime("%m", current_time)), int(time.strftime("%d", current_time))) counter = datetime.timedelta(days=-1) while next_monday.weekday() != 0: next_monday = next_monday + counter return next_monday.timetuple() def get_next_journal_issues(current_issue_time, journal_name, ln=CFG_SITE_LANG, number=2): """ Returns the <number> next issue numbers from the current_issue_time. 
""" #now = '%s-%s-%s 00:00:00' % (int(time.strftime("%Y", current_issue_time)), # int(time.strftime("%m", current_issue_time)), # int(time.strftime("%d", current_issue_time))) # now = datetime.date(int(time.strftime("%Y", current_issue_time)), int(time.strftime("%m", current_issue_time)), int(time.strftime("%d", current_issue_time))) week_counter = datetime.timedelta(weeks=1) date = now next_issues = [] for i in range(1, number+1): date = date + week_counter #date = run_sql("SELECT %s + INTERVAL 1 WEEK", (date,))[0][0] #date_formated = time.strptime(date, "%Y-%m-%d %H:%M:%S") #raise '%s %s' % (repr(now), repr(date_formated)) next_issues.append(date.timetuple()) #next_issues.append(date_formated) return next_issues def issue_times_to_week_strings(issue_times, ln=CFG_SITE_LANG): """ Function that approaches a correct python time to MySQL time week string conversion by looking up and down the time horizon and always rechecking the python time with the mysql result until a week string match is found. 
def issue_times_to_week_strings(issue_times, ln=None):
    """
    Convert Python timetuples to 'WW/YYYY' week strings, reconciling
    Python's and MySQL's diverging week numbering.

    Starting from Python's %W/%Y guess, the week string is stepped up
    (then down) at most 6 times until MySQL maps it back onto the same
    date (see get_consistent_issue_week).

    :raises InvenioWebJournalReleaseDBError: when no consistent week
        string can be found within the search window.
    """
    if ln is None:
        ln = CFG_SITE_LANG
    issue_strings = []
    for issue in issue_times:
        # Initial pythonic week guess; ' Monday' pins the weekday for
        # the MySQL STR_TO_DATE round-trip.
        week = time.strftime("%W/%Y", issue) + " Monday"
        success = False
        # Try going up 6 weeks...
        for _ in range(6):
            success = get_consistent_issue_week(issue, week)
            if success:
                break
            week = count_week_string_up(week)
        # ...then down 6 from where the up-search stopped.
        if not success:
            for _ in range(6):
                success = get_consistent_issue_week(issue, week)
                if success:
                    break
                week = count_week_string_down(week)
        from invenio.webjournal_config import InvenioWebJournalReleaseDBError
        if not success:
            raise InvenioWebJournalReleaseDBError(ln)
        issue_strings.append(week.split(" ")[0])
    return issue_strings

def count_week_string_up(week):
    """
    Return the 'WW/YYYY' week string one week after ``week``.

    Week 53 wraps to week 01 of the following year; week numbers stay
    zero-padded to two digits ('09/2008' -> '10/2008').  Any trailing
    text after the year (e.g. ' Monday') is preserved.
    NOTE(review): assumes every year has 53 weeks -- counting up from
    week 52 of a 52-week year yields a non-existent '53/YYYY'; callers
    re-validate against MySQL (get_consistent_issue_week) -- confirm.
    """
    week_nr, year = week.split("/", 1)
    if week_nr == "53":
        return "01/%s" % (int(year) + 1)
    return "%s/%s" % (str(int(week_nr) + 1).zfill(2), year)

def count_week_string_down(week):
    """
    Return the 'WW/YYYY' week string one week before ``week``.

    Week 01 wraps to week 53 of the previous year; week numbers stay
    zero-padded to two digits ('10/2008' -> '09/2008').
    """
    week_nr, year = week.split("/", 1)
    if week_nr == "01":
        return "53/%s" % (int(year) - 1)
    return "%s/%s" % (str(int(week_nr) - 1).zfill(2), year)
def get_consistent_issue_week(issue_time, issue_week):
    """
    Central consistency check between our Python and MySQL dates.

    We use MySQL times because of a platform bug that prevents
    reconverting a week number to a timetuple in Python.  The function
    takes a week string (e.g. '02/2008 Monday') and its corresponding
    Python timetuple, asks MySQL (STR_TO_DATE) for the date of that
    week string, and compares the two.  Returns ``issue_week`` when they
    agree, False otherwise (callers then step the week string up/down,
    see issue_times_to_week_strings()).
    """
    conversion_rule = '%v/%x %W'
    mysql_repr = run_sql("SELECT STR_TO_DATE(%s, %s)",
                         (issue_week, conversion_rule))[0][0]
    if mysql_repr.timetuple() == issue_time:
        return issue_week
    return False

def issue_week_strings_to_times(issue_weeks, ln=None):
    """
    Convert a list of issue week strings (WW/YYYY) to Python timetuples
    (Monday of each week), delegating the week->date conversion to
    MySQL STR_TO_DATE.

    ``ln`` is unused; kept for backward compatibility (and no longer
    evaluated at import time).
    """
    issue_times = []
    for issue in issue_weeks:
        week_number = issue.split("/")[0]
        year = issue.split("/")[1]
        result = run_sql("SELECT STR_TO_DATE(%s, %s)",
                         ('%s/%s Monday' % (year, week_number),
                          '%x/%v %W'))[0][0]
        issue_times.append(result.timetuple())
    return issue_times

def sort_by_week_number(x, y):
    """
    Comparator ordering 'WW/YYYY' strings chronologically: by year
    first, then by week (both compared as zero-padded strings).

    Returns -1/0/1 like the old cmp() builtin, but is implemented
    without it for forward portability (cmp() is Python-2-only).
    """
    key_x = (x.split("/")[1], x.split("/")[0])
    key_y = (y.split("/")[1], y.split("/")[0])
    return (key_x > key_y) - (key_x < key_y)
def get_number_of_articles_for_issue(issue, journal_name, ln=None):
    """
    Return a dict {category_name: number_of_articles} for the given
    issue of the given journal, based on the journal's configured rules.

    For zero-padded issues ('02/2008') the unpadded spelling ('2/2008')
    is also searched, mirroring legacy records.
    NOTE(review): a record carrying both spellings in repeated 773__n
    fields would be counted twice -- preexisting behaviour, kept as-is.

    ``ln`` is unused; kept for backward compatibility.
    """
    config_strings = get_xml_from_config(["rule"], journal_name)
    rule_list = config_strings["rule"]
    all_articles = {}
    query = '65017a:"%s" and 773__n:%s'
    for rule in rule_list:
        category_name = rule.split(",")[0]
        records = list(search_pattern(p=query % (category_name, issue)))
        if issue.startswith("0") and len(issue) == 7:
            week_nr = issue.split("/")[0]
            year = issue.split("/")[1]
            issue_nr_alternative = "%s/%s" % (week_nr[1], year)
            records += list(search_pattern(p=query %
                                           (category_name,
                                            issue_nr_alternative)))
        all_articles[category_name] = len(records)
    return all_articles

def get_list_of_issues_for_publication(publication):
    """
    Expand a publication string, e.g. '23-24/2008', into the list of
    single issues: ['23/2008', '24/2008'].

    A bound pair that wraps the year ('52-01/2008') expands through week
    53 of the *previous* year: ['52/2007', '53/2007', '01/2008'].
    Expanded issue numbers are zero-padded to two digits.
    """
    year = publication.split("/")[1]
    issues_string = publication.split("/")[0]
    bounds = issues_string.split("-")
    issues = []
    if len(bounds) == 2:
        low_bound = int(bounds[0])
        high_bound = int(bounds[1])
        if low_bound < high_bound:
            issues = ["%s/%s" % (str(i).zfill(2), year)
                      for i in range(low_bound, high_bound + 1)]
        else:
            # Year wrap: finish the old year (through week 53), then
            # continue from week 1 of the given year.
            issues = ["%s/%s" % (str(i).zfill(2), int(year) - 1)
                      for i in range(low_bound, 54)]
            issues += ["%s/%s" % (str(i).zfill(2), year)
                       for i in range(1, high_bound + 1)]
    else:
        # Single issue: keep the bound exactly as given (no re-padding).
        issues.append("%s/%s" % (bounds[0], year))
    return issues
""" journal_id = get_journal_id(journal_name, ln) try: release_date = run_sql("SELECT date_released FROM jrnISSUE \ WHERE issue_number=%s AND id_jrnJOURNAL=%s", (issue, journal_id))[0][0] except: return False if release_date == None: return False else: return release_date.timetuple() def get_announcement_time(issue, journal_name, ln=CFG_SITE_LANG): """ Get the date at which an issue was announced through the alert system. """ journal_id = get_journal_id(journal_name, ln) try: announce_date = run_sql("SELECT date_announced FROM jrnISSUE \ WHERE issue_number=%s AND id_jrnJOURNAL=%s", (issue, journal_id))[0][0] except: return False if announce_date == None: return False else: return announce_date.timetuple() ######################## GET DEFAULTS FUNCTIONS ############################### def get_journal_info_path(journal_name): """ Returns the path to the info file of the given journal. The info file should be used to get information about a journal when database is not available. Returns None if path cannot be determined """ # We must make sure we don't try to read outside of webjournal # cache dir - info_path = os.path.realpath("%s/webjournal/%s/info.dat"% \ + info_path = os.path.realpath("%s/webjournal/%s/info.dat" % \ (CFG_CACHEDIR, journal_name)) if info_path.startswith(CFG_CACHEDIR + '/webjournal/'): return info_path else: return None def get_journal_id(journal_name, ln=CFG_SITE_LANG): """ Get the id for this journal from the DB. If DB is down, try to get from cache. 
""" journal_id = None from invenio.webjournal_config import InvenioWebJournalJournalIdNotFoundDBError if CFG_ACCESS_CONTROL_LEVEL_SITE == 2: # do not connect to the database as the site is closed for # maintenance: journal_info_path = get_journal_info_path(journal_name) try: journal_info_file = open(journal_info_path, 'r') journal_info = cPickle.load(journal_info_file) journal_id = journal_info.get('journal_id', None) except cPickle.PickleError, e: journal_id = None except IOError: journal_id = None else: try: res = run_sql("SELECT id FROM jrnJOURNAL WHERE name=%s", (journal_name,)) if len(res) > 0: journal_id = res[0][0] except OperationalError, e: # Cannot connect to database. Try to read from cache journal_info_path = get_journal_info_path(journal_name) try: journal_info_file = open(journal_info_path, 'r') journal_info = cPickle.load(journal_info_file) journal_id = journal_info['journal_id'] except cPickle.PickleError, e: journal_id = None except IOError: journal_id = None if journal_id is None: raise InvenioWebJournalJournalIdNotFoundDBError(ln, journal_name) return journal_id def guess_journal_name(ln): """ tries to take a guess what a user was looking for on the server if not providing a name for the journal. if there is only one journal on the server, returns the name of which, otherwise redirects to a list with possible journals. 
""" from invenio.webjournal_config import InvenioWebJournalNoJournalOnServerError from invenio.webjournal_config import InvenioWebJournalNoNameError journals_id_and_names = get_journals_ids_and_names() if len(journals_id_and_names) == 0: raise InvenioWebJournalNoJournalOnServerError(ln) elif len(journals_id_and_names) > 0 and \ journals_id_and_names[0].has_key('journal_name'): return journals_id_and_names[0]['journal_name'] else: raise InvenioWebJournalNoNameError(ln) ## all_journals = run_sql("SELECT * FROM jrnJOURNAL ORDER BY id") ## if len(all_journals) == 0: ## # try to get from file, in case DB is down ## raise InvenioWebJournalNoJournalOnServerError(ln) ## elif len(all_journals) > 0: ## return all_journals[0][1] ## else: ## raise InvenioWebJournalNoNameError(ln) def get_journals_ids_and_names(): """ Returns the list of existing journals IDs and names. Try to read from the DB, or from cache if DB is not accessible. """ journals = [] if CFG_ACCESS_CONTROL_LEVEL_SITE == 2: # do not connect to the database as the site is closed for # maintenance: files = os.listdir("%s/webjournal" % CFG_CACHEDIR) info_files = [path + os.sep + 'info.dat' for path in files if \ os.path.isdir(path) and \ os.path.exists(path + os.sep + 'info.dat')] for info_file in info_files: try: journal_info_file = open(info_file, 'r') journal_info = cPickle.load(journal_info_file) journal_id = journal_info.get('journal_id', None) journal_name = journal_info.get('journal_name', None) current_issue = journal_info.get('current_issue', None) if journal_id is not None and \ journal_name is not None: journals.append({'journal_id': journal_id, 'journal_name': journal_name, 'current_issue': current_issue}) except cPickle.PickleError, e: # Well, can't do anything... continue except IOError: # Well, can't do anything... 
continue else: try: res = run_sql("SELECT id, name FROM jrnJOURNAL ORDER BY id") for journal_id, journal_name in res: journals.append({'journal_id': journal_id, 'journal_name': journal_name}) except OperationalError, e: # Cannot connect to database. Try to read from cache files = os.listdir("%s/webjournal" % CFG_CACHEDIR) info_files = [path + os.sep + 'info.dat' for path in files if \ os.path.isdir(path) and \ os.path.exists(path + os.sep + 'info.dat')] for info_file in info_files: try: journal_info_file = open(info_file, 'r') journal_info = cPickle.load(journal_info_file) journal_id = journal_info.get('journal_id', None) journal_name = journal_info.get('journal_name', None) current_issue = journal_info.get('current_issue', None) if journal_id is not None and \ journal_name is not None: journals.append({'journal_id': journal_id, 'journal_name': journal_name, 'current_issue': current_issue}) except cPickle.PickleError, e: # Well, can't do anything... continue except IOError: # Well, can't do anything... continue return journals def get_current_issue(ln, journal_name): """ Returns the current issue of a journal as a string. """ journal_id = get_journal_id(journal_name, ln) try: current_issue = run_sql("SELECT issue_number FROM jrnISSUE \ WHERE date_released <= NOW() AND id_jrnJOURNAL=%s \ ORDER BY date_released DESC LIMIT 1", (journal_id,))[0][0] except: # start the first journal ever with the day of today current_issue = time.strftime("%W/%Y", time.localtime()) run_sql("INSERT INTO jrnISSUE \ (id_jrnJOURNAL, issue_number, issue_display) \ VALUES(%s, %s, %s)", (journal_id, current_issue, current_issue)) return current_issue def get_current_publication(journal_name, current_issue, ln=CFG_SITE_LANG): """ Returns the current publication string (current issue + updates). 
""" journal_id = get_journal_id(journal_name, ln) current_publication = run_sql("SELECT issue_display FROM jrnISSUE \ WHERE issue_number=%s AND \ id_jrnJOURNAL=%s", (current_issue, journal_id))[0][0] return current_publication def get_xml_from_config(xpath_list, journal_name): """ wrapper for minidom.getElementsByTagName() Takes a list of string expressions and a journal name and searches the config file of this journal for the given xpath queries. Returns a dictionary with a key for each query and a list of string (innerXml) results for each key. Has a special field "config_fetching_error" that returns an error when something has gone wrong. """ # get and open the config file results = {} config_path = '%s/webjournal/%s/config.xml' % (CFG_ETCDIR, journal_name) config_file = minidom.Document try: config_file = minidom.parse("%s" % config_path) except: #todo: raise exception "error: no config file found" results["config_fetching_error"] = "could not find config file" return results for xpath in xpath_list: result_list = config_file.getElementsByTagName(xpath) results[xpath] = [] for result in result_list: try: result_string = result.firstChild.toxml(encoding="utf-8") except: # WARNING, config did not have a value continue results[xpath].append(result_string) return results def parse_url_string(uri): """ Centralized function to parse any url string given in webjournal. Useful to retrieve current category, journal, etc. 
def parse_url_string(uri):
    """
    Centralized function to parse any URL string given in WebJournal.
    Useful to retrieve current category, journal, etc. from within
    format elements.

    Handles URIs of the form
    ``.../journal/<name>/<year>/<number>/<category>/<recid>?ln=..&verbose=..``

    :param uri: the request URI (str)
    :return: dict with keys journal_name, issue_year, issue_number,
             issue, category, recid, verbose, ln, archive_year
    """
    args = {'journal_name': '',
            'issue_year': '',
            'issue_number': '',
            'issue': '',
            'category': '',
            'recid': '',
            'verbose': 0,
            'ln': CFG_SITE_LANG,
            'archive_year': ''}
    # Take everything after 'journal' and before the first question mark
    second_part = uri.split('journal', 1)[1]
    splitted_uri = second_part.split('?')
    uri_middle_part = splitted_uri[0]
    uri_arguments = ''
    if len(splitted_uri) > 1:
        uri_arguments = splitted_uri[1]
    args['ln'] = 'en'
    args['verbose'] = 0
    for arg_pair in uri_arguments.split("&"):
        arg_and_value = arg_pair.split('=')
        if len(arg_and_value) == 2:
            if arg_and_value[0] == 'ln':
                args['ln'] = arg_and_value[1]
            elif arg_and_value[0] == 'verbose' and \
                     arg_and_value[1].isdigit():
                args['verbose'] = arg_and_value[1]
            elif arg_and_value[0] == 'archive_year':
                args['archive_year'] = arg_and_value[1]
            elif arg_and_value[0] == 'name':
                args['journal_name'] = arg_and_value[1]
    arg_list = uri_middle_part.split("/")
    if len(arg_list) > 1 and arg_list[1] not in ['search', 'contact']:
        args['journal_name'] = urllib.unquote(arg_list[1])
    elif len(arg_list) <= 1:
        # BUGFIX: the original evaluated ``arg_list[1] not in [...]``
        # here, raising IndexError whenever the URI ended at '/journal'.
        # Fall back to guessing the journal name instead.
        args['journal_name'] = guess_journal_name(args['ln'])
    if len(arg_list) > 2:
        args['issue_year'] = urllib.unquote(arg_list[2])
    else:
        issue = get_current_issue(args['ln'], args['journal_name'])
        args['issue'] = issue
        args['issue_year'] = issue.split('/')[1]
        args['issue_number'] = issue.split('/')[0]
    if len(arg_list) > 3:
        args['issue_number'] = urllib.unquote(arg_list[3])
        args['issue'] = args['issue_number'] + "/" + args['issue_year']
    if len(arg_list) > 4:
        args['category'] = urllib.unquote(arg_list[4])
    if len(arg_list) > 5:
        args['recid'] = urllib.unquote(arg_list[5])
    # TODO : wash arguments
    return args

def make_journal_url(current_uri, custom_parameters=None):
    """
    Create a URL, using the current URI and overriding values with the
    given custom_parameters (a dict of arg name -> value).
    """
    # None sentinel instead of a shared mutable default argument.
    if custom_parameters is None:
        custom_parameters = {}
    default_params = parse_url_string(current_uri)
    for key, value in custom_parameters.items():
        # Override default params with custom params
        default_params[key] = str(value)
    uri = CFG_SITE_URL + '/journal/'
    if default_params['journal_name']:
        uri += urllib.quote(default_params['journal_name']) + '/'
    if default_params['issue_year']:
        uri += urllib.quote(default_params['issue_year']) + '/'
    if default_params['issue_number']:
        uri += urllib.quote(default_params['issue_number']) + '/'
    if default_params['category']:
        uri += urllib.quote(default_params['category'])
    if default_params['recid']:
        uri += '/' + urllib.quote(str(default_params['recid']))
    printed_question_mark = False
    if default_params['ln']:
        uri += '?ln=' + default_params['ln']
        printed_question_mark = True
    if default_params['verbose'] != 0:
        if printed_question_mark:
            uri += '&verbose=' + str(default_params['verbose'])
        else:
            uri += '?verbose=' + str(default_params['verbose'])
    return uri

############################ CACHING FUNCTIONS ################################

def cache_index_page(html, journal_name, category, issue, ln):
    """
    Caches the index page main area of a Bulletin
    (right hand menu cannot be cached).
    Returns False when the computed path escapes the webjournal cache
    directory.
    """
    issue = issue.replace("/", "_")
    category = category.replace(" ", "")
    cache_path = os.path.realpath('%s/webjournal/%s/%s_index_%s_%s.html' %
                                  (CFG_CACHEDIR, journal_name,
                                   issue, category, ln))
    if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'):
        # Mmh, not accessing correct path. Stop caching
        return False
    cache_path_dir = '%s/webjournal/%s' % (CFG_CACHEDIR, journal_name)
    if not os.path.isdir(cache_path_dir):
        os.makedirs(cache_path_dir)
    cached_file = open(cache_path, "w")
    try:
        cached_file.write(html)
    finally:
        # Always release the file handle, even if the write fails.
        cached_file.close()
""" issue = issue.replace("/", "_") category = category.replace(" ", "") cache_path = os.path.realpath('%s/webjournal/%s/%s_index_%s_%s.html' % \ (CFG_CACHEDIR, journal_name, issue, category, ln)) if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'): # Mmh, not accessing correct path. Stop reading cache return False try: cached_file = open(cache_path).read() except: return False return cached_file def cache_article_page(html, journal_name, category, recid, issue, ln): """ Caches an article view of a journal. """ issue = issue.replace("/", "_") category = category.replace(" ", "") cache_path = os.path.realpath('%s/webjournal/%s/%s_article_%s_%s_%s.html' % \ (CFG_CACHEDIR, journal_name, issue, category, recid, ln)) if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'): # Mmh, not accessing correct path. Stop caching return cache_path_dir = '%s/webjournal/%s' % (CFG_CACHEDIR, journal_name) if not os.path.isdir(cache_path_dir): os.makedirs(cache_path_dir) cached_file = open(cache_path, "w") cached_file.write(html) cached_file.close() def get_article_page_from_cache(journal_name, category, recid, issue, ln): """ Gets an article view of a journal from cache. False if not in cache. """ issue = issue.replace("/", "_") category = category.replace(" ", "") cache_path = os.path.realpath('%s/webjournal/%s/%s_article_%s_%s_%s.html' % \ (CFG_CACHEDIR, journal_name, issue, category, recid, ln)) if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'): # Mmh, not accessing correct path. Stop reading cache return False try: cached_file = open(cache_path).read() except: return False return cached_file def clear_cache_for_article(journal_name, category, recid, issue): """ Resets the cache for an article (e.g. 
def clear_cache_for_article(journal_name, category, recid, issue):
    """
    Resets the cache for an article (e.g. after an article has been
    modified): removes the cached article pages (en+fr), the category
    index pages, and the issue's recid order map.

    Returns False when journal_name escapes the cache dir, True
    otherwise.
    """
    issue = issue.replace("/", "_")
    category = category.replace(" ", "")
    # BUGFIX: the original built this path with a 5-placeholder format
    # string but supplied only 2 arguments, raising TypeError on every
    # call.  Only the directory is needed for the safety check below.
    cache_path = os.path.realpath('%s/webjournal/%s' %
                                  (CFG_CACHEDIR, journal_name))
    if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'):
        # Mmh, not accessing correct path. Stop deleting cache
        return False
    # Best-effort removals: a missing file is not an error.
    candidates = [
        # the article cached files
        '%s/webjournal/%s/%s_article_%s_%s_en.html' %
            (CFG_CACHEDIR, journal_name, issue, category, recid),
        '%s/webjournal/%s/%s_article_%s_%s_fr.html' %
            (CFG_CACHEDIR, journal_name, issue, category, recid),
        # the index pages for the category
        '%s/webjournal/%s/%s_index_%s_en.html' %
            (CFG_CACHEDIR, journal_name, issue, category),
        '%s/webjournal/%s/%s_index_%s_fr.html' %
            (CFG_CACHEDIR, journal_name, issue, category),
        # the recid_order_map entry (todo: make this per entry)
        '%s/webjournal/%s/%s_recid_order_map.dat' %
            (CFG_CACHEDIR, journal_name, issue),
    ]
    for path in candidates:
        try:
            os.remove(path)
        except OSError:
            pass
    return True

def clear_cache_for_issue(journal_name, issue):
    """
    Clears the cache of a whole issue ('WW/YYYY'): removes every cached
    file whose name starts with the 'WW_YYYY' issue prefix.

    Returns False when the path escapes the cache dir or a removal
    fails, True otherwise.
    """
    issue = issue.replace("/", "_")
    cache_path_dir = os.path.realpath('%s/webjournal/%s' %
                                      (CFG_CACHEDIR, journal_name))
    if not cache_path_dir.startswith(CFG_CACHEDIR + '/webjournal'):
        # Mmh, not accessing correct path. Stop deleting cache
        return False
    for cached_file in os.listdir(cache_path_dir):
        # issue prefix 'WW_YYYY' is 7 characters long
        if cached_file[:7] == issue:
            try:
                os.remove(cache_path_dir + '/' + cached_file)
            except OSError:
                return False
    return True
## """ ## issue = issue.replace("/", "_") ## # get whats in there ## if not os.path.isdir('%s/webjournal/CERNBulletin' % CFG_CACHEDIR): ## os.makedirs('%s/webjournal/CERNBulletin' % CFG_CACHEDIR) ## try: ## temp_file = open('%s/webjournal/CERNBulletin/%s_recid_order_map.dat' ## % (CFG_CACHEDIR, issue)) ## except: ## temp_file = open('%s/webjournal/CERNBulletin/%s_recid_order_map.dat' ## % (CFG_CACHEDIR, issue), "w") ## try: ## recid_map = cPickle.load(temp_file) ## except: ## recid_map = "" ## temp_file.close() ## # add new recid ## if recid_map == "": ## recid_map = {} ## if not recid_map.has_key(rule): ## recid_map[rule] = {} ## recid_map[rule][order] = recid ## # save back ## temp_file = open('%s/webjournal/CERNBulletin/%s_recid_order_map.dat' ## % (CFG_CACHEDIR, issue), "w") ## cPickle.dump(recid_map, temp_file) ## temp_file.close() ## def get_cached_recid_data_dict_CERNBulletin(issue, rule): ## """ ## Function to restore from cache the dict Data Type that the CERN Bulletin ## uses for mapping between the order of an article and its recid. ## """ ## issue = issue.replace("/", "_") ## try: ## temp_file = open('%s/webjournal/CERNBulletin/%s_recid_order_map.dat' ## % (CFG_CACHEDIR, issue)) ## except: ## return {} ## try: ## recid_map = cPickle.load(temp_file) ## except: ## return {} ## try: ## recid_dict = recid_map[rule] ## except: ## recid_dict = {} ## return recid_dict ######################### CERN SPECIFIC FUNCTIONS ############################# def get_recid_from_legacy_number(issue_number, category, number): """ Returns the recid based on the issue number, category and 'number'. This is used to support URLs using the now deprecated 'number' argument. The function tries to reproduce the behaviour of the old way of doing, even keeping some of its 'problems' (so that we reach the same article as before with a given number).. 
Returns the recid as int, or -1 if not found """ recids = [] if issue_number[0] == "0": alternative_issue_number = issue_number[1:] recids = list(search_pattern(p='65017a:"%s" and 773__n:%s' % (category, issue_number))) recids.extend(list(search_pattern(p='65017a:"%s" and 773__n:%s' % (category, alternative_issue_number)))) else: recids = list(search_pattern(p='65017:"%s" and 773__n:%s' % (category, issue_number))) # Now must order the records and pick the one at index 'number'. # But we have to take into account that there can be multiple # records at position 1, and that these additional records should # be numbered with negative numbers: # 1, 1, 1, 2, 3 -> 1, -1, -2, 2, 3... negative_index_records = {} positive_index_records = {} # Fill in 'negative_index_records' and 'positive_index_records' # lists with the following loop for recid in recids: bfo = BibFormatObject(recid) order = [subfield['c'] for subfield in bfo.fields('773__') if \ issue_number in subfield['n']] if len(order) > 0: # If several orders are defined for the same article and # the same issue, keep the first one order = order[0] if order.isdigit(): # Order must be an int. Otherwise skip order = int(order) if order == 1 and positive_index_records.has_key(1): # This is then a negative number for this record index = (len(negative_index_records.keys()) > 0 and \ min(negative_index_records.keys()) -1) or 0 negative_index_records[index] = recid else: # Positive number for this record if not positive_index_records.has_key(order): positive_index_records[order] = recid else: # We make the assumption that we cannot have # twice the same position for two # articles. Previous WebJournal module was not # clear about that. Just drop this record # (better than crashing or looping forever..) 
pass recid_to_return = -1 # Ok, we can finally pick the recid corresponding to 'number' if number <= 0: negative_indexes = negative_index_records.keys() negative_indexes.sort() negative_indexes.reverse() if len(negative_indexes) > abs(number): recid_to_return = negative_index_records[negative_indexes[abs(number)]] else: #positive_indexes = positive_index_records.keys() #positive_indexes.sort() #if len(positive_indexes) >= number: # recid_to_return = positive_index_records[positive_indexes[number -1]] if positive_index_records.has_key(number): recid_to_return = positive_index_records[number] return recid_to_return diff --git a/modules/webjournal/lib/webjournaladminlib.py b/modules/webjournal/lib/webjournaladminlib.py index a72c9762e..b7f595eca 100644 --- a/modules/webjournal/lib/webjournaladminlib.py +++ b/modules/webjournal/lib/webjournaladminlib.py @@ -1,842 +1,855 @@ ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
# pylint: disable-msg=C0301 """CDS Invenio WebJournal Administration Interface.""" __revision__ = "$Id$" import sets import smtplib import cPickle import re import os import MimeWriter import mimetools import cStringIO from urllib2 import urlopen from invenio.errorlib import register_exception from invenio.config import \ CFG_SITE_URL, \ CFG_SITE_LANG, \ CFG_SITE_NAME, \ - CFG_ETCDIR + CFG_ETCDIR, \ + CFG_CACHEDIR from invenio.messages import gettext_set_language from invenio.webjournal_config import \ InvenioWebJournalJournalIdNotFoundDBError, \ InvenioWebJournalReleaseUpdateError, \ InvenioWebJournalIssueNotFoundDBError from invenio.webjournal_utils import \ get_journals_ids_and_names, \ guess_journal_name, \ get_current_issue, \ get_current_publication, \ get_list_of_issues_for_publication, \ count_week_string_up, \ get_featured_records, \ add_featured_record, \ remove_featured_record, \ clear_cache_for_issue, \ get_current_issue_time, \ get_all_issue_weeks, \ get_next_journal_issues, \ issue_times_to_week_strings, \ issue_week_strings_to_times, \ get_release_time, \ get_journal_id, \ sort_by_week_number, \ get_xml_from_config, \ get_journal_info_path from invenio.dbquery import run_sql import invenio.template wjt = invenio.template.load('webjournal') def getnavtrail(previous = ''): """Get the navtrail""" navtrail = """<a class="navtrail" href="%s/help/admin">Admin Area</a> """ % (CFG_SITE_URL,) navtrail = navtrail + previous return navtrail def perform_index(ln=CFG_SITE_LANG, journal_name=None, action=None): """ Index page Lists the journals, and offers options to edit them, delete them or add new journal. 
Parameters: journal_name - the journal affected by action, if any action - one of ['', 'askDelete', _('Delete'), _('Cancel')] ln - language """ _ = gettext_set_language(ln) msg = None if action == 'askDelete' and journal_name is not None: msg = '''<fieldset style="display:inline;margin-left:auto;margin-right:auto;"> <legend>Delete Journal Configuration</legend><span style="color:#f00">Are you sure you want to delete the configuration of %(journal_name)s? <form action="%(CFG_SITE_URL)s/admin/webjournal/webjournaladmin.py"> <input type="hidden" name="journal_name" value="%(journal_name)s" /> <input class="formbutton" type="submit" name="action" value="%(delete)s" /> <input class="formbutton" type="submit" name="action" value="%(cancel)s" /> </form></span></fieldset>''' % {'CFG_SITE_URL': CFG_SITE_URL, 'journal_name': journal_name, 'delete': _("Delete"), 'cancel': _("Cancel")} if action == _("Delete") and journal_name is not None: # User confirmed and clicked on "Delete" button remove_journal(journal_name) journals = get_journals_ids_and_names() return wjt.tmpl_admin_index(ln=ln, journals=journals, msg=msg) def perform_administrate(ln=CFG_SITE_LANG, journal_name=None): """ Administration of a journal Show the current and next issues/publications, and display links to more specific administrative pages. Parameters: journal_name - the journal to be administrated ln - language """ if journal_name is None: try: journal_name = guess_journal_name(ln) except InvenioWebJournalNoJournalOnServerError, e: return e.user_box() if not can_read_xml_config(journal_name): return '<span style="color:#f00">Configuration could not be read. 
Please check that %s/webjournal/%s/config.xml exists and can be read by the server.</span><br/>' % (CFG_ETCDIR, journal_name) current_issue = get_current_issue(ln, journal_name) current_publication = get_current_publication(journal_name, current_issue, ln) issue_list = get_list_of_issues_for_publication(current_publication) next_issue_number = count_week_string_up(issue_list[-1]) return wjt.tmpl_admin_administrate(journal_name, current_issue, current_publication, issue_list, next_issue_number, ln) def perform_feature_record(journal_name, recid, img_url='', action='', ln=CFG_SITE_LANG): """ Interface to feature a record Used to list, add and remove featured records of the journal. Parameters: journal_name - the journal for which the article is featured recid - the record affected by 'action' img_url - the URL to image displayed with given record (only when action == 'add') action - One of ['', 'add', 'askremove', _('Remove'), _('Cancel')] ln - language """ _ = gettext_set_language(ln) if action == 'add': result = add_featured_record(journal_name, recid, img_url) if result == 0: msg ='''<span style="color:#0f0">Successfully featured <a href="%(CFG_SITE_URL)s/record/%(recid)s">record %(recid)s</a>. Go to the <a href="%(CFG_SITE_URL)s/journal/%(name)s">%(name)s journal</a> to see the result.</span>''' % {'CFG_SITE_URL': CFG_SITE_URL, 'name': journal_name, 'recid': recid} elif result == 1: msg = '''<span style="color:#f00"><a href="%(CFG_SITE_URL)s/record/%(recid)s">record %(recid)s</a> is already featured. Choose another one or remove it first.</span>''' % \ {'CFG_SITE_URL': CFG_SITE_URL, 'recid': recid} else: msg = '''<span style="color:#f00">Record could not be featured. 
Check file permission.</span>''' featured_records = get_featured_records(journal_name) return wjt.tmpl_admin_feature_record(ln=ln, journal_name=journal_name, featured_records=featured_records, msg=msg) elif action == 'askremove': msg = '''<fieldset style="display:inline;margin-left:auto;margin-right:auto;"> <legend>Remove featured record</legend><span style="color:#f00">Are you sure you want to remove <a href="%(CFG_SITE_URL)s/record/%(recid)s">record %(recid)s</a> from the list of featured record? <form action="%(CFG_SITE_URL)s/admin/webjournal/webjournaladmin.py/feature_record"> <input type="hidden" name="journal_name" value="%(name)s" /> <input type="hidden" name="recid" value="%(recid)s" /> <input class="formbutton" type="submit" name="action" value="%(remove)s" /> <input class="formbutton" type="submit" name="action" value="%(cancel)s" /> </form></span></fieldset>''' % \ {'CFG_SITE_URL': CFG_SITE_URL, 'name': journal_name, 'recid': recid, 'cancel': _("Cancel"), 'remove': _("Remove")} featured_records = get_featured_records(journal_name) return wjt.tmpl_admin_feature_record(ln=ln, journal_name=journal_name, featured_records=featured_records, msg=msg) elif action == _("Remove"): result = remove_featured_record(journal_name, recid) msg = '''<span style="color:#f00"><a href="%(CFG_SITE_URL)s/record/%(recid)s">Record %(recid)s</a> has been removed.</span>''' % \ {'CFG_SITE_URL': CFG_SITE_URL, 'recid': recid} featured_records = get_featured_records(journal_name) return wjt.tmpl_admin_feature_record(ln=ln, journal_name=journal_name, featured_records=featured_records, msg=msg) else: msg = '''Here you can choose which records from the %s should be featured on the journal webpage.''' % CFG_SITE_NAME featured_records = get_featured_records(journal_name) return wjt.tmpl_admin_feature_record(ln=ln, journal_name=journal_name, featured_records=featured_records, msg=msg) def perform_regenerate_issue(issue, journal_name, ln=CFG_SITE_LANG): """ Clears the cache for the given 
    issue.

    Parameters:
        journal_name - the journal for which the cache should be deleted
        issue        - the issue for which the cache should be deleted
        ln           - language
    """
    success = clear_cache_for_issue(journal_name, issue)
    if success:
        return wjt.tmpl_admin_regenerate_success(ln, journal_name, issue)
    else:
        return wjt.tmpl_admin_regenerate_error(ln, journal_name, issue)

def perform_request_issue_control(journal_name, issues,
                                  action, ln=CFG_SITE_LANG):
    """
    Central logic for issue control.

    Regenerates the flat files 'current_issue' and 'issue_group' of
    the journal that control which issue is currently active for the
    journal.

    Parameters:
        journal_name - the journal affected by 'action'
        issues       - list of issues affected by 'action' TODO: check
        action       - One of ['cfg', _('Add'), _('Refresh'),
                       _('Publish'), _('Update')]
        ln           - language
    """
    _ = gettext_set_language(ln)
    out = ''
    if action == "cfg" or action == _("Refresh") or action == _("Add"):
        # find out if we are in update or release
        try:
            current_issue_time = get_current_issue_time(journal_name)
            all_issue_weeks = get_all_issue_weeks(current_issue_time,
                                                  journal_name,
                                                  ln)
        except InvenioWebJournalIssueNotFoundDBError, e:
            register_exception(req=None)
            return e.user_box()
        except InvenioWebJournalJournalIdNotFoundDBError, e:
            register_exception(req=None)
            return e.user_box()
        if max(all_issue_weeks) > current_issue_time:
            # A later issue week already exists in the group:
            # propose an update (pick the first week strictly after
            # the current issue)
            next_issue_week = None
            all_issue_weeks.sort()
            for issue_week in all_issue_weeks:
                if issue_week > current_issue_time:
                    next_issue_week = issue_week
                    break
            out = wjt.tmpl_admin_update_issue(ln,
                                              journal_name,
                                              issue_times_to_week_strings([next_issue_week,])[0],
                                              issue_times_to_week_strings([current_issue_time,])[0])
        else:
            # propose a release
            next_issues = get_next_journal_issues(current_issue_time,
                                                  journal_name)
            next_issues = issue_times_to_week_strings(next_issues,
                                                      ln)
            if action == _("Refresh"):
                next_issues += issues
                next_issues = list(sets.Set(next_issues))# avoid double entries
            elif action == _("Add"):
                next_issues += issues
                next_issues = list(sets.Set(next_issues))# avoid double entries
                # 'Add' also proposes one extra issue beyond the
                # highest week selected so far
                next_issues_times = issue_week_strings_to_times(next_issues,
                                                                ln)
                highest_issue_so_far = max(next_issues_times)
                one_more_issue = get_next_journal_issues(highest_issue_so_far,
                                                         journal_name,
                                                         ln,
                                                         1)
                one_more_issue = issue_times_to_week_strings(one_more_issue,
                                                             ln)
                next_issues += one_more_issue
                next_issues = list(sets.Set(next_issues)) # avoid double entries
                next_issues.sort()
            else:
                # get the next (default 2) issue numbers to publish
                next_issues = get_next_journal_issues(current_issue_time,
                                                      journal_name,
                                                      ln)
                next_issues = issue_times_to_week_strings(next_issues,
                                                          ln)
            out = wjt.tmpl_admin_control_issue(ln,
                                               journal_name,
                                               next_issues)
    elif action == _("Publish"):
        # Publish the given issues (mark them as current issues)
        publish_issues = issues
        publish_issues = list(sets.Set(publish_issues)) # avoid double entries
        publish_issues.sort()
        try:
            release_journal_issue(publish_issues, journal_name, ln)
        except InvenioWebJournalJournalIdNotFoundDBError, e:
            register_exception(req=None)
            return e.user_box()
        out = wjt.tmpl_admin_control_issue_success_msg(ln,
                                                       publish_issues,
                                                       journal_name)
    elif action == _("Update"):
        # Mark the (single) given issue as released/updated; an empty
        # selection is reported to the user as a release/update error
        try:
            try:
                update_issue = issues[0]
            except:
                raise InvenioWebJournalReleaseUpdateError(ln, journal_name)
        except InvenioWebJournalReleaseUpdateError, e:
            register_exception(req=None)
            return e.user_box()
        try:
            release_journal_update(update_issue, journal_name, ln)
        except InvenioWebJournalJournalIdNotFoundDBError, e:
            register_exception(req=None)
            return e.user_box()
        out = wjt.tmpl_admin_updated_issue_msg(ln,
                                               update_issue,
                                               journal_name)
    return out

def perform_request_alert(journal_name, issue,
                          sent, plain_text,
                          subject, recipients,
                          html_mail, force,
                          ln=CFG_SITE_LANG):
    """
    All the logic for alert emails.

    Display a form to edit email/recipients and options to send the
    email. Sent in HTML/PlainText or only PlainText if wished so.
    Also prevent mistake of sending the alert more than once for a
    particular issue.
    Parameters:
        journal_name - the journal for which the alert is sent
        issue        - the issue for which the alert is sent
        sent         - Display interface to edit email if "False"
                       (string). Else send the email.
        plain_text   - the text of the mail
        subject      - the subject of the mail
        recipients   - the recipients of the mail (string with
                       comma-separated emails)
        html_mail    - if 'html', also send email as HTML (copying
                       from the current issue on the web)
        force        - if different than "False", the email is sent
                       even if it has already been sent.
        ln           - language
    """
    if not get_release_time(issue, journal_name, ln):
        # Trying to send an alert for an unreleased issue
        return wjt.tmpl_admin_alert_unreleased_issue(ln,
                                                     journal_name)
    if sent == "False":
        # Retrieve default message, subject and recipients, and
        # display email editor
        subject = wjt.tmpl_admin_alert_subject(journal_name,
                                               ln,
                                               issue)
        plain_text = wjt.tmpl_admin_alert_plain_text(journal_name,
                                                     ln,
                                                     issue)
        plain_text = plain_text.encode('utf-8')
        recipients = wjt.tmpl_admin_alert_recipients(journal_name,
                                                     ln,
                                                     issue)
        return wjt.tmpl_admin_alert_interface(ln,
                                              journal_name,
                                              subject,
                                              plain_text,
                                              recipients)
    else:
        # User asked to send the mail
        if was_alert_sent_for_issue(issue,
                                    journal_name,
                                    ln) != False and force == "False":
            # Mmh, email already sent before for this issue. Ask
            # confirmation
            return wjt.tmpl_admin_alert_was_already_sent(ln,
                                                         journal_name,
                                                         subject,
                                                         plain_text,
                                                         recipients,
                                                         html_mail,
                                                         issue)
        if html_mail == "html":
            # Also send as HTML: retrieve from current issue
            html_file = urlopen('%s/journal/%s?ln=en' % (CFG_SITE_URL, journal_name))
            html_string = html_file.read()
            html_file.close()
            html_string = put_css_in_file(html_string, journal_name)
        else:
            # Send just as plain text
            html_string = plain_text.replace("<br/>", '\n')
        message = createhtmlmail(html_string, plain_text,
                                 subject, recipients)
        ## Transform the recipients string into a list for the mail server:
        to_addresses = [raw_address.strip() for raw_address in \
                        recipients.split(",")]
        recipients = to_addresses
        ## Send the mail:
        # NOTE(review): the SMTP connection is never closed (no
        # server.quit()) and the sender address is hard-coded --
        # consider closing the connection and moving the address to
        # the configuration (see 'todo' below).
        server = smtplib.SMTP("localhost", 25)
        server.sendmail('Bulletin-Support@cern.ch', recipients, message)
        # todo: has to go to some messages config
        update_DB_for_alert(issue, journal_name, ln)
        return wjt.tmpl_admin_alert_success_msg(ln,
                                                journal_name)

def perform_request_configure(journal_name, xml_config, action, ln=CFG_SITE_LANG):
    """
    Add a new journal or configure the settings of an existing journal.

    Parameters:
        journal_name - the journal to configure, or name of the new journal
        xml_config   - the xml configuration of the journal (string)
        action       - One of ['edit', 'editDone', 'add', 'addDone']
        ln           - language
    """
    msg = None
    if action == 'edit':
        # Read existing config
        if journal_name is not None:
            if not can_read_xml_config(journal_name):
                return '<span style="color:#f00">Configuration could not be read. Please check that %s/webjournal/%s/config.xml exists and can be read by the server.</span><br/>' % (CFG_ETCDIR, journal_name)
            config_path = '%s/webjournal/%s/config.xml' % (CFG_ETCDIR, journal_name)
            xml_config = file(config_path).read()
        else:
            # cannot edit unknown journal...
return '<span style="color:#f00">You must specify a journal name</span>' if action in ['editDone', 'addDone']: # Save config if action == 'addDone': res = add_journal(journal_name, xml_config) if res == -1: msg = '<span style="color:#f00">A journal with that name already exists. Please choose another name.</span>' action = 'add' elif res == -2: msg = '<span style="color:#f00">Configuration could not be written (no permission). Please manually copy your config to %s/webjournal/%s/config.xml</span><br/>' % (CFG_ETCDIR, journal_name) action = 'edit' + elif res == -4: + msg = '<span style="color:#f00">Cache file could not be written (no permission). Please manually create directory %s/webjournal/%s/ and make it writable for your Apache user</span><br/>' % (CFG_CACHEDIR, journal_name) + action = 'edit' elif res > 0: msg = '<span style="color:#0f0">Journal successfully added.</span>' action = 'edit' else: msg = '<span style="color:#f00">An error occurred. The journal could not be added</span>' action = 'edit' if action == 'add': # Display a sample config. 
TODO: makes it less CERN-specific xml_config = '''<?xml version="1.0" encoding="UTF-8"?> <webjournal name="CERNBulletin"> <view> <niceName>CERN Bulletin</niceName> <niceURL>http://bulletin.cern.ch</niceURL> <css> <screen>img/webjournal_CERNBulletin/webjournal_CERNBulletin_screen.css</screen> <print>img/webjournal_CERNBulletin/webjournal_CERNBulletin_print.css</print> </css> <images> <path>img/Objects/Common</path> </images> <format_template> <index>CERN_Bulletin_Index.bft</index> <detailed>CERN_Bulletin_Detailed.bft</detailed> <search>CERN_Bulletin_Search.bft</search> <popup>CERN_Bulletin_Popup.bft</popup> <contact>CERN_Bulletin_Contact.bft</contact> </format_template> </view> <model> <record> <rule>News Articles, 980__a:BULLETINNEWS</rule> <rule>Official News, 980__a:BULLETINOFFICIAL</rule> <rule>Training and Development, 980__a:BULLETINTRAINING</rule> <rule>General Information, 980__a:BULLETINGENERAL</rule> </record> </model> <controller> <widgets>webjournal_weather</widgets> <frequency>14</frequency> <issue_grouping>True</issue_grouping> <marc_tags> <rule_tag>980__a</rule_tag> <issue_number>773__n</issue_number> </marc_tags> </controller> </webjournal>''' out = wjt.tmpl_admin_configure_journal(ln=ln, journal_name=journal_name, xml_config=xml_config, action=action, msg=msg) return out ######################## ADDING/REMOVING JOURNALS ############################### def add_journal(journal_name, xml_config): """ Add a new journal to the DB. 
Also create the configuration file Parameters: journal_name - the name (used in URLs) of the new journal xml_config - the xml configuration of the journal (string) Returns: the id of the journal if successfully added -1 if could not be added because journal name already exists -2 if config could not be saved -3 if could not be added for other reasons + -4 if database cache could not be added """ try: get_journal_id(journal_name) except InvenioWebJournalJournalIdNotFoundDBError: # Perfect, journal does not exist res = run_sql("INSERT INTO jrnJOURNAL (name) VALUES(%s)", (journal_name,)) # Also save xml_config config_dir = '%s/webjournal/%s/' % (CFG_ETCDIR, journal_name) try: if not os.path.exists(config_dir): os.makedirs(config_dir) xml_config_file = file(config_dir + 'config.xml', 'w') xml_config_file.write(xml_config) xml_config_file.close() except Exception: res = -2 # And save some info in file in case database is down journal_info_path = get_journal_info_path(journal_name) + journal_info_dir = os.path.dirname(journal_info_path) + if not os.path.exists(journal_info_dir): + try: + os.makedirs(journal_info_dir) + except Exception: + if res <= 0: + res = -4 journal_info_file = open(journal_info_path, 'w') + cPickle.dump({'journal_id': res, 'journal_name': journal_name, 'current_issue':'01/2000'}, journal_info_file) return res return -1 def remove_journal(journal_name): """ Remove a journal from the DB. Keep everything else, since the journal should still be accessible. TODO: Think about removing config.xml file too if needed. 
    Parameters:
        journal_name - the journal to remove

    Returns:
        the id of the journal if successfully removed or
        -1 if could not be removed because journal name does not exist or
        -2 if could not be removed for other reasons
        (NOTE(review): the current implementation returns None in all
        cases -- the description above does not match the code; verify
        what callers expect.)
    """
    run_sql("DELETE FROM jrnJOURNAL WHERE name=%s", (journal_name,))

######################## TIME / ISSUE FUNCTIONS ###############################

def release_journal_issue(publish_issues, journal_name, ln=CFG_SITE_LANG):
    """
    Releases a new issue.

    This sets the current issue in the database to 'publish_issues' for
    given 'journal_name'

    Parameters:
        journal_name   - the journal for which we release a new issue
        publish_issues - the list of issues that will be considered as
                         current (there can be several)
        ln             - language
    """
    journal_id = get_journal_id(journal_name, ln)
    if len(publish_issues) > 1:
        # Grouped release: display as 'WW1-WW2/YYYY' using the lowest
        # and highest week numbers of the group
        publish_issues.sort(sort_by_week_number)
        low_bound = publish_issues[0]
        high_bound = publish_issues[-1]
        issue_display = '%s-%s/%s' % (low_bound.split("/")[0],
                                      high_bound.split("/")[0],
                                      high_bound.split("/")[1])
        # remember convention: if we are going over a new year, take the higher
    else:
        issue_display = publish_issues[0]
    # produce the DB lines
    for publish_issue in publish_issues:
        run_sql("INSERT INTO jrnISSUE (id_jrnJOURNAL, issue_number, issue_display) \
                VALUES(%s, %s, %s)", (journal_id,
                                      publish_issue,
                                      issue_display))
    # set first issue to published
    release_journal_update(publish_issues[0], journal_name, ln)
    # update information in file (in case DB is down)
    journal_info_path = get_journal_info_path(journal_name)
    journal_info_file = open(journal_info_path, 'w')
    cPickle.dump({'journal_id': journal_id,
                  'journal_name': journal_name,
                  'current_issue': get_current_issue(ln, journal_name)},
                 journal_info_file)

def delete_journal_issue(issue, journal_name, ln=CFG_SITE_LANG):
    """
    Deletes an issue from the DB.
    (Not currently used)

    Parameters:
        issue        - the issue to delete
        journal_name - the journal the issue belongs to
        ln           - language
    """
    journal_id = get_journal_id(journal_name, ln)
    run_sql("DELETE FROM jrnISSUE WHERE issue_number=%s \
            AND id_jrnJOURNAL=%s", (issue, journal_id))
    # update information in file (in case DB is down)
    journal_info_path = get_journal_info_path(journal_name)
    journal_info_file = open(journal_info_path, 'w')
    cPickle.dump({'journal_id': journal_id,
                  'journal_name': journal_name,
                  'current_issue': get_current_issue(ln, journal_name)},
                 journal_info_file)

def was_alert_sent_for_issue(issue, journal_name, ln):
    """
    Returns False if alert has not already been sent for given journal and
    issue, else returns time of last alert, as time tuple

    Parameters:
        journal_name - the journal for which we want to check last alert
        issue        - the issue for which we want to check last alert
        ln           - language

    Returns:
        time tuple or False. Eg: (2008, 4, 25, 7, 58, 37, 4, 116, -1)
    """
    journal_id = get_journal_id(journal_name, ln)
    date_announced = run_sql("SELECT date_announced FROM jrnISSUE \
                             WHERE issue_number=%s \
                             AND id_jrnJOURNAL=%s", (issue, journal_id))[0][0]
    # date_announced is NULL until the first alert is sent
    if date_announced == None:
        return False
    else:
        return date_announced.timetuple()

def update_DB_for_alert(issue, journal_name, ln):
    """
    Update the 'last sent alert' timestamp for the given journal and
    issue.

    Parameters:
        journal_name - the journal for which we want to update the time of
                       last alert
        issue        - the issue for which we want to update the time of
                       last alert
        ln           - language
    """
    journal_id = get_journal_id(journal_name, ln)
    run_sql("UPDATE jrnISSUE set date_announced=NOW() \
            WHERE issue_number=%s \
            AND id_jrnJOURNAL=%s", (issue, journal_id))

def release_journal_update(update_issue, journal_name, ln=CFG_SITE_LANG):
    """
    Releases an update to a journal.
""" journal_id = get_journal_id(journal_name, ln) run_sql("UPDATE jrnISSUE set date_released=NOW() \ WHERE issue_number=%s \ AND id_jrnJOURNAL=%s", (update_issue, journal_id)) ######################## XML CONFIG ############################### def can_read_xml_config(journal_name): """ Check that configuration xml for given journal name is exists and can be read. """ config_path = '%s/webjournal/%s/config.xml' % (CFG_ETCDIR, journal_name) try: file(config_path).read() except IOError: return False return True ######################## EMAIL HELPER FUNCTIONS ############################### def createhtmlmail (html, text, subject, toaddr): """ Create a mime-message that will render HTML in popular MUAs, text in better ones. """ out = cStringIO.StringIO() # output buffer for our message htmlin = cStringIO.StringIO(html) txtin = cStringIO.StringIO(text) writer = MimeWriter.MimeWriter(out) # # set up some basic headers... we put subject here # because smtplib.sendmail expects it to be in the # message body # writer.addheader("Subject", subject) writer.addheader("MIME-Version", "1.0") ## Instead of a comma-separated "To" field, add a new "To" header for ## each of the addresses: to_addresses = [raw_address.strip() for raw_address in toaddr.split(",")] for to_address in to_addresses: writer.addheader("To", to_address) # # start the multipart section of the message # multipart/alternative seems to work better # on some MUAs than multipart/mixed # writer.startmultipartbody("alternative") writer.flushheaders() # # the plain text section # subpart = writer.nextpart() subpart.addheader("Content-Transfer-Encoding", "quoted-printable") #pout = subpart.startbody("text/plain", [("charset", 'us-ascii')]) pout = subpart.startbody("text/plain", [("charset", 'utf-8')]) mimetools.encode(txtin, pout, 'quoted-printable') txtin.close() # # start the html subpart of the message # subpart = writer.nextpart() subpart.addheader("Content-Transfer-Encoding", "quoted-printable") pout = 
subpart.startbody("text/html", [("charset", 'utf-8')]) mimetools.encode(htmlin, pout, 'quoted-printable') htmlin.close() # # Now that we're done, close our writer and # return the message body # writer.lastpart() msg = out.getvalue() out.close() print msg return msg def put_css_in_file(html_message, journal_name): """ Retrieve the CSS of the journal and insert/inline it in the <head> section of the given html_message. (Used for HTML alert emails) Parameters: journal_name - the journal name html_message - the html message (string) in which the CSS should be inserted Returns: the HTML message with its CSS inlined """ config_strings = get_xml_from_config(["screen"], journal_name) try: css_path = config_strings["screen"][0] except Exception: register_exception(req=None, suffix="No css file for journal %s. Is this right?" % journal_name) return css_file = urlopen('%s/%s' % (CFG_SITE_URL, css_path)) css = css_file.read() css = make_full_paths_in_css(css, journal_name) html_parted = html_message.split("</head>") if len(html_parted) > 1: html = '%s<style type="text/css">%s</style></head>%s' % (html_parted[0], css, html_parted[1]) else: html_parted = html_message.split("<html>") if len(html_parted) > 1: html = '%s<html><head><style type="text/css">%s</style></head>%s' % (html_parted[0], css, html_parted[1]) else: return return html def make_full_paths_in_css(css, journal_name): """ Update the URLs in a CSS from relative to absolute URLs, so that the URLs are accessible from anywhere (Used for HTML alert emails) Parameters: journal_name - the journal name css - a cascading stylesheet (string) Returns: (str) the given css with relative paths converted to absolute paths """ url_pattern = re.compile('''url\(["']?\s*(?P<url>\S*)\s*["']?\)''', re.DOTALL) url_iter = url_pattern.finditer(css) rel_to_full_path = {} for url in url_iter: url_string = url.group("url") url_string = url_string.replace('"', "") url_string = url_string.replace("'", "") if url_string[:6] != "http://": 
            # Map the relative URL to a quoted absolute URL under the
            # journal's image directory on the server
            rel_to_full_path[url_string] = '"%s/img/webjournal_%s/%s"' % \
            (CFG_SITE_URL,
             journal_name,
             url_string)
    # Substitute each collected relative path by its absolute version.
    # NOTE(review): str.replace rewrites every occurrence of the
    # substring, including any outside a url(...) context -- confirm
    # this is acceptable for the journal stylesheets.
    for url in rel_to_full_path.keys():
        css = css.replace(url, rel_to_full_path[url])
    return css