# -*- coding: utf-8 -*-
##
## $Id$
##
## This file is part of CDS Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 CERN.
##
## CDS Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## CDS Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

# File: modules/bibrank/lib/bibrank_citation_searcher.py

"""Look up citation data stored in rnkCITATIONDATA and rnkMETHODDATA.

The tables hold zlib-compressed, marshalled dictionaries.  Two of them
(cited-by and reference-list) are loaded once at import time into
module-level caches; the citation weights are queried on every call.
"""

__revision__ = "$Id$"

import marshal
from zlib import decompress, error

from invenio.dbquery import run_sql, OperationalError

# Fetches the compressed {recid: citation weight} dictionary of the
# 'citation' ranking method.
_CITATION_WEIGHT_QUERY = "select relevance_data from rnkMETHODDATA, rnkMETHOD " \
                         "WHERE rnkMETHOD.id=rnkMETHODDATA.id_rnkMETHOD " \
                         "and rnkMETHOD.name='citation'"

def _unpack_first_cell(rows):
    """Decode the dictionary stored in the first column of the first row.

    ROWS is a run_sql() result.  Return the unmarshalled object, or None
    when there is no row or when the cell is empty (e.g. NULL).
    Errors raised by decompress() or marshal.loads() on corrupted data
    are propagated to the caller.
    """
    if rows and rows[0] and rows[0][0]:
        return marshal.loads(decompress(rows[0][0]))
    return None

def _load_citation_data(column):
    """Return the dictionary stored in COLUMN of rnkCITATIONDATA, or None.

    An OperationalError raised by the query is treated like an empty
    result, so that importing this module does not fail on it.
    """
    try:
        rows = run_sql("select %s from rnkCITATIONDATA" % column)
    except OperationalError:
        rows = []
    return _unpack_first_cell(rows)

def _sort_by_weight(pairs, sort_order):
    """Sort the [recid, weight] PAIRS in place by weight.

    The order is descending when SORT_ORDER is "d" and ascending for any
    other value.  The sort is stable in both directions, so entries with
    equal weights keep their relative order.
    """
    pairs.sort(key=lambda pair: pair[1], reverse=(sort_order == "d"))

def init_cited_by_dictionary():
    """Return the citation list dictionary from rnkCITATIONDATA.

    Return None when the table cannot be queried or holds no data.
    """
    return _load_citation_data("citation_data")

def init_reference_list_dictionary():
    """Return the reference list dictionary from rnkCITATIONDATA.

    Return None when the table cannot be queried or holds no data.
    """
    return _load_citation_data("citation_data_reversed")

# Both caches are filled once, when the module is imported.
cache_cited_by_dictionary = init_cited_by_dictionary()
cache_reference_list_dictionary = init_reference_list_dictionary()

### INTERFACE

def get_cited_by(recordid):
    """Return a list of records that cite recordid.

    The data is read from the database on every call (the module-level
    cache is not used).  Return an empty list when there is no citation
    data at all or no entry for RECORDID.
    """
    citation_dic = _unpack_first_cell(
        run_sql("select citation_data from rnkCITATIONDATA"))
    if not citation_dic:
        return []
    return citation_dic.get(recordid, [])

def get_cited_by_list(recordlist):
    """Return a list of [recid, citation_weight] for all the records in
    recordlist, in the same order.

    A record without a weight, or every record when no weight data is
    stored, gets the weight 0.
    """
    # `or {}` covers the case where the query returns nothing
    citation_dic = _unpack_first_cell(run_sql(_CITATION_WEIGHT_QUERY)) or {}
    # `or 0` maps both a missing and a false-valued weight to 0
    return [[rid, citation_dic.get(rid) or 0] for rid in recordlist]

def calculate_cited_by_list(record_id, sort_order="d"):
    """Return a list of [recid, citation_weight] for all the records
    citing RECORD_ID.  The resulting recids are sorted by
    ascending/descending citation weights depending on SORT_ORDER
    ("d" means descending, anything else ascending).

    When the weight data is corrupted every citing record gets the
    weight 1; a citing record absent from the weight data gets 0.  When
    no weight data is stored at all the result is empty.
    """
    # determine which records cite RECORD_ID:
    citation_list = []
    if cache_cited_by_dictionary:
        citation_list = cache_cited_by_dictionary.get(record_id, [])
    # get their weights, this is weighted citation_list (x is cited by y)
    result = []
    rows = run_sql(_CITATION_WEIGHT_QUERY)
    if rows and rows[0] and rows[0][0]:
        # has to be prepared for corrupted data!
        try:
            # citation_dic is {1: 0, .. 81: 4, 82: 0, 83: 0, 84: 3} etc,
            # e.g. recnum-weight
            citation_dic = marshal.loads(decompress(rows[0][0]))
        except (error, EOFError, ValueError, TypeError):
            result = [[recid, 1] for recid in citation_list]
        else:
            result = [[recid, citation_dic.get(recid, 0)]
                      for recid in citation_list]
    _sort_by_weight(result, sort_order)
    return result

def calculate_co_cited_with_list(record_id, sort_order="d"):
    """Return a list of [recid, co-cited weight] for records that are
    co-cited with RECORD_ID.  The resulting recids are sorted by
    ascending/descending weights depending on SORT_ORDER ("d" means
    descending, anything else ascending).

    The weight of a record is the number of records citing RECORD_ID
    whose reference list contains it.  RECORD_ID itself is left out.
    """
    citation_list = []
    if cache_cited_by_dictionary:
        citation_list = cache_cited_by_dictionary.get(record_id, [])
    result_intermediate = {}
    if cache_reference_list_dictionary:
        for cit_id in citation_list:
            for ref_id in cache_reference_list_dictionary.get(cit_id, []):
                result_intermediate[ref_id] = \
                    result_intermediate.get(ref_id, 0) + 1
    result = [[key, value] for key, value in result_intermediate.items()
              if key != record_id]
    _sort_by_weight(result, sort_order)
    return result


# ---------------------------------------------------------------------------
# File: modules/bibrank/lib/bibrank_grapher.py
# ---------------------------------------------------------------------------
# -*- coding: utf-8 -*-
##
## $Id$
##
## This file is part of CDS Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 CERN.
##
## CDS Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## CDS Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
__revision__ = "$Id$"

import os
import sys
import time
import tempfile

from invenio.config import \
     images, \
     storage, \
     version, \
     webdir
from invenio.websubmit_config import *

## test gnuplot presence:
cfg_gnuplot_available = 1
try:
    import Gnuplot
except ImportError:
    cfg_gnuplot_available = 0

def write_coordinates_in_tmp_file(lists_coordinates):
    """Write the graph coordinates in a temporary file, to be read later
    by create_temporary_image.

    lists_coordinates is a list of lists of this form:
    [[(1,3),(2,4),(3,5)],[(1,5),(2,5),(3,6)]]
    The file is organized into one or more sets of 2 columns.  Each set
    is separated from the others by two blank lines.  Each inner list
    represents a set and each tuple a line in the file, where the first
    element of the tuple goes to the first column and the second element
    to the second column.
    With gnuplot, the first column is used as x coordinates and the
    second column as y coordinates.  One set represents a curve in the
    graph.

    Return [file_name, max_y] where max_y is the largest y value written
    (0 when there is none or when all of them are negative).
    """
    max_y_datas = 0
    # mkstemp creates the file atomically, unlike the race-prone mktemp,
    # and `dir` keeps the process-wide tempfile.tempdir untouched.
    file_desc, fname = tempfile.mkstemp(dir=webdir + "/img")
    file_dest = os.fdopen(file_desc, 'w')
    try:
        for list_elem in lists_coordinates:
            y_axe = []
            #prepare data and store them in a file
            for key_value in list_elem:
                file_dest.write("%s %s\n" % (key_value[0], key_value[1]))
                y_axe.append(key_value[1])
            if y_axe:
                max_y_datas = max(max_y_datas, max(y_axe))
            file_dest.write("\n\n")
    finally:
        file_dest.close()
    return [fname, max_y_datas]

def _download_history_plot_text(data_file, docid_list, graphe_titles,
                                intervals, color_line_list):
    """Return the gnuplot plot command for a 'download_history' graph.

    One curve is drawn per entry of docid_list (a single untitled curve
    when there is at most one).  With several curves and more than two
    intervals, the data set following the per-document sets is drawn as
    well, labelled "TOTAL".
    """
    len_docid_list = len(docid_list)
    few_points = len(intervals) <= 4

    def curve(first_line, index, title):
        """Return the plot command fragment of the curve number INDEX."""
        # wrap around rather than fail when there are more curves than
        # colors
        color = color_line_list[index % len(color_line_list)]
        if few_points:
            return plot_command(first_line, data_file, (index, index),
                                title, "imp", color, 20)
        return plot_command(first_line, data_file, (index, index), title,
                            "linespoint", color, 1, "pt 26", "ps 0.5")

    if len_docid_list <= 1:
        #Only one curve
        return curve(1, 0, "")
    #Multiple curves
    plot_text = curve(1, 0, graphe_titles[0])
    for d in range(1, len_docid_list):
        plot_text += curve(0, d, graphe_titles[d])
    # NOTE(review): the whitespace-collapsed source does not show whether
    # the "TOTAL" command is nested under this condition -- confirm.
    if len(intervals) > 2:
        total_indexes = (len_docid_list, len_docid_list)
        plot_text += plot_command(0, data_file, total_indexes, "",
                                  "impulses", 0, 2)
        plot_text += plot_command(0, data_file, total_indexes, "TOTAL",
                                  "lines", 0, 5)
    return plot_text

def create_temporary_image(recid, kind_of_graphe, data_file, x_label, y_label,
                           origin_tuple, y_max, docid_list, graphe_titles,
                           intervals):
    """From a temporary file, draw a gnuplot graph.

    The arguments are as follows:
    recid          - record ID
    kind_of_graphe - takes one of these values: "citation",
                     "download_history", "download_users".
                     The gnuplot commands common to all cases are issued
                     first, then the commands particular to each case.
                     Any other value is handled like "citation".
    data_file      - Name of the temporary file which contains the
                     gnuplot data used to plot the graph.
                     This file is organized into one or more sets of 2
                     columns.  The first column contains x coordinates
                     and the second column y coordinates.  Each set is
                     separated from the others by two blank lines.
    x_label        - Name of the x axis ('' for no label).
    y_label        - Name of the y axis ('' for no label).
    origin_tuple   - Reference coordinates for positioning the graph.
    y_max          - Max value of y.  Used to set the y range.
    docid_list     - In the download_history case, docid_list is used to
                     plot multiple curves.
    graphe_titles  - List of graph titles.  It is used to name the curves
                     in the legend.
    intervals      - x tics location and xrange specification

    Return (graph_file_name, data_file), or (None, None) when the Gnuplot
    module is not available.  The image is written to webdir/img/.
    """
    if cfg_gnuplot_available == 0:
        return (None, None)
    #For different curves
    color_line_list = ['4', '3', '2', '9', '6']
    #Gnuplot graphe object
    g = Gnuplot.Gnuplot()
    #Graphe name: file to store graph
    graphe_name = "tmp_%s_%s_stats.png" % (kind_of_graphe, recid)
    g('set terminal png small')
    g('set output "%s/img/%s"' % (webdir, graphe_name))
    # Standard options
    g('set size 0.5,0.5')
    g('set origin %s,%s' % (origin_tuple[0], origin_tuple[1]))
    if x_label == '':
        g('unset xlabel')
    else:
        g.xlabel(s = x_label)
    # the y axis is governed by its own label (x_label was tested here)
    if y_label == '':
        g('unset ylabel')
    else:
        g.ylabel(s = y_label)
    g('set bmargin 5')
    #let a place at the top of the graph
    g('set tmargin 1')

    if kind_of_graphe == 'download_history':
        len_intervals = len(intervals)
        g('set xdata time') #Set x scale as date
        g('set timefmt "%m/%Y"') #Inform about format in file .dat
        g('set format x "%b %y"') #Format displaying
        if len_intervals > 1:
            g('set xrange ["%s":"%s"]' % (intervals[0], intervals[-1]))
        y_offset = max(3, float(y_max)/60)
        g('set yrange [0:%s]' % str(y_max + y_offset))
        if 1 < len_intervals <= 12:
            #to prevent duplicate tics
            g('set xtics rotate %s' % str(tuple(intervals)))
        elif 12 < len_intervals <= 24:
            #3 months intervals
            g('set xtics rotate "%s", 7776000, "%s"' % (intervals[0], intervals[-1]))
        else:
            #6 months intervals
            g('set xtics rotate "%s",15552000, "%s"' % (intervals[0], intervals[-1]))
        #Will be passed to g at the end to plot the graphe
        plot_text = _download_history_plot_text(data_file, docid_list,
                                                graphe_titles, intervals,
                                                color_line_list)
    elif kind_of_graphe == 'download_users':
        g('set size 0.25,0.5')
        g('set xrange [0:4]')
        g('set yrange [0:100]')
        g('set format y "%g %%"')
        g("""set xtics ("" 0, "CERN\\n Users" 1, "Other\\n Users" 3, "" 4)""")
        g('set ytics 0,10,100')
        g('set boxwidth 0.7 relative')
        g('set style fill solid 0.25')
        plot_text = 'plot "%s" using 1:2 title "" with boxes lt 7 lw 2' % data_file
    else: #citation
        g('set boxwidth 0.6 relative')
        g('set style fill solid 0.250000 border -1')
        g('set xtics rotate %s' % str(tuple(intervals)))
        g('set xrange [%s:%s]' % (str(intervals[0]), str(intervals[-1])))
        g('set yrange [0:%s]' % str(y_max+2))
        plot_text = """plot "%s" index 0:0 using 1:2 title "" w steps lt %s lw 3""" % (data_file, color_line_list[1])

    g(plot_text)
    return (graphe_name, data_file)

def remove_old_img(prefix_file_name):
    """Delete the images last accessed more than 10 minutes ago, to
    prevent too much storage use.

    The files concerned are webdir/img/tmp_<prefix_file_name>*.png.
    Return the status reported by os.system().
    Takes 0.0 seconds for 50 files to delete."""
    # The pattern is quoted so that `find` receives it unexpanded; the
    # shell would otherwise expand it against the current directory.
    command = "find %s/img/ -name 'tmp_%s*.png' -amin +10 -exec rm -f {} \\;" % (webdir, prefix_file_name)
    return os.system(command)

def plot_command(first_line, file_source, indexes, title, style, line_type, line_width, point_type="", point_size=""):
    """Return a string of a gnuplot plot command.  Particularly useful
    when plotting multiple curves from one data file.

    The fragments returned for successive curves concatenate to:
    plot datafile <first curve parameters>, datafile <second curve parameters>,...

    The arguments are as follows:
    first_line  - true for the first curve only: its fragment starts with
                  the word plot, the others start with a comma
    file_source - data file source which contains the coordinates
    indexes     - pair (first, last) pointing out the set numbers in the
                  data file source
    title       - title of the curve in the legend box
    style       - representation of the curve ex: linespoints, lines ...
    line_type   - color of the line
    line_width  - width of the line
    point_type  - optional parameter: an empty string if not given.
                  Used with style = linespoints to set the point style
    point_size  - optional parameter: an empty string if not given.
                  Placed after point_type in the command"""
    if first_line:
        prefix = 'plot '
    else:
        prefix = ', '
    curve_text = '"%s" index %s:%s using 1:2 title "%s" with %s lt %s lw %s %s %s' % \
                 (file_source, indexes[0], indexes[1], title, style,
                  line_type, line_width, point_type, point_size)
    return prefix + curve_text