diff --git a/modules/bibmatch/doc/admin/bibmatch-admin-guide.webdoc b/modules/bibmatch/doc/admin/bibmatch-admin-guide.webdoc index 39b4b8f96..985958e44 100644 --- a/modules/bibmatch/doc/admin/bibmatch-admin-guide.webdoc +++ b/modules/bibmatch/doc/admin/bibmatch-admin-guide.webdoc @@ -1,138 +1,150 @@ ## -*- mode: html; coding: utf-8; -*- ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

BibMatch matches bibliographic data in a MARCXML file against the database content. With a MARCXML input file, the produced output shows a selection of records in the input that match the database content. This way, it is possible to identify potential duplicate entries, before they are uploaded in a database.

Note: BibMatch only matches against public records attached to the home collection.

BibMatch command-line tool

 
 
 

Examples

To match records on title and print out only new (unmatched) ones:
  $ bibmatch [--print-new] --field=\"title\" < input.xml > output.xml
 
To print potential duplicate entries before manual upload, run:
  $ bibmatch --print-match --field=\"245__a\" --mode=\"a\" < input.xml > output.xml
 
To print undecided (ambiguous) results from a match on multiple fields:
  $ bibmatch --print-ambiguous --query-string=\"245__a||100__a\" < input.xml > output.xml
 
To print "fuzzy" (almost matching by title) records:
  $ bibmatch --print-fuzzy  < input.xml > output.xml
 
To match against public records on a remote Invenio installation (e.g. http://cdsweb.cern.ch):
  $ bibmatch --print-match -i input.xml -r 'http://cdsweb.cern.ch'
 
Using text-marc as output-format:
  $ bibmatch -b out.marc -t < input.xml
 
+To print matched or fuzzy matched records with the old identifier +(controlfield 001) replaced by the one from the matched record, i.e. ready to be +used with BibUpload to update the record: + +
+
+ $ bibmatch -a -1 < input.xml > modified_match.xml
+
+
+ Command line options:
  -0 --print-new (default) print unmatched in stdout
  -1 --print-match print matched records in stdout
  -2 --print-ambiguous print records that match more than 1 existing records
  -3 --print-fuzzy print records that match the longest words in existing records
 
  -b --batch-output=(filename). filename.0 will be new records, filename.1 will be matched,
       filename.2 will be ambiguous, filename.3 will be fuzzy match
  -t --text-marc-output transform the output to text-marc format instead of the default MARCXML
 
  Simple query:
 
  -f --field=(field)
 
  Advanced query:
 
  -c --config=(config-filename)
  -q --query-string=(uploader_querystring)
  -m --mode=(a|e|o|p|r)
  -o --operator=(a|o)
 
  Where mode is:
   "a" all of the words,
   "o" any of the words,
   "e" exact phrase,
   "p" partial phrase,
   "r" regular expression.
 
  Operator is:
   "a" and,
   "o" or.
 
  General options:
 
  -n   --noprocess          Do not print records in stdout.
  -i,  --input              use a named file instead of stdin for input
  -h,  --help               print this help and exit
  -V,  --version            print version information and exit
  -v,  --verbose=LEVEL      verbose level (from 0 to 9, default 1)
  -r,  --remote=URL         match against a remote invenio installation (URL, no trailing '/')
                            Beware: Only searches public records attached to home collection
-
+ -a,  --alter-recid        The recid (controlfield 001) of matched or fuzzy matched records in
+                           output will be replaced by the 001 value of the matched record.
+                           Useful to prepare files to then be used with BibUpload.
 
diff --git a/modules/bibmatch/lib/bibmatch_engine.py b/modules/bibmatch/lib/bibmatch_engine.py index 8e2ff395e..6f383262f 100644 --- a/modules/bibmatch/lib/bibmatch_engine.py +++ b/modules/bibmatch/lib/bibmatch_engine.py @@ -1,617 +1,653 @@ ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
"""BibMatch tool to match records with database content.""" __revision__ = "$Id$" import sys if sys.hexversion < 0x2040000: # pylint: disable=W0622 from sets import Set as set #for "&" intersection # pylint: enable=W0622 import os import getopt import string from tempfile import mkstemp from invenio.config import CFG_SITE_URL from invenio.invenio_connector import InvenioConnector from invenio.bibrecord import create_records, record_get_field_instances, \ - record_get_field_values, record_xml_output + record_get_field_values, record_xml_output, record_modify_controlfield, \ + record_has_field, record_add_field from invenio import bibconvert from invenio.dbquery import run_sql from invenio.textmarc2xmlmarc import transform_file from invenio.xmlmarc2textmarc import get_sysno_from_record, create_marc_record try: from cStringIO import StringIO except ImportError: from StringIO import StringIO + def usage(): """Print help""" print >> sys.stderr, \ """ Usage: %s [options] - Examples: - - $ bibmatch -b -n < input.xml - $ bibmatch --field=title < input.xml > unmatched.xml - $ bibmatch --field=245__a --mode=a < input.xml > unmatched.xml - $ bibmatch --print-ambiguous --query-string="245__a||100__a" < input.xml > unmatched.xml - $ bibmatch --print-match -i input.xml -r 'http://cdsweb.cern.ch' - - $ bibmatch [options] < input.xml > unmatched.xml - Options: - Output: -0 --print-new (default) print unmatched in stdout -1 --print-match print matched records in stdout -2 --print-ambiguous print records that match more than 1 existing records -3 --print-fuzzy print records that match the longest words in existing records -b --batch-output=(filename). 
filename.0 will be new records, filename.1 will be matched, filename.2 will be ambiguous, filename.3 will be fuzzy match -t --text-marc-output transform the output to text-marc format instead of the default MARCXML Simple query: -f --field=(field) Advanced query: -c --config=(config-filename) -q --query-string=(uploader_querystring) -m --mode=(a|e|o|p|r) -o --operator=(a|o) Where mode is: "a" all of the words, "o" any of the words, "e" exact phrase, "p" partial phrase, "r" regular expression. Operator is: "a" and, "o" or. General options: -n --noprocess Do not print records in stdout. -i, --input use a named file instead of stdin for input -h, --help print this help and exit -V, --version print version information and exit -v, --verbose=LEVEL verbose level (from 0 to 9, default 1) -r, --remote=URL match against a remote invenio installation (URL, no trailing '/') Beware: Only searches public records attached to home collection + -a, --alter-recid The recid (controlfield 001) of matched or fuzzy matched records in + output will be replaced by the 001 value of the matched record. + Useful to prepare files to then be used with BibUpload. + + Examples: + + $ bibmatch -b -n < input.xml + $ bibmatch --field=title < input.xml > unmatched.xml + $ bibmatch --field=245__a --mode=a < input.xml > unmatched.xml + $ bibmatch --print-ambiguous --query-string="245__a||author" < input.xml > ambigmatched.xml + $ bibmatch --print-match -i input.xml -r 'http://cdsweb.cern.ch' + $ bibmatch -a -1 < input.xml > modified_match.xml + $ bibmatch [options] < input.xml > unmatched.xml """ % sys.argv[0] sys.exit(1) return class Querystring: "Holds the information about querystring (p1,f1,m1,op1,p2,f2,m2,op2,p3,f3,m3,aas)." 
def __init__(self, mode="1"): """Creates querystring instance""" self.pattern = [] self.field = [] self.mode = [] self.operator = [] self.format = [] self.pattern.append("") self.pattern.append("") self.pattern.append("") self.field.append("") self.field.append("") self.field.append("") self.mode.append("") self.mode.append("") self.mode.append("") self.operator.append("") self.operator.append("") self.format.append([]) self.format.append([]) self.format.append([]) self.advanced = 0 return def from_qrystr(self, qrystr="", search_mode="eee", operator="aa"): """Converts qrystr into querystring (uploader format)""" self.default() self.field = [] self.format = [] self.mode = ["e", "e", "e"] fields = string.split(qrystr,"||") for field in fields: tags = string.split(field, "::") i = 0 format = [] for tag in tags: if(i==0): self.field.append(tag) else: format.append(tag) i += 1 self.format.append(format) while(len(self.format) < 3): self.format.append("") while(len(self.field) < 3): self.field.append("") i = 0 for lett in search_mode: self.mode[i] = lett i += 1 i = 0 for lett in operator: self.operator[i] = lett i += 1 return def default(self): self.pattern = [] self.field = [] self.mode = [] self.operator = [] self.format = [] self.pattern.append("") #default: no pattern self.pattern.append("") self.pattern.append("") self.field.append("245__a") #default: this field self.field.append("") self.field.append("") self.mode.append("") #default: no mode self.mode.append("") self.mode.append("") self.operator.append("") self.operator.append("") self.format.append([]) self.format.append([]) self.format.append([]) self.advanced = 1 return def change_search_mode(self, mode="a"): self.mode = [mode, mode, mode] return def search_engine_encode(self): field_ = [] for field in self.field: i = 0 field__ = "" for letter in field: if(letter == "%"): if(i==5): letter = "a" else: letter = "_" i += 1 field__ += str(letter) field_.append(field__) self.field = field_ return def 
get_field_tags(field): "Gets list of field 'field' for the record with 'sysno' system number from the database." query = "select tag.value from tag left join field_tag on tag.id=field_tag.id_tag left join field on field_tag.id_field=field.id where field.code='%s'" % field; out = [] res = run_sql(query) for row in res: out.append(row[0]) return out def get_subfield(field, subfield): "Return subfield of a field." for sbf in field: if(sbf[0][0][0] == subfield): return sbf[0][0][1] return "" def bylen(word1, word2): return len(word1) - len(word2) def main_words_list(wstr): """Select the longest words for matching""" words = [] if wstr: words = wstr.split() words.sort(cmp=bylen) words.reverse() words = words[:5] return words -def match_records(records, qrystrs=None, perform_request_search_mode="eee", operator="a", verbose=1, server_url=CFG_SITE_URL): - """ Do the actual job. Check which records are new, which are matched, - which are ambiguous and which are fuzzy-matched. - Parameters: - @records: an array of records to analyze - @qrystrs: querystrings - @perform_request_search_mode: run the query in this mode - @operator: "o" "a" - @verbose: be loud - @server_url: server url to match against +def match_records(records, qrystrs=None, perform_request_search_mode="eee", operator="a", verbose=1, server_url=CFG_SITE_URL, modify=0): + """ Match passed records with existing records on a local or remote Invenio + installation. Returns which records are new (no match), which are matched, + which are ambiguous and which are fuzzy-matched. + + @param records: records to analyze + @type records: list of records + + @param qrystrs: Querystrings + @type qrystrs: list of object + + @param server_url: which server to search on. 
Local installation by default + @type server_url: str + + @param perform_request_search_mode: run the query in this mode + @type perform_request_search_mode: string + + @param operator: "o" "a" + @type operator: str + + @param verbose: be loud + @type verbose: int + + @param modify: output modified records of matches + @type modify: int + + @rtype: list of lists @return an array of arrays of records, like this [newrecs,matchedrecs, ambiguousrecs,fuzzyrecs] """ server = InvenioConnector(server_url) newrecs = [] matchedrecs = [] ambiguousrecs = [] fuzzyrecs = [] record_counter = 0 for rec in records: record_counter += 1 if (verbose > 1): sys.stderr.write("\n Processing record: #%d .." % record_counter) if qrystrs == None: qrystrs = [] if len(qrystrs)==0: qrystrs.append("") more_detailed_info = "" for qrystr in qrystrs: querystring = Querystring() querystring.default() if(qrystr != ""): querystring.from_qrystr(qrystr, perform_request_search_mode, operator) else: querystring.default() querystring.search_engine_encode() ### get field values for record instance inst = [] ### get appropriate fields from database for field in querystring.field: ### use expanded tags tag = field[0:3] ind1 = field[3:4] ind2 = field[4:5] code = field[5:6] if((ind1 == "_")or(ind1 == "%")): ind1 = "" if((ind2 == "_")or(ind2 == "%")): ind2 = "" if((code == "_")or(code == "%")): code = "a" if(field != "001"): finsts = record_get_field_instances(rec[0], tag, ind1, ind2) sbf = get_subfield(finsts, code) inst.append(sbf) elif(field in ["001"]): sbf = record_get_field_values(rec[0], field, ind1="", ind2="", code="") inst.append(sbf) else: inst.append("") ### format acquired field values i = 0 for instance in inst: for format in querystring.format[i]: inst[i] = bibconvert.FormatField(inst[i], format) i += 1 ### perform the search if(inst[0] != ""): p1 = inst[0] f1 = querystring.field[0] m1 = querystring.mode[0] op1 = querystring.operator[0] p2 = inst[1] f2 = querystring.field[1] m2 = 
querystring.mode[1] op2 = querystring.operator[1] p3 = inst[2] f3 = querystring.field[2] m3 = querystring.mode[2] aas = querystring.advanced #1st run the basic perform_req_search recID_list = server.search( p1=p1, f1=f1, m1=m1, op1=op1, p2=p2, f2=f2, m2=m2, op2=op2, p3=p3, f3=f3, m3=m3, of='id') if (verbose > 8): sys.stderr.write("\nperform_request_search with values"+\ " p1="+str(p1)+" f1="+str(f1)+" m1="+str(m1)+" op1="+str(op1)+\ " p2="+str(p2)+" f2="+str(f2)+" m2="+str(m2)+" op2="+str(op2)+\ " p3="+str(p3)+" f3="+str(f3)+" m3="+str(m3)+\ " result="+str(recID_list)+"\n") if len(recID_list) > 1: #ambig match ambiguousrecs.append(rec) if (verbose > 8): sys.stderr.write("ambiguous\n") if len(recID_list) == 1: #match + if modify: + if record_has_field(rec[0], '001'): + record_modify_controlfield(rec[0], '001', controlfield_value=str(recID_list[0]), field_position_global=1) + else: + record_add_field(rec[0], '001', controlfield_value=str(recID_list[0])) matchedrecs.append(rec) if (verbose > 8): sys.stderr.write("match\n") if len(recID_list) == 0: #no match.. 
#try fuzzy matching intersected = None #check if all the words appear in the #field of interest words1 = main_words_list(p1) words2 = main_words_list(p2) words3 = main_words_list(p3) for word in words1: word = "'"+word+"'" ilist = server.search(p=word, f=f1, of="id") if (verbose > 8): sys.stderr.write("fuzzy perform_request_search with values"+\ " p="+str(word)+" f="+str(f1)+" res "+str(ilist)+"\n") if intersected == None: intersected = ilist intersected = list(set(ilist)&set(intersected)) for word in words2: word = "'"+word+"'" ilist = server.search(p=word, f=f2, of="id") if (verbose > 8): sys.stderr.write("fuzzy perform_request_search with values"+\ " p="+str(word)+" f="+str(f2)+" res "+str(ilist)+"\n") if intersected == None: intersected = ilist intersected = list(set(ilist)&set(intersected)) for word in words3: word = "'"+word+"'" ilist = server.search(p=word, f=f3, of="id") if (verbose > 8): sys.stderr.write("fuzzy perform_request_search with values"+\ " p="+str(word)+" f="+str(f3)+" res "+str(ilist)+"\n") if intersected == None: intersected = ilist intersected = list(set(ilist)&set(intersected)) if intersected: #this was a fuzzy match + if modify: + if record_has_field(rec[0], '001'): + record_modify_controlfield(rec[0], '001', controlfield_value=str(intersected[0]), field_position_global=1) + else: + record_add_field(rec[0], '001', controlfield_value=str(intersected[0])) fuzzyrecs.append(rec) if (verbose > 8): sys.stderr.write("fuzzy\n") else: newrecs.append(rec) if (verbose > 8): sys.stderr.write("new\n") #return results return [newrecs, matchedrecs, ambiguousrecs, fuzzyrecs] def transform_input_to_marcxml(filename, file_input=""): """ Takes the filename or input of text-marc and transforms it to MARCXML. 
""" if not filename: # Create temporary file to read from tmp_fd, filename = mkstemp() os.write(tmp_fd, file_input) os.close(tmp_fd) try: # Redirect output, transform, restore old references old_stdout = sys.stdout new_stdout = StringIO() sys.stdout = new_stdout transform_file(filename) finally: sys.stdout = old_stdout return new_stdout.getvalue() def main(): """ Record matches database content when defined search gives exactly one record in the result set. By default the match is done on the title field. Using advanced search only 3 fields can be queried concurrently qrystr - querystring in the UpLoader format. """ try: - opts, args = getopt.getopt(sys.argv[1:], "0123hVFm:q:c:nv:o:b:i:r:t", + opts, args = getopt.getopt(sys.argv[1:], "0123hVFm:q:c:nv:o:b:i:r:ta", [ "print-new", "print-match", "print-ambiguous", "print-fuzzy", "help", "version", "mode=", "field=", "query-string=", "config=", "no-process", "verbose=", "operator=", "batch-output=", "input=", "remote=", - "text-marc-output" + "text-marc-output", + "alter-recid" ]) except getopt.GetoptError, e: usage() match_results = [] qrystrs = [] #query strings print_mode = 0 # default match mode to print new records noprocess = 0 perform_request_search_mode = "eee" operator = "aa" verbose = 1 # 0..be quiet file_read = "" #input buffer records = [] batch_output = "" #print stuff in files f_input = "" #read from where, if param "i" server_url = CFG_SITE_URL #url to server performing search, local by default + modify = 0 #alter output with matched record indentifiers predefined_fields = ["title", "author"] textmarc_output = 0 for opt, opt_value in opts: if opt in ["-0", "--print-new"]: print_mode = 0 if opt in ["-1", "--print-match"]: print_mode = 1 if opt in ["-2", "--print-ambiguous"]: print_mode = 2 if opt in ["-3", "--print-fuzzy"]: print_mode = 3 if opt in ["-n", "--no-process"]: noprocess = 1 if opt in ["-h", "--help"]: usage() sys.exit(0) if opt in ["-V", "--version"]: print __revision__ sys.exit(0) if opt in 
["-F", "--fuzzy"]: fuzzy = 1 if opt in ["-t", "--text-marc-output"]: textmarc_output = 1 if opt in ["-v", "--verbose"]: verbose = int(opt_value) if opt in ["-q", "--query-string"]: qrystrs.append(opt_value) if opt in ["-m", "--mode"]: perform_request_search_mode = opt_value if opt in ["-o", "--operator"]: operator = opt_value if opt in ["-b", "--batch-output"]: batch_output = opt_value if opt in ["-i", "--input"]: f_input = opt_value if opt in ["-r", "--remote"]: server_url = opt_value + if opt in ["-a", "--alter-recid"]: + modify = 1 if opt in ["-f", "--field"]: alternate_querystring = [] if opt_value in predefined_fields: alternate_querystring = get_field_tags(opt_value) for item in alternate_querystring: qrystrs.append(item) else: qrystrs.append(opt_value) if opt in ["-c", "--config"]: config_file = opt_value config_file_read = bibconvert.read_file(config_file, 0) for line in config_file_read: tmp = string.split(line, "---") if(tmp[0] == "QRYSTR"): qrystrs.append(tmp[1]) if verbose: sys.stderr.write("\nBibMatch: Parsing input file "+f_input+"... ") if not f_input: for line_in in sys.stdin: file_read += line_in else: f = open(f_input) for line_in in f: file_read += line_in f.close() # Detect input type if not file_read.startswith('<'): # Not xml, assume type textmarc file_read = transform_input_to_marcxml(f_input, file_read) records = create_records(file_read) if len(records) == 0: if verbose: sys.stderr.write("\nBibMatch: Input file contains no records.\n") sys.exit() else: if verbose: sys.stderr.write("read %d records" % len(records)) sys.stderr.write("\nBibMatch: Matching ...") match_results = match_records(records, qrystrs, perform_request_search_mode, operator, verbose, - server_url) + server_url, + modify) # set the output according to print.. 
# 0-newrecs 1-matchedrecs 2-ambiguousrecs 3-fuzzyrecs recs_out = match_results[print_mode] if verbose: sys.stderr.write("\n\n Bibmatch report\n") sys.stderr.write("=" * 35) sys.stderr.write("\n New records : %d" % len(match_results[0])) sys.stderr.write("\n Matched records : %d" % len(match_results[1])) sys.stderr.write("\n Ambiguous records : %d" % len(match_results[2])) sys.stderr.write("\n Fuzzy records : %d\n" % len(match_results[3])) sys.stderr.write("=" * 35) sys.stderr.write("\n Total records : %d\n" % len(records)) if not noprocess: options = {'text-marc':1, 'aleph-marc':0} for record in recs_out: if textmarc_output: sysno = get_sysno_from_record(record[0], options) print create_marc_record(record[0], sysno, options) else: print record_xml_output(record[0]) if batch_output: i = 0 options = {'text-marc':1, 'aleph-marc':0} for result in match_results: filename = "%s.%i" % (batch_output, i) file_fd = open(filename,"w") for record in result: out = "" if textmarc_output: sysno = get_sysno_from_record(record[0], options) out += create_marc_record(record[0], sysno, options) else: out += record_xml_output(record[0]) file_fd.write(out + '\n') file_fd.close() i += 1 diff --git a/modules/bibmatch/lib/bibmatch_regression_tests.py b/modules/bibmatch/lib/bibmatch_regression_tests.py index 400663810..94e02a1b6 100644 --- a/modules/bibmatch/lib/bibmatch_regression_tests.py +++ b/modules/bibmatch/lib/bibmatch_regression_tests.py @@ -1,790 +1,797 @@ # -*- coding: utf-8 -*- ## ## This file is part of CDS Invenio. ## Copyright (C) 2002-2010 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. 
## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. # pylint: disable=E1102 """Unit tests for bibmatch.""" __revision__ = "$Id$" from invenio.testutils import make_test_suite, run_test_suite -from invenio.bibrecord import create_records +from invenio.bibrecord import create_records, record_has_field from invenio.bibmatch_engine import match_records, transform_input_to_marcxml import unittest class BibMatchTest(unittest.TestCase): """Test functions to check the functionality of bibmatch.""" def setUp(self): """setting up helper variables for tests""" self.textmarc = """ 000000020 001__ 20 000000020 041__ $$aeng 000000020 088__ $$aJYFL-RR-82-7 000000020 100__ $$aArje, J$$uUniversity of Jyvaskyla 000000020 245__ $$aCharge creation and reset mechanisms in an ion guide isotope separator (IGIS) 000000020 260__ $$aJyvaskyla$$bFinland Univ. Dept. Phys.$$cJul 1982 000000020 300__ $$a18 p 000000020 65017 $$2SzGeCERN$$aDetectors and Experimental Techniques 000000020 909C0 $$y1982 000000020 909C0 $$b19 000000020 909C1 $$uJyväsklä Univ. 000000020 909C1 $$c1990-01-28$$l50$$m2002-01-04$$oBATCH 000000020 909CS $$sn$$w198238n 000000020 980__ $$aREPORT 000000019 001__ 19 000000019 041__ $$aeng 000000019 088__ $$aSTAN-CS-81-898-MF 000000019 100__ $$aWhang, K$$uStanford University 000000019 245__ $$aSeparability as a physical database design methodology 000000019 260__ $$aStanford, CA$$bStanford Univ. Comput. Sci. 
Dept.$$cOct 1981 000000019 300__ $$a60 p 000000019 65017 $$2SzGeCERN$$aComputing and Computers 000000019 700__ $$aWiederhold, G 000000019 700__ $$aSagalowicz, D 000000019 909C0 $$y1981 000000019 909C0 $$b19 000000019 909C1 $$uStanford Univ. 000000019 909C1 $$c1990-01-28$$l50$$m2002-01-04$$oBATCH 000000019 909CS $$sn$$w198238n 000000019 980__ $$aREPORT """ #this exists in the DB, just some bibliography removed. self.recxml1 = """ SzGeCERN 2341644CERCER SLAC 5208424 hep-th/0209226 eng PUTP-2002-48 SLAC-PUB-9504 SU-ITP-2002-36 Adams, A Stanford University Decapitating Tadpoles 2002 Beijing Beijing Univ. Dept. Phys. 26 Sep 2002 31 p We argue that perturbative quantum field theory and string theory can be consistently modified in the infrared to eliminate, in a radiatively stable manner, tadpole instabilities that arise after supersymmetry breaking. This is achieved by deforming the propagators of classically massless scalar fields and the graviton so as to cancel the contribution of their zero modes. In string theory, this modification of propagators is accomplished by perturbatively deforming the world-sheet action with bi-local operators similar to those that arise in double-trace deformations of AdS/CFT. This results in a perturbatively finite and unitary S-matrix (in the case of string theory, this claim depends on standard assumptions about unitarity in covariant string diagrammatics). The S-matrix is parameterized by arbitrary scalar VEVs, which exacerbates the vacuum degeneracy problem. However, for generic values of these parameters, quantum effects produce masses for the nonzero modes of the scalars, lifting the fluctuating components of the moduli. 
LANL EDS SzGeCERN Particle Physics - Theory PREPRINT LANL EDS High Energy Physics - Theory McGreevy, J Silverstein, E Adams, Allan Greevy, John Mc Silverstein, Eva http://cdsware.cern.ch/download/invenio-demo-site-files/0209226.pdf http://cdsware.cern.ch/download/invenio-demo-site-files/0209226.ps.gz evas@slac.stanford.edu n 200239 11 20060218 0013 CER01 20020927 PUBLIC 002341644CER PREPRINT """ #this is not in the collection self.recxml2 = """ 9124 SPIRES-5726484 Schulz, Michael B. Caltech C02/06/25.2 Prepared for 477-480 Theory-HEP INSPIRE Conference Paper INSPIRE Phys.Rev.,D61,022001 hep-th/9601083 Phys.Rev.,D53,4129 hep-th/0201029 Phys.Rev.,D65,126009 hep-th/0105097 Phys.Rev.,D66,106006 hep-th/9906070 Nucl.Phys.,B584,69 hep-th/0211182 JHEP,0303,061 A brief overview of hep-th/0201028 prepared for NATO Advanced Study Institute and EC Summer School on Progress in String, Field and Particle Theory, Cargese, Corsica, France, 25 June - 11 July 2002. arXiv arXiv:0810.5197 arXiv hep-th oai:arXiv.org:0810.5197 arXiv CALT-68-2441 Moduli stabilization from fluxes 5 talk: Cargese 2002/06/25 INSPIRE string model INSPIRE compactification INSPIRE moduli: stability INSPIRE orientifold INSPIRE membrane model: D-brane INSPIRE flux INSPIRE supersymmetry INSPIRE D04-00603 DESY Schulz:2002eh SPIRESTeX Conference arXiv Citeable CORE 2008-10 2003-11-17 2009-12-11 """ #ambig match since there are 2 of these self.recxml3 = """ 26 2225350574 fre 518.5:62.01 Dasse, Michel Analyse informatique t.2 L'accomplissement Paris Masson 1972 Informatique 1972 21 1990-01-27 00 2002-04-12 BATCH m 198604 BOOK """ #missing word in title self.recxml4 = """ 92 SzGeCERN 20060616163757.0 hep-th/0606096 eng UTHET-2006-05-01 Koutsoumbas, G National Technical University of Athens Quasi-normal Modes of Electromagnetic Perturbations of Four-Dimensional Topological Black Holes 2006 10 Jun 2006 17 p We study the perturbative behaviour of topological black holes with scalar hair. 
We calculate both analytically and numerically the quasi-normal modes of the electromagnetic perturbations. In the case of small black holes we find clear evidence of a second-order phase transition of a topological black hole to a hairy configuration. We also find evidence of a second-order phase transition of the AdS vacuum solution to a topological black hole. SzGeCERN Particle Physics - Theory ARTICLE LANL EDS High Energy Physics - Theory Musiri, S Papantonopoulos, E Siopsis, G Koutsoumbas, George Musiri, Suphot Papantonopoulos, Eleftherios Siopsis, George http://137.138.33.172/record/92/files/0606096.pdf 006 J. High Energy Phys. 10 2006 n 200624 13 20070425 1021 CER01 20060613 PUBLIC 002628325CER ARTICLE [1] K. D. Kokkotas and B. G. Schmidt, Living Rev. Relativ. 2 (1999) 2 gr-qc/9909058 [2] H.-P. Nollert, Class. Quantum Gravity 16 (1999) R159 [3] J. S. F. Chan and R. B. Mann, Phys. Rev. D 55 (1997) 7546 gr-qc/9612026 [3] Phys. Rev. D 59 (1999) 064025 [4] G. T. Horowitz and V. E. Hubeny, Phys. Rev. D 62 (2000) 024027 hep-th/9909056 [5] V. Cardoso and J. P. S. Lemos, Phys. Rev. D 64 (2001) 084017 gr-qc/0105103 [6] B. Wang, C. Y. Lin and E. Abdalla, Phys. Lett. B 481 (2000) 79 hep-th/0003295 [7] E. Berti and K. D. Kokkotas, Phys. Rev. D 67 (2003) 064020 gr-qc/0301052 [8] F. Mellor and I. Moss, Phys. Rev. D 41 (1990) 403 [9] C. Martinez and J. Zanelli, Phys. Rev. D 54 (1996) 3830 gr-qc/9604021 [10] M. Henneaux, C. Martinez, R. Troncoso and J. Zanelli, Phys. Rev. D 65 (2002) 104007 hep-th/0201170 [11] C. Martinez, R. Troncoso and J. Zanelli, Phys. Rev. D 67 (2003) 024008 hep-th/0205319 [12] N. Bocharova, K. Bronnikov and V. Melnikov, Vestn. Mosk. Univ. Fizika Astronomy 6 (1970) 706 [12] J. D. Bekenstein, Ann. Phys. 82 (1974) 535 [12] Ann. Phys. 91 (1975) 75 [13] T. Torii, K. Maeda and M. Narita, Phys. Rev. D 64 (2001) 044007 [14] E. Winstanley, Found. Phys. 33 (2003) 111 gr-qc/0205092 [15] T. Hertog and K. Maeda, J. High Energy Phys. 
0407 (2004) 051 hep-th/0404261 [16] J. P. S. Lemos, Phys. Lett. B 353 (1995) 46 gr-qc/9404041 [17] R. B. Mann, Class. Quantum Gravity 14 (1997) L109 gr-qc/9607071 [17] R. B. Mann, Nucl. Phys. B 516 (1998) 357 hep-th/9705223 [18] L. Vanzo, Phys. Rev. D 56 (1997) 6475 gr-qc/9705004 [19] D. R. Brill, J. Louko and P. Peldan, Phys. Rev. D 56 (1997) 3600 gr-qc/9705012 [20] D. Birmingham, Class. Quantum Gravity 16 (1999) 1197 hep-th/9808032 [21] R. G. Cai and K. S. Soh, Phys. Rev. D 59 (1999) 044013 gr-qc/9808067 [22] Phys.Rev. D65 (2002) 084006 B. Wang, E. Abdalla and R. B. Mann, [arXiv hep-th/0107243 [23] Phys.Rev. D65 (2002) 084006 R. B. Mann, [arXiv gr-qc/9709039 [24] J. Crisostomo, R. Troncoso and J. Zanelli, Phys. Rev. D 62 (2000) 084013 hep-th/0003271 [25] R. Aros, R. Troncoso and J. Zanelli, Phys. Rev. D 63 (2001) 084015 hep-th/0011097 [26] R. G. Cai, Y. S. Myung and Y. Z. Zhang, Phys. Rev. D 65 (2002) 084019 hep-th/0110234 [27] M. H. Dehghani, Phys. Rev. D 70 (2004) 064019 hep-th/0405206 [28] C. Martinez, R. Troncoso and J. Zanelli, Phys. Rev. D 70 (2004) 084035 hep-th/0406111 [29] Phys.Rev. D74 (2006) 044028 C. Martinez, J. P. Staforelli and R. Troncoso, [arXiv hep-th/0512022 [29] C. Martinez and R. Troncoso, [arXiv Phys.Rev. D74 (2006) 064007 hep-th/0606130 [30] E. Winstanley, Class. Quantum Gravity 22 (2005) 2233 gr-qc/0501096 [30] E. Radu and E. Win-stanley, Phys. Rev. D 72 (2005) 024017 gr-qc/0503095 [30] A. M. Barlow, D. Doherty and E. Winstanley, Phys. Rev. D 72 (2005) 024008 gr-qc/0504087 [31] I. Papadimitriou, [arXiv JHEP 0702 (2007) 008 hep-th/0606038 [32] P. Breitenlohner and D. Z. Freedman, Phys. Lett. B 115 (1982) 197 [32] Ann. Phys. 144 (1982) 249 [33] L. Mezincescu and P. K. Townsend, Ann. Phys. 160 (1985) 406 [34] V. Cardoso, J. Natario and R. Schiappa, J. Math. Phys. 45 (2004) 4698 hep-th/0403132 [35] J. Natario and R. Schiappa, Adv. Theor. Math. Phys. 8 (2004) 1001 hep-th/0411267 [36] S. Musiri, S. Ness and G. Siopsis, Phys. Rev. 
D 73 (2006) 064001 hep-th/0511113 [37] L. Motl and A. Neitzke, Adv. Theor. Math. Phys. 7 (2003) 307 hep-th/0301173 [38] Astron. J. M. Medved, D. Martin and M. Visser, Class. Quantum Gravity 21 (2004) 2393 gr-qc/0310097 [39] W.-H. Press, S. A. Teukolsky, W. T. Vetterling and B. P. Flannery in Numerical Recipies (Cambridge University Press, Cambridge, England, 1992). [40] G. Koutsoumbas, S. Musiri, E. Papantonopoulos and G. Siopsis, in preparation. CDS Invenio/0.92.0.20070116 refextract/0.92.0.20070116-1181414732-0-36-41-0-2 """ return def test_check_existing(self): """bibmatch - check existing record""" records = create_records(self.recxml1) [dummy1, matchedrecs, dummy2, dummy3] = match_records(records) self.assertEqual(1,len(matchedrecs)) def test_check_new(self): """bibmatch - check a new record""" records = create_records(self.recxml2) [newrecs, dummy1, dummy2, dummy3] = match_records(records) self.assertEqual(1,len(newrecs)) def test_check_ambiguous(self): """bibmatch - check an ambiguous record""" records = create_records(self.recxml3) [dummy1, dummy2, ambig, dummy3] = match_records(records) self.assertEqual(1,len(ambig)) def test_check_fuzzy(self): """bibmatch - check fuzzily matched record""" records = create_records(self.recxml4) [dummy1, dummy2, dummy3, fuzzyrecs] = match_records(records) self.assertEqual(1,len(fuzzyrecs)) def test_check_remote(self): - """bibmatch - check remote match (Invenio demo site) """ + """bibmatch - check remote match (Invenio demo site)""" records = create_records(self.recxml1) [dummy1, matchedrecs, dummy3, fuzzyrecs] = match_records(records, server_url="http://invenio-demo.cern.ch") self.assertEqual(1,len(matchedrecs)) def test_check_textmarc(self): - """bibmatch - check textmarc as input """ + """bibmatch - check textmarc as input""" marcxml = transform_input_to_marcxml("", self.textmarc) records = create_records(marcxml) [dummy1, matchedrecs, dummy3, fuzzyrecs] = match_records(records, server_url="http://invenio-demo.cern.ch") 
self.assertEqual(2,len(matchedrecs)) + def test_check_altered(self): + """bibmatch - check altered match""" + records = create_records(self.recxml1) + self.assertTrue(not record_has_field(records[0][0], '001')) + [dummy1, matchedrecs, dummy3, dummy4] = match_records(records, modify=1) + self.assertTrue(record_has_field(matchedrecs[0][0], '001')) + TEST_SUITE = make_test_suite(BibMatchTest) if __name__ == "__main__": run_test_suite(TEST_SUITE, warn_user=True)