Page MenuHomec4science

bibupload.in
No OneTemporary

File Metadata

Created
Sun, Nov 10, 12:58

bibupload.in

## $Id$
## Script that uploads a LoC MARC XML file into the BIB database.
## This file is part of the CERN Document Server Software (CDSware).
## Copyright (C) 2002 CERN.
##
## The CDSware is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## The CDSware is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDSware; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
## read config variables:
#include "config.wml"
#include "configbis.wml"
#include "cdswmllib.wml"
## start PHP:
<protect>#!</protect><PHP> -q
<?
<protect>#$Id$</protect>
<protect>## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES.</protect>
## Database connection settings and support contact used by the rest of the
## script. The "<...>" values look like WML placeholders that are replaced
## when this file is generated -- TODO confirm against config.wml.
$mysql_dbname = "<DBNAME>";
$mysql_machine = "<DBHOST>";
$mysql_docid = "<DBUSER>";
$mysql_docpw = "<DBPASS>";
$supportemail = "<SUPPORTEMAIL>";
## Version string; presumably produced at generation time by the embedded
## generate_pretty_version_string() call -- verify against cdswmllib.wml.
$__version__ = "<: print generate_pretty_version_string('$Id$'); :>";
### okay, config read, from now on the script can continue ###
<protect>
##########################################################################
# file: bibupload
# created by: T. Baron
# revision: $Id$
# parameters: Usage: bibupload [options] file1.xml [file2.xml ...]
# Options:
# -h, --help print this help
# -d, --display display records analysis - no integration
# -b, --database display integration requests - integration
# -m, --mute Silent mode.
# description: this program takes file(s) written in XML following
# the DTD used in the LoC MARC XML. For each record found,
# the program decides whether it is an upload or a
# correction (sysno existence, report number existence).
# Then each field is created or corrected.
# The process is based on three classes: record, field and
# subfield.
#
##########################################################################
// bibupload cannot run without these PHP extensions: check pcntl first,
// then mysql, and stop at the first one that is missing
foreach (array('pcntl', 'mysql') as $requiredextension) {
    if (extension_loaded($requiredextension)) {
        continue;
    }
    print "error... $requiredextension php module needed for running bibupload. Check your php installation";
    exit();
}
unset($requiredextension);
// maximum execution time of the script, in seconds
set_time_limit(86340);
// list of the "untouchable" field tags
$strongtags = array("909COo");
//////////////////////////////////////////////////////////////////////
// BIBSCHED FUNCTIONS
// These functions allow bibupload to run in the framework of
// the CDS scheduler
//////////////////////////////////////////////////////////////////////
function task_submit()
{
    // Submits the task to the bibsched task queue (schTASK table).
    // This is what users will be invoking via command line.
    // Reads and updates the global $options (TASKRUN is reset, recordfiles
    // is replaced by the resolved paths). Exits with status 1 when one of
    // the record files cannot be found.
    global $options,$errorfp;
    // To be sure no task is set
    if ($options['TASKRUN']) {
        $options['TASKRUN'] = FALSE;
    }
    // run time: NOW() unless a date was given on the command line; the
    // given date is escaped before being embedded in the query
    if ($options['DATE'] == "")
        $date = "NOW()";
    else
        $date = "'".mysql_escape_string($options['DATE'])."'";
    $cwd = getcwd();
    $totalrecs = 0;
    $fullrecordfiles = array();
    // each path is first tried as given from "/", then relative to the
    // directory the command was started from
    chdir("/");
    foreach ($options['recordfiles'] as $originalrecordfile) {
        $recordfile = $originalrecordfile;
        if (!file_exists("$recordfile"))
            $recordfile = $cwd."/$recordfile";
        if (!file_exists("$recordfile")) {
            fwrite($errorfp,"Error: Cannot find file $recordfile.\n");
            exit(1);
        }
        array_push($fullrecordfiles,$recordfile);
        // count the lines containing "<record"; the file name is quoted with
        // escapeshellarg() so spaces or shell metacharacters cannot break
        // or alter the command
        $grepcount = shell_exec("grep -a \"<record\" ".escapeshellarg($recordfile)." | wc -l");
        $totalrecs += (int) trim((string) $grepcount);
    }
    $options['recordfiles'] = $fullrecordfiles;
    // submit task (user name escaped like the serialized options)
    $query = "INSERT INTO schTASK (id,proc,user,runtime,arguments,status,progress)
VALUES (NULL,'bibupload','".mysql_escape_string($options['USER'])."',$date,'".mysql_escape_string(serialize($options))."','WAITING','Done 0 out of $totalrecs')";
    $res = mysql_perform_query($query);
    $taskid = mysql_insert_id();
    print "Task #$taskid submitted.";
}
function task_sig_sleep($signo)
{
    // Handler for the 'sleep' signal sent by BibSched.
    // First signal: only records the request in $sleepstate so that the
    // current integration can finish. A further signal while $sleepstate
    // is set: waits here until $sleepstate becomes false.
    global $sleepstate;
    if ($sleepstate != "") {
        print " [sleeping...] ";
        task_update_state("SLEEPING");
        while ($sleepstate) {
            usleep(10);
        }
        return;
    }
    print " [going to sleep...] ";
    task_update_state("GOING TO SLEEP");
    $sleepstate = $signo;
}
function task_sig_wakeup($signo)
{
    // Handler for the 'wakeup' signal sent by BibSched: clears the global
    // sleep flag and records the CONTINUING state.
    $GLOBALS['sleepstate'] = 0;
    echo " [continuing...] ";
    task_update_state("CONTINUING");
}
function task_sig_stop($signo)
{
    // Handler for the 'stop' signal sent by BibSched.
    // First signal: only records the request in $stopstate so that the
    // current integration can finish. A further signal while $stopstate is
    // set: closes the MySQL connection and exits.
    global $stopstate;
    if ($stopstate != "") {
        // second request: really quit
        print " [stopped...] ";
        task_update_state("STOPPED");
        mysql_close();
        exit(0);
    }
    print " [stopping...] ";
    task_update_state("STOPPING");
    $stopstate = $signo;
}
function task_sig_suicide($signo)
{
    // Handler for the 'suicide' signal sent by BibSched: announces and
    // records the SUICIDING then SUICIDED states, then exits at once.
    $steps = array(
        "suiciding myself now..." => "SUICIDING",
        "suicided" => "SUICIDED",
    );
    foreach ($steps as $notice => $state) {
        print " [$notice] ";
        task_update_state($state);
    }
    exit(0);
}
function task_sig_unknown($signo)
{
    // Handler for any other signal sent by the shell or the user:
    // the signal is only reported, nothing else happens.
    echo " [unknown signal ", $signo, " ignored] ";
}
function task_update_progress($msg)
{
    // Stores $msg as the progress text of the running task
    // (row $options['TASKRUN'] of the BibSched table schTASK).
    global $options;
    $progress = mysql_escape_string($msg);
    $taskid = $options['TASKRUN'];
    mysql_perform_query("UPDATE schTASK SET progress='".$progress."' where id=".$taskid);
}
function task_update_state($val)
{
    // Stores $val as the status of the running task
    // (row $options['TASKRUN'] of the BibSched table schTASK).
    global $options;
    $status = mysql_escape_string($val);
    $taskid = $options['TASKRUN'];
    mysql_perform_query("UPDATE schTASK SET status='".$status."' where id=".$taskid);
}
function task_get_state()
{
    // Returns the status stored in schTASK for the running task
    // ($options['TASKRUN']).
    global $options;
    $taskid = $options['TASKRUN'];
    $res = mysql_perform_query("SELECT status from schTASK WHERE id=".$taskid);
    $statusrow = mysql_fetch_row($res);
    return $statusrow[0];
}
function task_get_progress()
{
    // Returns the progress stored in schTASK for the running task
    // ($options['TASKRUN']). A text of the form "Done X out of Y" is
    // reduced to X; any other text is returned as stored.
    global $options;
    $taskid = $options['TASKRUN'];
    $res = mysql_perform_query("SELECT progress from schTASK WHERE id=".$taskid);
    $progressrow = mysql_fetch_row($res);
    return ereg_replace("Done (.*) out of .*","\\1",$progressrow[0]);
}
function task_get_options($id)
{
    // Returns the options of the bibupload task $id, unserialized from the
    // 'arguments' column of the BibSched table schTASK.
    // Writes an error and exits with status 1 when no such task exists.
    global $errorfp;
    $res = mysql_perform_query("SELECT arguments FROM schTASK WHERE id=$id AND proc='bibupload'");
    $taskrow = mysql_fetch_row($res);
    if (!$taskrow) {
        fwrite($errorfp,"Error: BibUpload task #$id does not seem to exist.\n");
        exit(1);
    }
    return unserialize($taskrow[0]);
}
//////////////////////////////////////////////////////////////////////
// STRUCTURE CLASSES
// A MARC XML record is composed of some data fields elements which
// may in turn contain subfields.
// A record starts with "<record>" and ends with "</record>
//////////////////////////////////////////////////////////////////////
class subfield {
    // This class stores one subfield item.
    var $subfieldcode; // subfield code ("_" when the XML gives none)
    var $value; // value
    var $position; // position inside the parent field

    // Constructor. Dispatches on the number of arguments actually passed:
    //   subfield($position)                        -> constructor1
    //   subfield($subfieldcode,$value,$position)   -> constructor3
    function subfield($param1=null, $param2=null, $param3=null) {
        $numargs = func_num_args();
        // variable method call instead of eval(); extra trailing NULLs are
        // simply ignored by the shorter constructor
        $method = "constructor" . $numargs;
        $this->$method($param1, $param2, $param3);
    }

    // Builds the subfield from the parser state held in the global
    // $currentfield; $number becomes its position.
    // Sets the global $validrec to false when the expected closing
    // SUBFIELD tag is not found.
    function constructor1($number)
    {
        global $currentfield;
        global $errorfp;
        global $validrec;
        $this->position = $number;
        $this->subfieldcode = $currentfield['attributes']['CODE'];
        // a missing code is stored as "_" (assignment; the previous "=="
        // compared and threw the result away)
        if ($this->subfieldcode == "")
            $this->subfieldcode = "_";
        if ($currentfield['type'] == "complete")
            $this->value = $currentfield['value'];
        else
        {
            // get cdata
            getnexttag();
            if ($currentfield['tag']=="SUBFIELD" && $currentfield['type'] == "cdata")
            {
                $this->value = $currentfield['value'];
                getnexttag();
            }
            // close tag
            if ($currentfield['tag']!="SUBFIELD" || $currentfield['type'] != "close")
            {
                fwrite($errorfp,"subfield: ERROR! Bad XML file!\n");
                $validrec = false;
            }
            getnexttag();
        }
    }

    // Builds the subfield directly from the given values.
    function constructor3($subfieldcode,$value,$position) {
        $this->position = $position;
        $this->subfieldcode = $subfieldcode;
        $this->value = $value;
    }
}
class field {
    // This class stores one field item.
    var $type; // type (tag)
    var $i1; // indicator 1
    var $i2; // indicator 2
    var $value; // main value (character data of the field itself)
    var $subfields; // array of subfield objects
    var $nbsubfields; // number of subfields
    var $position; // position inside the record

    // Constructor. Dispatches on the number of arguments actually passed:
    //   field($nbfield)                                       -> constructor1
    //   field($position,$type,$i1,$i2,$subfieldcode,$value)   -> constructor6
    function field($param1=null, $param2=null, $param3=null, $param4=null, $param5=null, $param6=null) {
        $numargs = func_num_args();
        // variable method call instead of eval(); extra trailing NULLs are
        // simply ignored by the shorter constructor
        $method = "constructor" . $numargs;
        $this->$method($param1, $param2, $param3, $param4, $param5, $param6);
    }

    // Builds the field from the parser state held in the global
    // $currentfield and consumes the following tags that belong to it.
    function constructor1($nbfield)
    {
        global $currentfield;
        $this->position = $nbfield;
        $this->type = $currentfield['attributes']['TAG'];
        $this->i1 = strtoupper($currentfield['attributes']['IND1']);
        $this->i2 = strtoupper($currentfield['attributes']['IND2']);
        // an indicator made only of blanks (or empty) is stored as "_"
        if (trim($this->i1, " \n\r\t") == "") { $this->i1 = "_"; }
        if (trim($this->i2, " \n\r\t") == "") { $this->i2 = "_"; }
        $this->nbsubfields = 0;
        if ($currentfield['type'] == "complete")
        {
            $this->value = $currentfield['value'];
            getnexttag();
        }
        else
        {
            getnexttag();
            while ($currentfield['tag'] == "SUBFIELD" ||
                   (($currentfield['tag'] == "CONTROLFIELD" ||
                     $currentfield['tag'] == "DATAFIELD") &&
                    $currentfield['type'] == "cdata"))
            {
                if ($currentfield['tag'] == "SUBFIELD")
                {
                    $this->subfields[$this->nbsubfields] = new subfield(
                        $this->nbsubfields);
                    $this->nbsubfields ++;
                }
                else
                {
                    // character data of the field itself
                    $this->value = $currentfield['value'];
                }
                getnexttag();
            }
        }
    }

    // Builds a field holding exactly one subfield from the given values.
    function constructor6($position,$type,$i1,$i2,$subfieldcode,$value) {
        $this->position = $position;
        $this->type = $type;
        $this->i1 = $i1;
        $this->i2 = $i2;
        $this->nbsubfields = 1;
        $this->subfields[0] = new subfield($subfieldcode,$value,0);
    }

    // Returns the value of the first subfield whose code matches
    // $subfieldcode (case-insensitive), or 0 when there is none.
    // The scan stops at the last subfield; it used to run past the end of
    // the array and return NULL instead of 0.
    function get_subfield($subfieldcode)
    {
        $subfieldcode = strtolower($subfieldcode);
        for ($j = 0; $j < $this->nbsubfields; $j++) {
            if (strtolower($this->subfields[$j]->subfieldcode) == "$subfieldcode")
                return $this->subfields[$j]->value;
        }
        return 0;
    }
}
class record {
    // This class stores one record item.
    var $fields; // contains all fields
    var $recid; // system number: 000 if new
    var $nbfields; // number of fields

    // Constructor: builds the record from the parsed XML, starting at the
    // current parser position. Field 001 supplies the system number and is
    // not stored as a field; every other control/data field becomes a
    // field object.
    function record()
    {
        global $currentfield;
        $this->nbfields = 0;
        getnexttag();
        // skip everything up to the first control/data field (or the end)
        while ($currentfield != NULL
               && $currentfield['tag'] != "CONTROLFIELD"
               && $currentfield['tag'] != "DATAFIELD") {
            getnexttag();
        }
        while ($currentfield != NULL
               && ($currentfield['tag'] == "CONTROLFIELD" || $currentfield['tag'] == "DATAFIELD"))
        {
            // If system number
            if ($currentfield['attributes']['TAG'] == "001")
            {
                $this->recid = $currentfield['value'];
                getnexttag();
                if ($this->recid == "")
                    $this->recid = "000";
            }
            else if ($currentfield['type'] != "close")
            {
                $this->fields[$this->nbfields] = new field($this->nbfields);
                $this->nbfields ++;
            }
            else
            {
                // closing tag: nothing to store. (The former third branch
                // reporting a bad XML file could never be reached.)
                getnexttag();
            }
        }
        if ($this->recid == "")
            $this->recid = "000";
    }

    // Returns the first field with the given tag and indicators, or 0 when
    // there is none. The scan stops at the last field; it used to run past
    // the end of the array and return NULL instead of 0.
    function get_field($tag,$i1,$i2)
    {
        for ($j = 0; $j < $this->nbfields; $j++) {
            if ($this->fields[$j]->type == "$tag" &&
                $this->fields[$j]->i1 == "$i1" &&
                $this->fields[$j]->i2 == "$i2")
                return $this->fields[$j];
        }
        return 0;
    }

    // Returns the first non-empty value of subfield $subfieldcode among the
    // fields matching tag and indicators (all compared case-insensitively),
    // or 0 when nothing is found.
    function get_subfield($tag,$i1,$i2,$subfieldcode)
    {
        $i1 = strtolower($i1);
        $i2 = strtolower($i2);
        $subfieldcode = strtolower($subfieldcode);
        $tag = strtolower($tag);
        $found = 0;
        for ($j = 0; !$found && $j < $this->nbfields; $j++)
        {
            if ($this->fields[$j] &&
                strtolower($this->fields[$j]->type) == "$tag" &&
                strtolower($this->fields[$j]->i1) == "$i1" &&
                strtolower($this->fields[$j]->i2) == "$i2")
            {
                // looked up once and reused
                $value = $this->fields[$j]->get_subfield($subfieldcode);
                if ($value) {
                    $found = $value;
                }
            }
        }
        return $found;
    }
}
//////////////////////////////////////////////////////////////////////
// TREATMENT FUNCTIONS
//////////////////////////////////////////////////////////////////////
// From an array passed as parameter, this function returns a parenthesised,
// comma-separated list of all elements in the array, each in single quotes
// and preceded by an empty string: array("a","b") gives ('','a','b').
// The values are embedded as they are (no escaping is applied).
function createTextFromArray($array)
{
    $text = "(''";
    // foreach replaces the former reset()/each() loop: same order, and it
    // does not depend on each(), which later PHP versions dropped
    foreach ($array as $value) {
        $text .= ",'".$value."'";
    }
    $text .= ")";
    return $text;
}
// Returns the next available field number for a given table name and a
// given record id. The last number handed out for each table is remembered
// in the global $maxfieldno; the database is only asked (MAX(field_number)
// of the record) when nothing is remembered for the table yet.
function getNextFieldNumber($tablename,$recid)
{
    global $maxfieldno,$options;
    if ($maxfieldno[$tablename] == "") {
        $started = getmicrotime();
        $res = mysql_perform_query(
            "\nSELECT MAX(field_number)" .
            "\nFROM bibrec_$tablename" .
            "\nWHERE id_bibrec=$recid");
        $finished = getmicrotime();
        if ($options['PRINTDB'])
            print ($finished-$started)." select MAX(field_number) from ".
                "bibrec_$tablename where id_bibrec=$recid\n";
        $maxrow = mysql_fetch_row($res);
        $maxfieldno[$tablename] = $maxrow[0]+1;
    }
    else {
        $maxfieldno[$tablename] = $maxfieldno[$tablename] + 1;
    }
    return $maxfieldno[$tablename];
}
// Runs $query and returns what mysql_query() returned (FALSE on failure).
// A failure is reported on $errorfp; with $behaviour == "die" the script
// then exits. The connection is (re)opened on every call and the function
// retries once per second until a connection is obtained.
function mysql_perform_query($query, $behaviour="continue") {
    global $errorfp, $sock, $mysql_machine, $mysql_docid, $mysql_docpw, $mysql_dbname;
    // needed by the error message below
    global $supportemail;
    // connect to mysql
    $sock = @mysql_connect($mysql_machine,$mysql_docid,$mysql_docpw);
    $reconnected = false;
    while (!$sock) {
        sleep(1);
        $sock = @mysql_connect($mysql_machine,$mysql_docid,$mysql_docpw);
        $reconnected = true;
    }
    // choose the database again only once a connection is back; doing it
    // inside the loop aborted the script as soon as one retry failed
    if ($reconnected) {
        mysql_select_db($mysql_dbname) or die("Sorry, cannot choose ".$mysql_dbname." database. Please try later.\n");
    }
    $result = mysql_query($query);
    if (!$result) {
        fwrite($errorfp, "\nMySQL: could not execute your query:\n $query" .
            "\nContact $supportemail." .
            "\nError " . mysql_errno() . ": " . mysql_error() . ".\n");
        if ($behaviour == "die"){
            exit(0);
        }
    }
    return $result;
}
// Moves to the next valuable entry of the parsed XML structure (one of
// "RECORD", "CONTROLFIELD", "DATAFIELD" or "SUBFIELD") and stores it in the
// global $currentfield; $currentfield becomes NULL when position $nbTags
// has been passed without finding one.
function getnexttag()
{
    global $i; // current position in the structure
    global $index; // structure containing the XML
    global $currentfield;
    global $nbTags;
    // get next tag
    $i++;
    // the bound is tested first so that entries beyond $nbTags are never
    // read; an entry without a tag is skipped like any other tag
    while ($i <= $nbTags &&
           (!isset($index[$i]['tag']) ||
            ($index[$i]['tag'] != "RECORD" &&
             $index[$i]['tag'] != "CONTROLFIELD" &&
             $index[$i]['tag'] != "DATAFIELD" &&
             $index[$i]['tag'] != "SUBFIELD")))
        $i++;
    if ($i <= $nbTags)
        $currentfield = $index[$i];
    else
        $currentfield = NULL;
}
// Looks for a record holding exactly $value under $tag in $table.
// Returns the id_bibrec of the first bibrec_$table link found for the first
// exactly matching row, or 0 when no row matches.
function Test_In_DB($value,$tag,$table)
{
    global $options, $mysql_dbname;
    // strip leading and trailing blanks, tabs and line ends (the former
    // trailing pattern only matched when the value ended with a space)
    $value = trim($value, " \n\r\t");
    // the escaped form is only used inside the SQL text
    $escaped = mysql_escape_string($value);
    mysql_select_db($mysql_dbname);
    $t1 = getmicrotime();
    $res = mysql_perform_query(
        "\nSELECT id,value" .
        "\nFROM $table" .
        "\nWHERE value='$escaped' and" .
        "\ntag='$tag'");
    $t2 = getmicrotime();
    if ($options['PRINTDB'])
        print ($t2-$t1)." select id from $table where value='$escaped' and".
            " tag='$tag'\n";
    while ($arr = mysql_fetch_row($res))
    {
        // exact comparison against the unescaped value: comparing with the
        // escaped text never matched values containing quotes or backslashes
        if ((string) $arr[1] === (string) $value) {
            $t1 = getmicrotime();
            $res2 = mysql_perform_query(
                "\nSELECT id_bibrec" .
                "\nFROM bibrec_".$table .
                "\nWHERE id_bibxxx=".$arr[0]);
            $t2 = getmicrotime();
            if ($options['PRINTDB'])
                print ($t2-$t1)." select id_bibrec from bibrec_".$table.
                    " where id_bibxxx=".$arr[0]."\n";
            $result = mysql_fetch_row($res2);
            return $result[0];
        }
    }
    return 0;
}
// Creates a new bibrec row for $rec and stores all its fields:
//  - FMT fields go to bibfmt (subfield 0 = format name, subfield 1 = value,
//    stored gzcompress()ed; an empty value deletes the format),
//  - field 001 is not integrated,
//  - every other field goes through insertfield() with field number j+1.
// Finally the full XML format is stored with insertFMTfield().
// $logfp is only passed on to insertfield().
function insert($rec,$logfp)
{
    global $options;
    // create the bibrec; creation and modification dates are both "now"
    $now = strftime("%Y-%m-%d %H:%M:%S");
    $cd = $now;
    $md = $now;
    $t1 = getmicrotime();
    mysql_perform_query(
        "\nINSERT" .
        "\nINTO bibrec (creation_date,modification_date)" .
        "\nvalues('$cd','$md')");
    $t2 = getmicrotime();
    if ($options['PRINTDB'])
        print ($t2-$t1)." insert into bibrec (creation_date,modification_date)".
            " values('$cd','$md')\n";
    // get the created item id
    $recid = mysql_insert_id();
    // for each field
    $nbfields = $rec->nbfields;
    for ($j=0;$j<$nbfields;$j++)
    {
        if (!$rec->fields[$j])
            continue;
        $position = $j + 1;
        $field = $rec->fields[$j];
        $tag = $field->type;
        $i1 = $field->i1;
        $i2 = $field->i2;
        $tablenumber = substr($tag,0,2);
        $tablename = "bib" . $tablenumber . "x";
        // special treatment for the format field
        if ($tag == "FMT")
        {
            // the format name comes from the uploaded XML: escape it before
            // it is embedded in the queries below
            $format = mysql_escape_string($field->subfields[0]->value);
            if ($field->subfields[1]->value != "")
            {
                $value = mysql_escape_string(gzcompress($field->subfields[1]->value));
                $t1 = getmicrotime();
                $res = mysql_perform_query(
                    "\nSELECT id" .
                    "\nFROM bibfmt" .
                    "\nWHERE id_bibrec=$recid and" .
                    "\nformat='$format'");
                $t2 = getmicrotime();
                if ($options['PRINTDB'])
                    print ($t2-$t1)." select id from bibfmt where".
                        " id_bibrec=$recid and format='$format'\n";
                if (mysql_num_rows($res) != 0)
                {
                    // the format already exists for this record: refresh it
                    $t1 = getmicrotime();
                    mysql_perform_query(
                        "\nUPDATE bibfmt" .
                        "\nSET format='$format'," .
                        "\nvalue='$value'," .
                        "\nlast_updated=NOW()" .
                        "\nWHERE id_bibrec=$recid and" .
                        "\nformat='$format'");
                    $t2 = getmicrotime();
                    if ($options['PRINTDB'])
                        print ($t2-$t1)." update bibfmt set format='$format',".
                            "value='$value', last_updated=NOW()".
                            " where id_bibrec=$recid and format='$format'\n";
                }
                else
                {
                    $t1 = getmicrotime();
                    mysql_perform_query(
                        "\nINSERT" .
                        "\nINTO bibfmt(id_bibrec,format,value,last_updated)" .
                        "\nvalues ($recid,'$format','$value',NOW())");
                    $t2 = getmicrotime();
                    if ($options['PRINTDB'])
                        print ($t2-$t1)." insert into bibfmt(id_bibrec,format,".
                            "value, last_updated)values($recid,'$format','$value'," .
                            "NOW())\n";
                }
            }
            else
            {
                // empty format value: remove the stored format
                $t1 = getmicrotime();
                mysql_perform_query(
                    "\nDELETE" .
                    "\nFROM bibfmt" .
                    "\nWHERE id_bibrec=$recid and" .
                    "\nformat='$format'");
                $t2 = getmicrotime();
                if ($options['PRINTDB'])
                    print ($t2-$t1)." delete from bibfmt where id_bibrec=$recid".
                        " and format='$format'\n";
            }
        }
        // we do not want field 001 to be integrated
        elseif ($tag != "001")
        {
            // main value
            if ($field->value != "")
            {
                $fulltag = "$tag${i1}${i2}_";
                $value = mysql_escape_string($field->value);
                insertfield($fulltag,$value,$tablename,$recid,$position,$logfp);
            }
            // sub fields
            $nbsubfields = $field->nbsubfields;
            for ($k=0;$k<$nbsubfields;$k++)
            {
                $subfield = $field->subfields[$k];
                $subfieldcode = $subfield->subfieldcode;
                if ($subfield->value != "")
                {
                    $fulltag = "$tag$i1$i2$subfieldcode";
                    $value = mysql_escape_string($subfield->value);
                    insertfield($fulltag,$value,$tablename,$recid,$position,$logfp);
                }
            }
        }
    }
    // insert xml full format
    insertFMTfield($recid,$rec);
    if (!$options['MUTE']) {
        print "inserted. [$recid]\n";
    }
}
// Deletes from bibrec_$tablename the links of record $recid for every field
// number that has at least one tag matching $fulltag (SQL LIKE pattern).
// Links to tags listed in the global $strongtags are kept.
// $logfp is not used here.
function deletefield($recid,$fulltag,$tablename,$logfp)
{
    global $strongtags,$options;
    $started = getmicrotime();
    $fieldnumbers = mysql_perform_query(
        "\nSELECT DISTINCT field_number" .
        "\nFROM bibrec_$tablename," .
        "\n$tablename" .
        "\nWHERE id_bibxxx = id and" .
        "\nid_bibrec=$recid and" .
        "\ntag LIKE '$fulltag'");
    $finished = getmicrotime();
    if ($options['PRINTDB'])
        print ($finished-$started)." select DISTINCT field_number from bibrec_$tablename,$tablename".
            " where id_bibxxx = id and id_bibrec=$recid and tag LIKE '$fulltag'\n";
    while ($found = mysql_fetch_row($fieldnumbers)) {
        $fieldnumber = $found[0];
        // delete main value + subfields
        $started = getmicrotime();
        mysql_perform_query(
            "\nDELETE" .
            "\nFROM bibrec_$tablename" .
            "\nUSING bibrec_$tablename," .
            "\n$tablename" .
            "\nWHERE id_bibrec=$recid and" .
            "\nfield_number=$fieldnumber and" .
            "\nid_bibxxx=id and" .
            "\ntag NOT IN ".createTextFromArray($strongtags));
        $finished = getmicrotime();
        if ($options['PRINTDB'])
            print ($finished-$started)." delete from bibrec_$tablename using bibrec_$tablename, ".
                " $tablename where id_bibrec=$recid and field_number=$fieldnumber and ".
                "id_bibxxx=id and tag NOT IN ".createTextFromArray($strongtags)."\n";
    }
}
// Updates the existing record $recid with the content of $rec, according to
// the global $options:
//  - modification date is refreshed unless the record holds only FMT
//    fields, or FORMAT / NOTIMECHANGE is set,
//  - HBFMTUPDATE resets last_updated of the 'hb' format,
//  - REPLACE keeps the existing 964__a value, deletes all fields of the
//    record (tables bib00x..bib99x) and stores the new XML format,
//  - CORRECT deletes the fields present in $rec before re-inserting them
//    (with REF only tag 999), and deletes the 'xm' format,
//  - APPEND deletes the 'xm' format.
// FMT fields are written to bibfmt; the other fields are inserted through
// insertfield() unless FORMAT is set. Nothing is done (an error is written
// unless MUTE) when $recid does not exist in bibrec.
function update($rec,$recid,$logfp)
{
    global $WORDFILEJOB,$strongtags,$options,$maxfieldno;
    global $errorfp;
    $now = strftime("%Y-%m-%d %H:%M:%S");
    $md = $now;
    // firstly check whether this 'id' exists in the bibrec table:
    $res = mysql_perform_query("SELECT COUNT(*) FROM bibrec WHERE id=$recid\n");
    $row = mysql_fetch_row($res);
    if ($row[0] == 0) {
        if (!$options['MUTE']) {
            fwrite($errorfp,"update error. [record $recid does not exist]\n");
        }
        return;
    }
    mysql_free_result($res);
    // update the modification datetime field
    // if we don't have only FMT fields inside the record
    $onlyFMT = true;
    for ($j=0;$j<$rec->nbfields;$j++) {
        if ($rec->fields[$j] && $rec->fields[$j]->type != "FMT") {
            $onlyFMT = false;
        }
    }
    if (!$onlyFMT && !$options['FORMAT'] && !$options['NOTIMECHANGE']) {
        $t1 = getmicrotime();
        mysql_perform_query(
            "\nUPDATE bibrec" .
            "\nSET modification_date='$md'" .
            "\nWHERE id=$recid\n");
        $t2 = getmicrotime();
        if ($options['PRINTDB'])
            print ($t2-$t1)." update bibrec set modification_date='$md',".
                " where id=$recid\n";
    }
    if ($options['HBFMTUPDATE']) {
        // start of the timing was missing here, so the printed duration
        // used a stale or undefined $t1
        $t1 = getmicrotime();
        mysql_perform_query(
            "\nUPDATE bibfmt" .
            "\nSET last_updated='1970-01-01 00:00:00'" .
            "\nWHERE id_bibrec=$recid and" .
            "\nformat='hb'\n");
        $t2 = getmicrotime();
        if ($options['PRINTDB'])
            print ($t2-$t1)." update bibfmt set last_updated='1970-01-01 00:00:00',".
                " where id_bibrec=$recid and format='hb'\n";
    }
    // then if this is a replace, we retrieve the existing 964__a field
    // (holdings) and add it to the record
    if($options['REPLACE'] && !$onlyFMT) {
        $sql =
            "\nSELECT id,value" .
            "\nFROM bib96x," .
            "\nbibrec_bib96x" .
            "\nWHERE id=id_bibxxx and" .
            "\nid_bibrec=$recid and" .
            "\ntag='964__a'" .
            "\nLIMIT 1";
        $res = mysql_perform_query($sql);
        if ($row = mysql_fetch_row($res)) {
            $value = $row[1];
            $rec->fields[$rec->nbfields] = new field($rec->nbfields,"964","_","_","a",$value);
            $rec->nbfields ++;
        }
    }
    $nbfields = $rec->nbfields;
    if ($options['CORRECT']) {
        // delete each found field
        for ($j=0;$j<$nbfields;$j++) {
            $field = $rec->fields[$j];
            $tag = $field->type;
            $i1 = $field->i1;
            $i2 = $field->i2;
            $tablenumber = substr($tag,0,2);
            $tablename = "bib" . $tablenumber . "x";
            $nbsubfields = $field->nbsubfields;
            for ($k=0;$k<$nbsubfields;$k++) {
                $subfield = $field->subfields[$k];
                $subfieldcode = $subfield->subfieldcode;
                $fulltag = "$tag$i1$i2$subfieldcode";
                if ($tag != "FMT") {
                    if (!$options['REF'] || $tag == "999")
                        deletefield($recid,$fulltag,$tablename,$logfp);
                }
            }
        }
        // delete the xml format
        deletexmformat($recid);
    }
    if ($options['APPEND']) {
        // delete the xml format
        deletexmformat($recid);
    }
    if($options['REPLACE'] && !$onlyFMT) {
        // wipe the record from every table bib00x .. bib99x
        for ($i=0;$i<100;$i++) {
            deletefield($recid,"%","bib".($i<10 ? "0$i" : "$i")."x",$logfp);
        }
    }
    // insert each found field
    for ($j=0;$j<$nbfields;$j++) {
        if ($rec->fields[$j]) {
            // -1 means: no field number allocated yet for this field
            $fieldnumber = -1;
            $field = $rec->fields[$j];
            $tag = $field->type;
            $i1 = $field->i1;
            $i2 = $field->i2;
            $tablenumber = substr($tag,0,2);
            $tablename = "bib" . $tablenumber . "x";
            $fulltag = "$tag$i1${i2}_";
            if ($tag == "FMT") {
                // the format name comes from the uploaded XML: escape it
                // before it is embedded in the queries below
                $format = mysql_escape_string($field->subfields[0]->value);
                if ($field->subfields[1]->value != "") {
                    $value = mysql_escape_string(gzcompress($field->subfields[1]->value));
                    $t1 = getmicrotime();
                    $res = mysql_perform_query(
                        "\nSELECT id" .
                        "\nFROM bibfmt" .
                        "\nWHERE id_bibrec=$recid and" .
                        "\nformat='$format'");
                    $t2 = getmicrotime();
                    if ($options['PRINTDB'])
                        print ($t2-$t1)." select id from bibfmt where ".
                            "id_bibrec=$recid and format='$format'\n";
                    if (mysql_num_rows($res) != 0) {
                        $t1 = getmicrotime();
                        mysql_perform_query(
                            "\nUPDATE bibfmt" .
                            "\nSET format='$format'," .
                            "\nvalue='$value'," .
                            "\nlast_updated=NOW()" .
                            "\nWHERE id_bibrec=$recid and" .
                            "\nformat='$format'");
                        $t2 = getmicrotime();
                        if ($options['PRINTDB'])
                            print ($t2-$t1)." update bibfmt set format='$format',".
                                "value='$value', last_updated=NOW()".
                                " where id_bibrec=$recid and format='$format'\n";
                    }
                    else {
                        $t1 = getmicrotime();
                        mysql_perform_query(
                            "\nINSERT" .
                            "\nINTO bibfmt(id_bibrec,format,value,last_updated)" .
                            "\nvalues($recid,'$format','$value',NOW())");
                        $t2 = getmicrotime();
                        if ($options['PRINTDB'])
                            print ($t2-$t1)." insert into bibfmt(id_bibrec,format,".
                                "value, last_updated)values($recid,'$format','$value', NOW())\n";
                    }
                }
                else {
                    // empty format value: remove the stored format
                    $t1 = getmicrotime();
                    $res = mysql_perform_query(
                        "\nDELETE" .
                        "\nFROM bibfmt" .
                        "\nWHERE id_bibrec=$recid and" .
                        "\nformat='$format'");
                    $t2 = getmicrotime();
                    if ($options['PRINTDB'])
                        print ($t2-$t1)." delete from bibfmt where id_bibrec=$recid".
                            " and format='$format'\n";
                }
            }
            else if (!$options['FORMAT']) {
                if (!$options['REF'] || $tag=="999") {
                    // main value
                    if ($field->value != "") {
                        $fieldnumber = getNextFieldNumber($tablename,$recid);
                        $value = mysql_escape_string($field->value);
                        insertfield($fulltag,$value,$tablename,$recid,
                            $fieldnumber,$logfp);
                    }
                    // sub fields
                    $nbsubfields = $field->nbsubfields;
                    for ($k=0;$k<$nbsubfields;$k++) {
                        $subfield = $field->subfields[$k];
                        $subfieldcode = $subfield->subfieldcode;
                        $fulltag = "$tag$i1$i2$subfieldcode";
                        if ($subfield->value != "") { // got rid of and !in_array($fulltag,$strongtags)
                            // all subfields of one field share its number
                            if ($fieldnumber == "-1")
                                $fieldnumber = getNextFieldNumber($tablename,$recid);
                            $value = mysql_escape_string($subfield->value);
                            insertfield($fulltag,$value,$tablename,$recid,
                                $fieldnumber,$logfp);
                        }
                    }
                }
            }
        }
    }
    if($options['REPLACE'] && !$onlyFMT) {
        // insert xml full format
        insertFMTfield($recid,$rec);
    }
    if (!$options['MUTE']) {
        print "updated. [$recid]\n";
    }
}
// Removes the stored 'xm' format of record $recid from bibfmt.
function deletexmformat($recid)
{
    $query = "delete from bibfmt where id_bibrec=$recid and format='xm'";
    mysql_perform_query($query);
}
// Stores the XML of the current record (global $xml) as the 'xm' format of
// record $recid in bibfmt, gzcompress()ed. Before storing, the global $xml
// is modified:
//  - when $rec has a 964 field, a 964 datafield with its subfield "a" is
//    added right after the opening record tag,
//  - unless $recid is "000" or empty, any existing controlfield 001 is
//    removed and one holding $recid is added.
function insertFMTfield($recid,$rec)
{
    global $xml,$options;
    $field964 = $rec->get_field("964","_","_");
    // get_field() gives a field object only when the field exists
    if (is_object($field964)) {
        // the value is plain text: escape the XML markup characters so that
        // "&" or "<" in it cannot break the stored XML
        $value964 = str_replace(
            array("&", "<", ">"),
            array("&amp;", "&lt;", "&gt;"),
            $field964->get_subfield("a"));
        $xml = ereg_replace("<record([^>]*)>","<record\\1>" .
            "\n<datafield tag=\"964\" ind1=\"\" ind2=\"\">" .
            "\n<subfield code=\"a\">$value964</subfield>" .
            "\n</datafield>",$xml);
    }
    if ($recid != "000" && $recid != "") {
        // get rid of existing 001
        $xml = ereg_replace("<controlfield tag=\"001\">[^<]*</controlfield>","",$xml);
        // add sysno to xml record
        $xml = ereg_replace("<record([^>]*)>","<record\\1>" .
            "\n<controlfield tag=\"001\">$recid</controlfield>",$xml);
    }
    $format = "xm";
    $value = mysql_escape_string(gzcompress($xml));
    $t1 = getmicrotime();
    $res = mysql_perform_query(
        "\nSELECT id" .
        "\nFROM bibfmt" .
        "\nWHERE id_bibrec=$recid and" .
        "\nformat='xm'");
    $t2 = getmicrotime();
    if ($options['PRINTDB'])
        print ($t2-$t1)." select id from bibfmt where ".
            "id_bibrec=$recid and format='xm'\n";
    if (mysql_num_rows($res) != 0) {
        // an 'xm' format already exists for this record: refresh it
        $t1 = getmicrotime();
        mysql_perform_query(
            "\nUPDATE bibfmt" .
            "\nSET format='xm'," .
            "\nvalue='$value'," .
            "\nlast_updated=NOW()" .
            "\nWHERE id_bibrec=$recid and" .
            "\nformat='xm'");
        $t2 = getmicrotime();
        if ($options['PRINTDB'])
            print ($t2-$t1)." update bibfmt set format='xm',".
                "value='$value', last_updated=NOW() where id_bibrec=$recid and ".
                "format='xm'\n";
    }
    else {
        $t1 = getmicrotime();
        mysql_perform_query(
            "\nINSERT" .
            "\nINTO bibfmt(id_bibrec,format,value,last_updated)" .
            "\nvalues($recid,'xm','$value',NOW())");
        $t2 = getmicrotime();
        if ($options['PRINTDB'])
            print ($t2-$t1)." insert into bibfmt(id_bibrec,format,".
                "value,last_updated)values($recid,'xm','$value',NOW())\n";
    }
}
// Links record $recid to the couple ($fulltag, $value) of table $tablename
// with field number $position: the couple is created in $tablename when no
// exactly matching row exists yet, then a row is added to bibrec_$tablename.
// $value is embedded in the SQL as given; the callers in this file pass it
// already escaped with mysql_escape_string(). $logfp is not used here.
function insertfield($fulltag,$value,$tablename,$recid,$position,$logfp)
{
    global $options,$errorfp;
    $t1 = getmicrotime();
    $res = mysql_perform_query(
        "\nSELECT id,value" .
        "\nFROM $tablename" .
        "\nWHERE tag='$fulltag' and" .
        "\nvalue='$value'");
    $t2 = getmicrotime();
    if ($options['PRINTDB'])
        print ($t2-$t1)." select id from $tablename where tag='$fulltag' and".
            " value='$value'\n";
    // if an entry already exists with this couple tag/value
    $found = false;
    if (!$res) {
        fwrite($errorfp, "failed query: select id from $tablename where tag='$fulltag' ".
            "and value='$value'\n");
        // no result to read: carry on as if nothing was found
    }
    else {
        while (!$found && $row = mysql_fetch_row($res)) {
            // $value is escaped, so the stored value is escaped the same way
            // before the exact comparison; comparing it raw never matched
            // values containing quotes or backslashes
            if (mysql_escape_string($row[1]) === "$value") {
                $found = true;
                $idbibxxx = $row[0];
            }
        }
    }
    if (!$found)
    {
        // create
        $t1 = getmicrotime();
        $query = "INSERT INTO $tablename (tag,value) VALUES ('$fulltag','$value')";
        if ($options['PRINTDB'])
            print "Query: $query ... \n";
        mysql_perform_query($query);
        $t2 = getmicrotime();
        if ($options['PRINTDB'])
            print " Query took: ".($t2-$t1)." seconds.\n";
        // get the id back
        $idbibxxx = mysql_insert_id();
    }
    // then create the bibrec_bibxxx entry with position number
    $t1 = getmicrotime();
    $query = "INSERT INTO bibrec_$tablename VALUES ($recid,$idbibxxx,$position)";
    if ($options['PRINTDB'])
        print "Query: $query ...\n";
    mysql_perform_query($query);
    $t2 = getmicrotime();
    if ($options['PRINTDB'])
        print " Query took: ".($t2-$t1)." seconds.\n";
}
// Prints an analysis of $rec: a header with the global record counter
// $nbRecords and the number of fields, then for each field its tag and
// indicators, its main value (when not empty) and each of its subfields.
function display($rec)
{
    global $nbRecords;
    $nbfields = $rec->nbfields;
    print "\n\nRecord $nbRecords: $nbfields fields\n";
    // valid field indexes are 0 .. nbfields-1 (the loop used to test one
    // index too many)
    for ($j=0;$j<$nbfields;$j++)
    {
        if (empty($rec->fields[$j]))
            continue;
        $field = $rec->fields[$j];
        $tag = $field->type;
        $i1 = $field->i1;
        $i2 = $field->i2;
        print "field $j: $tag$i1$i2\n";
        // main value
        if ($field->value != "")
        {
            $value = $field->value;
            print " main value: $value\n";
        }
        // sub fields
        $nbsubfields = $field->nbsubfields;
        for ($k=0;$k<$nbsubfields;$k++)
        {
            $subfield = $field->subfields[$k];
            $subfieldcode = $subfield->subfieldcode;
            print " subfield $k: ($subfieldcode) " .
                $subfield->value . "\n";
        }
    }
}
// Integrates one record. The target record id is the record's own system
// number; when that is "000" it is looked up through 970__a and, with
// option REF, through 037__a / 088__a / 088__9. A known id leads to an
// update; otherwise the record is inserted when option INSERT is set, or
// reported as an error (nothing is done at all with option FORMAT).
// Updates the global counters of updated / inserted / error records.
function integrate ($rec)
{
    global $nbRecords, $nbUpdatedRecords, $nbInsertedRecords, $nbErrorRecords, $xml, $options;
    global $errorfp;
    // get sysno
    $recid = $rec->recid;
    // display record name
    if (!$options['MUTE']) {
        print ("Record #".($nbRecords+1)." ");
    }
    if ($recid == "000") {
        // no system number given: test on A500 system number
        $oldsysno = strtoupper($rec->get_subfield("970","_","_","a"));
        $oldsysnoExists = Test_In_DB($oldsysno,"970__a","bib97x");
        if ($oldsysnoExists)
            $recid = $oldsysnoExists;
        // if still not found and options['REF'] is set, we check against
        // the 037 tag, then 088__a, then 088__9
        if ($recid == "000" && $options['REF']) {
            $myfield = $rec->get_subfield("037","_","_","a");
            $myfieldExists = Test_In_DB($myfield,"037__a","bib03x");
            if (!$myfieldExists)
                $myfieldExists = Test_In_DB($myfield,"088__a","bib08x");
            if (!$myfieldExists)
                $myfieldExists = Test_In_DB($myfield,"088__9","bib08x");
            if ($myfieldExists)
                $recid = $myfieldExists;
        }
    }
    if ($options['DISPLAY'])
        display($rec);
    if ($recid != "000") {
        // a target record is known: this is an update
        update($rec,$recid,$logfp);
        $nbUpdatedRecords++;
        return;
    }
    // no matching record was found
    if ($options['FORMAT'])
        return;
    if ($options['INSERT']) {
        insert($rec,$logfp);
        $nbInsertedRecords++;
        return;
    }
    fwrite($errorfp,"Error: Cannot update record. Record not found\n");
    fwrite($errorfp,$xml."\n");
    $nbErrorRecords++;
    print (" [Error]\n");
}
// Reads the next record from the open file $fp and returns its text, from
// its opening "<record" up to the line containing "</record>" (the start of
// a following record on that line is cut off). Returns "" when no further
// record is found. The last line read is kept between calls, so several
// records on one line are found one after the other.
function get_next_record($fp)
{
    static $line = ""; // needed static so as to preserve line between successive calls to get_next_record()
    $xmlrecord = "";
    // get beginning of next record
    $line = stristr((string) $line,"</record"); // useful if there were more records in the same line
    while (!stristr((string) $line,"<record") && !feof($fp))
        $line = fgets($fp, 4096);
    // no record start was found before the end of the file. (Testing
    // feof() here dropped a record that starts on the last line read.)
    if (!stristr((string) $line,"<record"))
        return "";
    // get full record
    $line = stristr($line,"<record"); // continue after the above "<record" match only
    $xmlrecord = $line;
    while (!stristr((string) $line,"</record>") && !feof($fp))
    {
        $line = fgets($fp, 4096);
        // get rid of the beginning of the next record, if any
        // (/s: "." also matches the line end, as the former ereg pattern did)
        $xmlrecord .= preg_replace('/<record.*/s',"",(string) $line);
    }
    return $xmlrecord;
}
// Returns the current time in seconds as a float, built from the two parts
// ("microseconds seconds") of the string given by microtime().
function getmicrotime()
{
    $parts = explode(" ",microtime());
    $fraction = (float)$parts[0];
    $seconds = (float)$parts[1];
    return ($fraction + $seconds);
}
// Returns $field untouched.
// FIXME: the real conversion is switched off since it apparently breaks
// UTF-8. It was meant to give UTF-8 output whatever the input encoding:
// a value that changed through utf8_encode(utf8_decode()) was taken for
// Latin-1 and passed through utf8_encode().
function formatfield($field)
{
    return $field;
}
// Parse the command line $argv into the global $options array.
//
// - no argument at all: usage error
// - one single integer argument: it is stored in $options['TASKRUN'] and
//   nothing else is parsed
// - otherwise: leading "-x"/"--xxx" options are read, everything after them
//   is stored in $options['recordfiles']
//
// Usage errors are reported on $errorfp and terminate the script with a
// non-zero exit status; -h/--help and -V/--version terminate it with status 0.
function parse_command($argv)
{
    global $options,$errorfp;

    // defaults
    $options['TASKRUN'] = FALSE;
    $options['DISPLAY'] = FALSE;
    $options['PRINTDB'] = FALSE;
    $options['MUTE'] = FALSE;
    $options['INSERT'] = FALSE;
    $options['REPLACE'] = FALSE;
    $options['APPEND'] = FALSE;
    $options['CORRECT'] = FALSE;
    $options['FORMAT'] = FALSE;
    $options['REF'] = FALSE;
    $options['NOTIMECHANGE'] = FALSE;
    $options['HBFMTUPDATE'] = FALSE;
    $options['USER'] = "";
    $options['DATE'] = "";
    $verbosity = 1;
    $argc = count($argv);
    $i = 1;

    if ($argc < 2) {
        fwrite($errorfp, "Bad parameter count!\n");
        displayhelp();
        exit(1);
    }
    if ($argc == 2 && $argv[1] == strval(intval($argv[1]))) {
        // a lone integer argument is a task id
        $options['TASKRUN'] = $argv[1];
        return;
    }

    // the bound check stops the scan when the command line consists of
    // options only, instead of reading behind the end of $argv
    while ($i < $argc && substr($argv[$i],0,1) == "-") {
        switch ($argv[$i]) {
            case "-h":
            case "--help":
                displayhelp();
                exit;
            case "-V":
            case "--version":
                displayversion();
                exit;
            case "-u":
            case "--user":
            case "-t":
            case "--time":
            case "-v":
            case "--verbose":
                // these options consume the following argument
                if ($i + 1 >= $argc) {
                    fwrite($errorfp, "Missing value for option " . $argv[$i] . "!\n");
                    displayhelp();
                    exit(1);
                }
                $value = $argv[$i + 1];
                if ($argv[$i] == "-u" || $argv[$i] == "--user")
                    $options['USER'] = $value;
                elseif ($argv[$i] == "-t" || $argv[$i] == "--time")
                    $options['DATE'] = $value;
                else
                    $verbosity = $value;
                $i++;
                break;
            case "-r":
            case "--replacerecord":
                $options['REPLACE'] = TRUE;
                break;
            case "-a":
            case "--appendfield":
                $options['APPEND'] = TRUE;
                break;
            case "-c":
            case "--correctfield":
                $options['CORRECT'] = TRUE;
                break;
            case "-f":
            case "--format":
                $options['FORMAT'] = TRUE;
                break;
            case "-n":
            case "--notimechange":
                $options['NOTIMECHANGE'] = TRUE;
                break;
            case "-b":
            case "--brieffmtupdate":
                $options['HBFMTUPDATE'] = TRUE;
                break;
            case "-i":
            case "--insertrecord":
                $options['INSERT'] = TRUE;
                break;
            case "-z":
            case "--ref":
                // reference update is a special kind of field correction
                $options['CORRECT'] = TRUE;
                $options['REF'] = TRUE;
                break;
            default:
                fwrite($errorfp, "Unrecognized option " . $argv[$i] . "!\n");
                displayhelp();
                exit(1);
        }
        $i++;
    }

    // translate the verbosity level into the individual output switches
    if ($verbosity == 0)
        $options['MUTE'] = TRUE;
    if ($verbosity > 1)
        $options['DISPLAY'] = TRUE;
    if ($verbosity > 2)
        $options['PRINTDB'] = TRUE;
    if ($options['USER'] == "")
        $options['USER'] = "nobody";

    // mode sanity checks
    if (!$options['APPEND'] && !$options['REPLACE'] && !$options['FORMAT'] && !$options['CORRECT'] && !$options['INSERT']) {
        fwrite($errorfp, "\nPlease specify at least one update/insert mode!\n\n");
        exit(1);
    }
    $nbUpdateModes = (int)$options['REPLACE'] + (int)$options['APPEND'] + (int)$options['CORRECT'] + (int)$options['FORMAT'];
    if ($nbUpdateModes > 1) {
        fwrite($errorfp, "\nYou can only specify ONE update mode!\n\n");
        exit(1);
    }
    $nbInsertClashes = (int)$options['APPEND'] + (int)$options['CORRECT'] + (int)$options['FORMAT'] + (int)$options['INSERT'];
    if ($nbInsertClashes > 1) {
        fwrite($errorfp, "\nINSERT mode can only be mixed with REPLACE update mode!\n\n");
        exit(1);
    }

    // whatever follows the options is the list of input files
    $options['recordfiles'] = array_slice($argv, $i);
    if (!count($options['recordfiles'])) {
        fwrite($errorfp, "\nMissing filename!\n\n");
        displayhelp();
        exit(1);
    }
}
// Write the version string held in the global $__version__ to standard
// output, followed by a newline.
function displayversion()
{
    global $__version__;
    echo $__version__, "\n";
}
// Write the usage text (synopsis, matching rules, option list and an
// example) to the global error stream $errorfp.
function displayhelp()
{
    global $errorfp;
    // one array element per output line; the lines are joined with "\n"
    // and the text is terminated by a final newline
    $helplines = array(
        "Usage: bibupload [options] file1.xml [file2.xml ...]",
        "Uploads xml records in the bibliographical database.",
        "Bibupload first tries to match the xml record with an existing one.",
        "o If the match succeeds and an update mode is selected, the matched record",
        "is updated",
        "o If the match succeeds and no update mode is selected, an error occurs",
        "o If the match fails and an insert mode is selected, the record is added as new",
        "o If the match fails and no insert mode is selected, an error occurs",
        "Options:",
        "-h, --help print this help",
        "-V, --version print version information",
        "-u, --user choose user",
        "-v, --verbose verbosity level (0=mute, 1=default info msg,",
        "2=display record analysis, 3=display integration requests,",
        "9=max information)",
        "-n, --notimechange",
        "do not change record last modification date when updating",
        "-b, --brieffmtupdate",
        "force brief format recreation at next reformatting",
        "(sets bibfmt hb last_update to 1970)",
        "-t, --time sets date/time of execution (format 'yyyy-mm-dd hh:mm:ss')",
        "INSERT Mode",
        "-i, --insertrecord unmatched records are added as new.",
        "this can be mixed with -r.",
        "UPDATE Modes:",
        "-r, --replacerecord the existing record is entirely replaced by the new one",
        "-a, --appendfield new fields are appended to the existing record",
        "-c, --correctfield fields are replaced by the new ones in the existing",
        "record",
        "-f, --format takes only the FMT fields into account. Does not update",
        "the modification stamp either.",
        "-z, --ref update references (update only 999 fields and check 037 field",
        "against both 037 and 088.)",
        "Example:",
        "./bibupload -t '2004-10-24 05:00' -i toto.xml"
    );
    $helptext = implode("\n", $helplines) . "\n";
    fwrite($errorfp, $helptext);
}
// Run the task whose id is stored in $options['TASKRUN']: fetch the task
// options, walk through every file of $options['recordfiles'], parse each
// record found by get_next_record() and hand it over to integrate().
// Progress and final state are reported through the task_* helpers, and a
// summary is printed unless $options['MUTE'] is set.
function task_run()
{
    global $maxfieldno,$index,$options,$xml,$nbTags,$nbUpdatedRecords,$nbInsertedRecords,$nbErrorRecords, $nbRecords,$currentfield,$i,$stopstate,$sleepstate,$errorfp,$validrec;

    // read task options:
    $task_id = $options["TASKRUN"]; // remember task id
    $options = array();
    $options = task_get_options($task_id); // get options from BibSched task table
    $options["TASKRUN"] = $task_id;

    // a task that is not in WAITING state has a stored progress value;
    // records numbered below it are parsed but not integrated again
    // NOTE(review): assumes task_get_progress() yields a record count - confirm
    if (task_get_state() != "WAITING") {
        $progress = task_get_progress();
    }
    else {
        $progress = 0;
    }
    task_update_state("RUNNING");

    $starttime = getmicrotime();
    $vals = array();
    $nbRecords = 0;
    $totalrecs = 0;
    foreach ($options['recordfiles'] as $recordfile) {
        //open xml file
        $fp = @fopen("$recordfile","r");
        if (!$fp) {
            fwrite($errorfp, "Error: cannot localize and/or open file $recordfile for reading.\n");
            task_update_state("ERROR");
            exit(1);
        }

        // number of lines containing "<record", used for the progress message.
        // The file name is escaped so that blanks or shell metacharacters in
        // it cannot break (or extend) the command; "--" keeps a name starting
        // with "-" from being read as a grep option.
        $countcmd = 'grep -- "<record" ' . escapeshellarg($recordfile) . ' | wc -l';
        $totalrecs += intval(trim((string) shell_exec($countcmd)));

        if (!$options['MUTE'])
            print "\nInput file: $recordfile\n";

        $xml = get_next_record($fp);
        while ($xml != "") {
            $i = 0;
            // presumably switched to false by the record analysis when the
            // record is invalid - verify against the record class
            $validrec = true;
            $xml = formatfield($xml);

            // create XML structure
            $p = xml_parser_create();
            xml_parser_set_option($p, XML_OPTION_SKIP_WHITE, 1);
            xml_parse_into_struct($p,$xml,$index,$vals);
            xml_parser_free($p);
            $nbTags = count($index);

            // record creation
            $rec = new record();

            // start insertion where the program last stopped
            if ($nbRecords >= $progress) {
                if ($validrec) {
                    integrate($rec);
                }
                else {
                    $nbErrorRecords++;
                    fwrite($errorfp,$xml."\n");
                }
            }
            unset($rec);
            unset($GLOBALS['maxfieldno']);
            $nbRecords++;

            if ($nbRecords > $progress) {
                task_update_progress("Done ".$nbRecords." out of ".$totalrecs.".");
            }
            // honour stop/sleep requests recorded in the global state flags
            if ($stopstate) {
                task_sig_stop($stopstate);
            }
            if ($sleepstate) {
                task_sig_sleep($sleepstate);
            }
            $xml = get_next_record($fp);
        }
        fclose($fp);
    }

    if(!$options['MUTE']) {
        print "\nIdentified: $nbRecords records\n";
        print "Updated: $nbUpdatedRecords records\n";
        print "Inserted: ".$nbInsertedRecords." records\n";
        print "Errors: ".$nbErrorRecords." records\n";
        $endtime = getmicrotime();
        $time = $endtime - $starttime;
        print "in $time seconds\n";
        if ($nbRecords != 0) {
            printf("Average record treatment time: %.2f seconds", $time/$nbRecords);
        }
        else {
            // no record at all: print the placeholder itself instead of
            // feeding "-" to a %.2f conversion
            print "Average record treatment time: - seconds";
        }
    }

    if ($nbErrorRecords==0)
        task_update_state("DONE");
    else
        task_update_state("DONE WITH ERRORS");
}
///////////////////////////////////////////////////////////////////////
// MAIN SCRIPT //
///////////////////////////////////////////////////////////////////////

// all diagnostics go to standard error
$errorfp = fopen("php://stderr","w");

// route the signals below to the task_sig_* handlers, named by string here
pcntl_signal(SIGUSR1, "task_sig_sleep");
pcntl_signal(SIGTSTP, "task_sig_sleep");
pcntl_signal(SIGTERM, "task_sig_stop");
pcntl_signal(SIGINT, "task_sig_stop");
pcntl_signal(SIGABRT, "task_sig_suicide");
pcntl_signal(SIGCONT, "task_sig_wakeup");

// global state shared with the functions of this script
$maxfieldno = array();
$index = array();
$options = array();
$nbUpdatedRecords = 0;
$nbInsertedRecords = 0;
$nbErrorRecords = 0;
$stopstate = 0;
$sleepstate = 0;

// Analyse of the command-line arguments
// This program should take the path to the record file as parameter
parse_command($argv);

// connect to mysql
// (plain variables are used here: a bare ${mysql_dbname} outside of a
// string is resolved through a constant lookup and fails on PHP 8)
$sock = @mysql_connect($mysql_machine, $mysql_docid, $mysql_docpw)
    or die("Sorry, cannot connect to SQL server.\nPlease try later.\n");
mysql_select_db($mysql_dbname)
    or die("Sorry, cannot choose ".$mysql_dbname." database. Please try later.\n");

// do what is required:
// a task id on the command line means "run that task", anything else is
// handed to task_submit()
if ($options['TASKRUN']) {
    task_run();
}
else {
    task_submit();
}
?>
</protect>

Event Timeline