diff --git a/modules/bibconvert/bin/bibconvert.in b/modules/bibconvert/bin/bibconvert.in index 5cef9f010..2382ec3b3 100644 --- a/modules/bibconvert/bin/bibconvert.in +++ b/modules/bibconvert/bin/bibconvert.in @@ -1,284 +1,284 @@ #!@PYTHON@ ## -*- mode: python; coding: utf-8; -*- ## ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """BibConvert tool to convert bibliographic records from any format to any format.""" __revision__ = "$Id$" try: import fileinput import string import os import re import sys import time import getopt from time import gmtime, strftime, localtime import os.path except ImportError, e: print "Error: %s" % e import sys sys.exit(1) try: from invenio.search_engine import perform_request_search from invenio.bibformat_bfx_engine_config import CFG_BIBFORMAT_BFX_FORMAT_TEMPLATE_EXTENSION from invenio.config import * from invenio import bibconvert from invenio import bibconvert_bfx_engine from invenio import bibconvert_xslt_engine except ImportError, e: print "Error: %s" % e sys.exit(1) def printInfo(): "print out when not enough parmeters given" print """ BibConvert data convertor. 
Usage: [options] < input.dat Examples: bibconvert -ctemplate.cfg < input.dat bibconvert -ctemplate.bfx < input.xml bibconvert -ctemplate.xsl < input.xml XSL and BFX options: - -c, --config configuration templates file + -c, --config transformation stylesheet file - Text/line-oriented options: - -c, --config configuration templates file + Plain text-oriented options: + -c, --config configuration template file -d, --directory source_data fields are located in separated files in 'directory' -h, --help print this help -V, --version print version number -l, --length minimum line length (default = 1) -o, --oai OAI identifier starts with specified value (default = 1) -b, --header insert file header -e, --footer insert file footer -B, --record-header insert record header -E, --record-footer insert record footer -s, --separator record separator, default empty line (EOLEOL) -t, --output_separator -m0, match records using query string, output *unmatched* -m1, match records using query string, output *matched* -m2, match records using query string, output *ambiguous* -Cx, alternative to -c when config split to several files, *extraction* -Cs, alternative to -c when config split to several files, *target* -Ct, alternative to -c when config split to several files, *source* BibConvert can convert: - XML data using XSL or bfx templates. - - Textual (line-oriented) data using cfg templates files. + - Plain text data using cfg templates files. 
- Text/line-oriented options are not available with .xsl and .bfx configuration files + Plain text-oriented options are not available with .xsl and .bfx configuration files """ ### MAIN ### ar_ = [] conv_setting = bibconvert.set_conv() sysno = bibconvert.generate("DATE(%w%H%M%S)") sysno500 = bibconvert.generate("DATE(%w%H%M%S)") separator = "" tcounter = 0 source_data = "" query_string = "" match_mode = -1 begin_record_header = "" ending_record_footer = "" output_rec_sep = "" begin_header = "" ending_footer = "" oai_identifier_from = 1 extract_tpl = "" opts, args = getopt.getopt(sys.argv[1:],"c:d:hVl:o:b:e:B:E:s:m:C:", [ "config", "directory", "help", "version", "length", "oai", "header", "footer", "record-header", "record-footer", "separator", "match", "config-alt" ]) # get options and arguments dirmode = 0 Xcount = 0 for opt, opt_value in opts: if opt in ["-c", "--config"]: if opt_value.endswith('.'+ CFG_BIBFORMAT_BFX_FORMAT_TEMPLATE_EXTENSION): pass elif opt_value.endswith('.xsl'): pass else: separator = bibconvert.get_other_par("_RECSEP_", opt_value) output_rec_sep = "" query_string = bibconvert.get_other_par("_QRYSTR_", opt_value) match_mode = bibconvert.get_other_par("_MATCH_", opt_value) begin_header = bibconvert.get_other_par("_HEAD_", opt_value) ending_footer = bibconvert.get_other_par("_FOOT_", opt_value) begin_record_header = bibconvert.get_other_par("_RECHEAD_", opt_value) ending_record_footer = bibconvert.get_other_par("_RECFOOT_", opt_value) if(match_mode == ""): match_mode = -1 for opt, opt_value in opts: if opt in ["-c", "--config"]: extract_tpl = opt_value if opt_value.endswith('.'+ CFG_BIBFORMAT_BFX_FORMAT_TEMPLATE_EXTENSION): pass elif opt_value.endswith('.xsl'): pass else: extract_tpl_parsed = bibconvert.parse_common_template(extract_tpl,1) source_tpl = opt_value source_tpl_parsed = bibconvert.parse_common_template(source_tpl,2) target_tpl = opt_value target_tpl_parsed = bibconvert.parse_common_template(target_tpl,3) elif opt in ["-d", 
"--directory"]: source_data = opt_value source_data = source_data + "/" extract_tpl = "/" extract_tpl_parsed = None dirmode = 1 elif opt in ["-h", "--help"]: printInfo() sys.exit(0) elif opt in ["-V", "--version"]: print __revision__ sys.exit(0) elif opt in ["-l", "--length"]: try: conv_setting[0] = string.atoi(opt_value) except ValueError, e: conv_setting[0] = 1 elif opt in ["-o", "--oai"]: try: oai_identifier_from = string.atoi(opt_value) except ValueError, e: oai_identifier_from = 1 elif opt in ["-b", "--header"]: begin_header = opt_value elif opt in ["-e", "--footer"]: ending_footer = opt_value elif opt in ["-B", "--record-header"]: begin_record_header = opt_value elif opt in ["-E", "--record-footer"]: ending_record_footer = opt_value elif opt in ["-s", "--separator"]: separator = opt_value elif opt in ["-t", "--output_separator"]: output_rec_sep = opt_value elif opt in ["-m", "--match"]: match_mode = string.atoi(opt_value[0:1]) query_string = opt_value[1:] elif opt in ["-C", "--config-alt"]: if opt_value[0:1] == "x": extract_tpl = opt_value[1:] extract_tpl_parsed = bibconvert.parse_template(extract_tpl) if opt_value[0:1] == "t": target_tpl = opt_value[1:] target_tpl_parsed = bibconvert.parse_template(target_tpl) if opt_value[0:1] == "s": source_tpl = opt_value[1:] source_tpl_parsed = bibconvert.parse_template(source_tpl) # Check if required arguments were given if(extract_tpl == ""): printInfo() sys.exit(0) # Redirect to processor for given config file if opt_value.endswith('.'+ CFG_BIBFORMAT_BFX_FORMAT_TEMPLATE_EXTENSION): # BibConvert for bfx source_xml = sys.stdin.read() print bibconvert_bfx_engine.convert(source_xml, extract_tpl) elif opt_value.endswith('.xsl'): # BibConvert for XSLT source_xml = sys.stdin.read() print bibconvert_xslt_engine.convert(source_xml, extract_tpl) else: # BibConvert for cfg if(separator == "EOLEOL"): separator = "" ar_.append(dirmode) ar_.append(Xcount) ar_.append(conv_setting) ar_.append(sysno) ar_.append(sysno500) 
ar_.append(separator) ar_.append(tcounter) ar_.append(source_data) ar_.append(query_string) ar_.append(match_mode) ar_.append(begin_record_header) ar_.append(ending_record_footer) ar_.append(output_rec_sep) ar_.append(begin_header) ar_.append(ending_footer) ar_.append(oai_identifier_from) ar_.append(source_tpl) ar_.append(source_tpl_parsed) ar_.append(target_tpl) ar_.append(target_tpl_parsed) ar_.append(extract_tpl) ar_.append(extract_tpl_parsed) bibconvert.convert(ar_) diff --git a/modules/bibconvert/doc/admin/guide.html.wml b/modules/bibconvert/doc/admin/guide.html.wml index fd16164a6..a2a36d000 100644 --- a/modules/bibconvert/doc/admin/guide.html.wml +++ b/modules/bibconvert/doc/admin/guide.html.wml @@ -1,939 +1,994 @@ ## $Id$ ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. #include "cdspage.wml" \ title="BibConvert Admin Guide" \ navtrail_previous_links="<a class=navtrail href=<WEBURL>/admin/<lang:star: index.*.html>>_(Admin Area)_</a> > <a class=navtrail href=<WEBURL>/admin/bibconvert/>BibConvert Admin</a>" \ navbar_name="admin" \ navbar_select="bibconvert-admin-guide" <p>Version <: print generate_pretty_revision_date_string('$Id$'); :> <h2>Contents</h2> -<strong>1. <a href="#1">Overview</a></strong><br> -<strong>2. 
<a href="#2">Configuration Example</a></strong><br> -<strong>3. <a href="#3">Running BibConvert</a></strong><br> -<strong>4. <a href="#4">BibConvert Configuration Guide</a></strong><br> - <a href="#G">Conventions</a><br> - 4.1. <a href="#4.1">Step 1 Definition of Source Record</a><br> - 4.2. <a href="#4.2">Step 2 Definition of Source Fields</a><br> - 4.3. <a href="#4.3">Step 3 Definition of Target Record</a><br> - 4.4. <a href="#4.4">Formatting in BibConvert</a><br> - 4.4.1 <a href="#4.4.1">Definition of Formatting Functions</a><br> - 4.4.2 <a href="#4.4.2">Generated Values</a><br> - -<a name="1"></a><h2>1. Overview</h2> +<strong>A. <a href="#A">Overview</a></strong><br> +<strong>B. <a href="#B">XML-oriented mode</a></strong><br> + <strong>1 <a href="#B.1">Configuration File Examples</a></strong><br> + <strong>2 <a href="#B.2">Running BibConvert</a></strong><br> +<strong>C. <a href="#C">Plain text-oriented mode</a></strong><br> + <strong>1 <a href="#C.1">Configuration File Examples</a></strong><br> + <strong>2 <a href="#C.2">Running BibConvert</a></strong><br> + <strong>3 <a href="#C.3">BibConvert Configuration Guide</a></strong><br> + <a href="#G">Conventions</a><br> + 3.1 <a href="#C.3.1">Step 1 Definition of Source Record</a><br> + 3.2 <a href="#C.3.2">Step 2 Definition of Source Fields</a><br> + 3.3 <a href="#C.3.3">Step 3 Definition of Target Record</a><br> + 3.4 <a href="#C.3.4">Formatting in BibConvert</a><br> + 3.4.1 <a href="#C.3.4.1">Definition of Formatting Functions</a><br> + 3.4.2 <a href="#C.3.4.2">Generated Values</a><br> + +<a name="A"></a><h2>A. Overview</h2> <p>BibConvert utility enables you to convert metadata records from -various metadata formats into another metadata format supported by -local database. It is designed to process metadata records harvested -in XML converting them into MARC21 before they are finally uploaded -into database. 
However, BibConvert is flexible enough to deal also -with other structured metadata according to your needs and offers a -way, how you actually can insert what you want into the database. +various metadata formats into another metadata format supported by the +CDS Invenio local database. It is designed to process XML harvested +metadata records, converting them into MARC21 before they are uploaded +into the database. However, BibConvert is flexible enough to deal also +with other structured metadata according to your needs, and offers a +way to actually insert what you want into the database. + +<p>BibConvert is suitable for tasks such as conversion of records +received from multiple data sources, or conversion of records from +another system that may support a different metadata format.</p> + + +<p>In order to cover a wider range of possible conversions, BibConvert +has 2 different modes, each dealing with different types of data, and +each using different configuration files.</p> + +<dl> +<dt><b>Plain text-oriented mode:</b></dt> +<dd>Deals with source data being typically structured with line breaks, +and character-based separators. You can use this mode when you need +to process line-based data, such as comma/tab separated values. +Still, this mode is powerful enough to convert +complex structures, at the cost of a more complex configuration.</dd> +<dt><b>XML-oriented mode:</b></dt> +<dd>Converts source data encoded in XML. Provided you have installed +a supported XSLT processor on your machine, BibConvert can make use of standard +XSLT to interpret your XML data. +</dd> +</dl> +<p>In addition to XSLT, we provide a home-made solution for converting +XML source data. It uses our own BFX language as transformation language, +extended with XPath for node selections.<br/> +You should consider using this solution only in the case where you have not installed (or do not +want to install) an XSLT processor on your machine. 
+</p> + + +<a name="B"></a><h2>B XML-Oriented Mode</h2> +<a name="B.1"></a><h3>1 Configuration File Examples</h3> +<h4>Using XSLT</h4> +You can use standard XSL stylesheets to transform your source data. Have a look +at the provided samples in the <code>etc/bibconvert/config</code> directory +of your CDS Invenio installation.<br/> +<h4>Using BFX</h4> +BFX (BibFormat for XML) uses a syntax similar to XSLT. Roughly they only +differ in the name of the tags.<br/> +More documentation about it is to be added soon (FIXME).<br/> +Have a look +at the provided samples in the <code>etc/bibconvert/config</code> directory +of your CDS Invenio installation to learn more about it. +<a name="B.2"></a><h3>2. Running BibConvert</h3> +BibConvert in XML-oriented mode has only 1 parameter: <code>-c</code>. +It is used to specify which transformation stylesheet to apply to the piped XML. +<blockquote> +<pre> +$ bibconvert -coaidc2marcxml.xsl < sample.xml > /tmp/record.xml +</pre> +</blockquote> -<p>It is suitable for tasks such as conversion of records received -from multiple data sources or conversion of records from another -system that may support a different metadata format. +If the stylesheet you want to use is installed in the <code>etc/bibconvert/config</code> +directory of your CDS Invenio installation, then you can just refer to it by its filename. +Otherwise use the full path to the file. -<a name="2"></a><h2>2. Configuration Example</h2> +<a name="C"></a><h2>C Plain Text-Oriented Mode</h2> +<a name="C.1"></a><h3>1 Configuration File Examples</h3> <p>OAI DublinCore into MARC21 and OAI MARC into MARC21 configurations will be provided as default configuration, ensuring the standard uploading sequence (incl. <a href="../bibharvest/">BibHarvest</a> and <a href="../bibupload/">BibUpload</a> utilities). Other configurations can be created according to your needs. 
The configuration file that has to be created for each data source is a text file with the following structure: <protect> <blockquote> <pre> ### the configuration starts here ### Configuration of bibconvert templates ### source data : <source_data_name> === data extraction configuration template === ### here comes the data extraction configuration template # entry example: AU---%A---MAX---;--- # extracts maximum available data by field from metadata record # the values are found between specified tags # in this case between the '%A' tag and other tags defined # repetitive values are recognized by a semicolon separator # resp. by multiple presence of '%A' tag === data source configuration template === ### here comes the data source configuration template # entry example: AU---<:FIRSTNAME:>-<:SURNAME:> # describes the contents of extracted source data fields # in this case, the field AU is described as having two distinct subfields === data target configuration template === ### here comes the data target configuration template # entry example: AU::CONF(AU,,0)---<datafield id="700" ind1="" ind2=""><subfield code="a"><:AU*::SURNAME::CAP():>, <:AU*::FIRSTNAME::ABR():></subfield></datafield> # This section concerns rather the desired output, while previous two were focused on the data source structures. # Each line equals to one output line, composed of given literals and values from extracted source data fields. # In this example, the XML Marc21 output line is defined, # containing re-formatted values of source fields SURNAME and FIRSTNAME ### the configuration ends here </pre> </blockquote> </protect> <p>Having prepared a configuration, the BibConvert will convert the source data file according to it in a batch mode. The BibConvert is fully compatible with the Uploader1.x configuration language. For more -information, have a look at the <a href="#4">BibConvert Configuration +information, have a look at the <a href="#C.3">BibConvert Configuration Guide</a> section below. 
-<a name="3"></a><h2>3. Running BibConvert</h2> + +<a name="C.2"></a><h3>2. Running BibConvert</h3> <p>For a fully functional demo, consider the following sample input data: <blockquote> <a href="sample.dat">sample.dat</a> -- sample bibliographic data to be converted and inputted into CDS Invenio <br><a href="sample.cfg">sample.cfg</a> -- sample configuration file, featuring knowledge base demo </blockquote> <p>To convert the above data into XML MARC, use the following command: <blockquote> <pre> $ bibconvert -b'<collection>' -csample.cfg -e'</collection>' < sample.dat > /tmp/sample.xml -<pre> +</pre> </blockquote> and see the XML MARC output file. You would then continue the upload procedure by calling <a href="../bibupload/">BibUpload</a>. <p>Other useful BibConvert configuration examples: <blockquote> <a href="dcq.cfg">dcq.cfg</a> -- Qualified Dublin Core in SGML to XML MARC example <br><a href="dcq.dat">dcq.dat</a> -- corresponding data file, featuring collection identifiers demo </blockquote> <blockquote> <a href="dcxml-to-marcxml.cfg">dcxml-to-marcxml.cfg</a> -- OAI XML Dublin Core to XML MARC example </blockquote> <blockquote> <a href="bibtex.cfg">bibtex.cfg</a> -- BibTeX to XML MARC example </blockquote> -<a name="4"></a><h2>4. BibConvert Configuration Guide</h2> +<a name="C.3"></a><h3>3 BibConvert Configuration Guide</h3> -<h3><A NAME="G"></A>Conventions</h3> +<h4><A NAME="G"></A>Conventions</h4> <br/>- comment line starts with '#' sign in the first column <br/>- each section is declared by a line starting with '===' (further characters on the line are ignored) <br/>- values are separated by '---' -<h3> -<A NAME="4.1"></A>4.1. Step 1 Definition of Source record</h3> +<h4><A NAME="C.3.1"></A>3.1 Step 1 Definition of Source record</h4> <P>- Create/edit "data extraction configuration template" section of the configuration file. 
<BR>- Each line of this section stands for a definition of one source field: <P><strong>name---keyword---terminating string---separator---</strong> <P>- Choose a (valid) name allowed by the system <BR>- Enter <strong>keyword</strong> and <strong>terminating string</strong>, which are boundary tags for the wanted value extraction <BR>- In case the field is repetitive, enter the value <strong>separator</strong> <BR>- "<strong>---</strong>"is mandatory separator between all values, even zero-length <BR>- <strong>MAX</strong>/<strong>MIN</strong> keywords can be used instead of terminating string <BR> <P>Example of a definition of author(repetitive) and title (non-repetitive) fields: <br> <pre> === data extraction configuration template === ### here comes the data extraction configuration template <br> <P>AU---AU_---MAX---;--- <BR>TI---TI_---EOL------ </pre> -<H3> -<A NAME="4.2"></A>4.2. Step 2 Definition of Source fields</H3> +<H4><A NAME="C.3.2"></A>3.2 Step 2 Definition of Source fields</H4> <I>Each field extracted from the source according to the definition done in the first step can have an internal structure, which is described in this section.</I> <P>- Create/edit "data source configuration template" section of the configuration file. <BR>- Each line of this section stands for a definition of one source field <BR>- <name> corresponds to the name defined in the step 1 <P>name---{CONST<:SUBFIELD:>[CONST]}} <P>- Enter only constants that appear systematically. <BR>- Between two discrete subfields has to be defined a constant of a non zero length <BR>- "---"is a mandatory separator between the name and the source field definition <P>Example of a definition of author(repetitive) and title (non-repetitive) fields: <pre> === data source configuration template === TI---<:TI:> AU---<:FIRSTNAME:>-<:SURNAME:> </pre> -<H3> -<A NAME="4.3"></A>4.3. 
Step 3 Definition of target record</H3> +<H4><A NAME="C.3.3"></A>3.3 Step 3 Definition of target record</H4> <I>This definition describes the layout of the target record that is created by the conversion, together with the correspondence to the source fields defined in step 2.</I> <P>- Create/edit "data target configuration template" section of the configuration file. <BR>- Each line of this section stands for an output line created by the conversion. <BR>- <name> corresponds to the name defined in the steps 1 and 2 <P>CODE---CONST<:name::SUBFIELD::FUNCT():>CONST<:GENERATED_VALUE:> <P>- <strong>CODE</strong> stands for a tag for readability (optional) <BR>- "<strong>::</strong>"is a mandatory separator between the name and the subfield definition -<BR>- optionally, you can apply the appropriate <A HREF="#51">formatting function(s)</A> -and <A HREF="#4.4.2">generated values</A> +<BR>- optionally, you can apply the appropriate <A HREF="#C.3.4.1">formatting function(s)</A> +and <A HREF="#C.3.4.2">generated values</A> <BR>- "<strong>::</strong>"is a mandatory separator between the subfield definition and the function(s) <BR>- "<strong>---</strong>"is a mandatory separator between the tag and the output code definition <BR>- mark repetitive source fields with an asterisk (*) <P>Example of a definition of author (repetitive) and title (non-repetitive) codes: <protect> <pre> <br/>AU::CONF(AU,,0)---<datafield id="700" ind1="" ind2=""><subfield code="a"><:AU*::AU:></subfield></datafield> <br/>TI::CONF(TI,,0)---<datafield id="245" ind1="" ind2=""><subfield code="a"><:TI::TI::SUP(SPACE, ):></subfield></datafield> </pre> </protect> -<h3><A NAME="4.4"></a>4.4 Formatting in BibConvert</H3> +<h4><A NAME="C.3.4"></a>3.4 Formatting in BibConvert</H4> -<H3> <A NAME="4.4.1"></A>4.4.1 Definition of formatting functions</H3> +<H5> <A NAME="C.3.4.1"></A>3.4.1 Definition of formatting functions</H5> <BLOCKQUOTE>Every field can be processed with a variety of functions that partially or 
entirely change the original value. <BR>There are three types of functions available that take as element either single characters, words or the entire value of processed field. <BR> <P>Every function requires a certain number of parameters to be entered in brackets. If an insufficient number of parameters is present, the function uses default values. Default values are constructed with attempt to keep the original value. <P>The configuration of templates is case sensitive. <P>The following functions are available: <P><A HREF="#ADD">ADD(prefix,suffix) - add prefix/suffix</A> <BR><A HREF="#KB">KB(kb_file,[0-9]) -lookup in kb_file and replace value</A> <BR><A HREF="#ABR">ABR(x,suffix)/ABRW(x,suffix) - abbreviation with suffix addition</A> <BR><A HREF="#ABRX">ABRX() - abbreviate exclusively words longer</A> <BR><A HREF="#CUT">CUT(prefix,postfix) - remove substring from side</A> <BR><A HREF="#REP">REP(x,y) - replacement of characters</A> <BR><A HREF="#SUP">SUP(type) - suppression of characters of specified type</A> <BR><A HREF="#LIM">LIM(n,L/R)/LIMW(str,L/R) - restriction to n letters</A> <BR><A HREF="#WORDS">WORDS(n,side) - restriction to n words from L/R</A> <BR><A HREF="#MINL">MINL(n)/MAXL(n) - replacement of words shorter/greater than n</A> <BR><A HREF="#MINLW">MINLW(n) - replacement of short values</A> <BR><A HREF="#EXPW">EXP(str,1|0)/EXPW(type) - replacement of words from value if containing spec. 
type/string</A> <BR><A HREF="#IF">IF(value,valueT,valueF) - replace T/F value</A> <BR><A HREF="#UP">UP/DOWN/CAP/SHAPE/NUM - lower case and upper case, shape</A> <BR><A HREF="#SPLIT">SPLIT(n,h,str,from)/SPLITW(sep,h,str,from) - split into more lines</A> <BR><A HREF="#CONF">CONF(field,value,1/0)/CONFL(value,1/0) - confirm validity of a field</A> <BR><A HREF="#RANGE">RANGE(from,to) - confirm only entries in the specified range</A> <BR> </BLOCKQUOTE> <H4> <A NAME="ADD"></A>ADD(prefix,postfix)</H4> <BLOCKQUOTE>default: ADD(,) no addition <P>Adds prefix/postfix to the value, we can use this function to add the proper field name as a prefix of the value itself: <P>ADD(WAU=,) prefix for the first author (which may have been taken from the field AU2) <BR> </BLOCKQUOTE> <H4> <A NAME="KB"></A>KB(kb_file) - kb_file search</H4> <BLOCKQUOTE>default: KB(kb_file,1/0/R) <P>The input value is compared to a kb_file and may be replaced by another value. In the case that the input value is not recognized, it is by default kept without any modification. This default can be overridden by <strong>_DEFAULT_---default value</strong> entry in the kb_file <P>The file specified in the parameter is a text file representing a table of values that correspond to each other: <P>{<strong>input_value---output_value</strong>} <P>KB(file,1) searches the exact value passed. <BR>KB(file,0) searches the KB code inside the value passed. <BR>KB(file,2) as 0 but not case sensitive <BR>KB(file,R) replacements are applied on substrings/characters only. 
<BR/> <BR/> bibconvert look-up value in KB_file in one of the following modes: <BR/> =========================================================== <BR/> 1 - case sensitive / match (default) <BR/> 2 - not case sensitive / search <BR/> 3 - case sensitive / search <BR/> 4 - not case sensitive / match <BR/> 5 - case sensitive / search (in KB) <BR/> 6 - not case sensitive / search (in KB) <BR/> 7 - case sensitive / search (reciprocal) <BR/> 8 - not case sensitive / search (reciprocal) <BR/> 9 - replace by _DEFAULT_ only <BR/> R - not case sensitive / search (reciprocal) replace <BR/> <BR/> <P>Edge spaces are not considered. Output value is not further formatted.</BLOCKQUOTE> <H4> <A NAME="ABR"></A>ABR(x,trm),ABRW(x,trm) - abbreviate term to x places with(out) postfix</H4> <BLOCKQUOTE>default: ABR(1,.) <BR>default: ABRW(1,.) <P>The words in the input value are shortened according to the parameters specified. By default, only the initial character is kept and the output value is terminated by a dot. <BR>ABRW takes entire value as one word. <BLOCKQUOTE> <TABLE BORDER COLS=3 WIDTH="50%" NOSAVE > <TR> <TD>example</TD> <TD>input</TD> <TD>output</TD> </TR> <TR> <TD>ABR()</TD> <TD>firstname_surname</TD> <TD>f._s.</TD> </TR> <TR> <TD>ABR(1,)</TD> <TD>firstname_surname</TD> <TD>f_s</TD> </TR> <TR> <TD>ABR(10,COMMA)</TD> <TD>firstname_surname</TD> <TD>firstname,_surname,</TD> </TR> </TABLE> </BLOCKQUOTE> </BLOCKQUOTE> <h4> <A NAME="ABRX"></A>ABRX() - abbreviate exclusively words longer than given limit</h4> <BLOCKQUOTE>default: ABRX(1,.) <P>Exclusively words that reach the specified length limit in the input value are abbreviated. No suffix is appended to the words shorter than specified limit. 
</blockquote> <h4> <A NAME="CUT"></A>CUT(prefix,postfix) - remove substring from side</h4> <BLOCKQUOTE>default: CUT(,) <P>Remove string from the value (reverse function to the "ADD") </BLOCKQUOTE> <H4> <A NAME="REP"></A>REP(x,y) - replace x with y</H4> <BLOCKQUOTE>default: REP(,) no replacement <P>The input value is searched for the string specified in the first parameter. All such strings are replaced with the string specified in the second parameter. </BLOCKQUOTE> <H4> <A NAME="SUP"></A>SUP(type,string) - suppress chars of certain type</H4> <BLOCKQUOTE>default: SUP(,) type not recognized <P>All groups of characters belonging to the type specified in the first parameter are suppressed or replaced with a string specified in the second parameter. <P>Recognized types: <P>SPACE .. invisible chars incl. NEWLINE <BR>ALPHA .. alphabetic <BR>NALPHA .. not alphabetic <BR>NUM .. numeric <BR>NNUM .. not numeric <BR>ALNUM .. alphanumeric <BR>NALNUM .. non alphanumeric <BR>LOWER .. lower case <BR>UPPER .. upper case <BR>PUNCT .. punctuation <BR>NPUNCT .. not punctuation <BR> <BLOCKQUOTE> <TABLE BORDER COLS=3 WIDTH="50%" NOSAVE > <TR> <TD>example</TD> <TD>input</TD> <TD>output</TD> </TR> <TR> <TD>SUP(SPACE,-)</TD> <TD>sep_1999</TD> <TD>sep-1999</TD> </TR> <TR> <TD>SUP(NNUM)</TD> <TD>sep_1999</TD> <TD>1999</TD> </TR> <TR> <TD>SUP(NUM)</TD> <TD>sep_1999</TD> <TD>sep_</TD> </TR> </TABLE> </BLOCKQUOTE> </BLOCKQUOTE> <H4> <A NAME="LIM"></A>LIM(n,side)/LIMW(str,side) - limit to n letters from L/R</H4> <BLOCKQUOTE>default: LIM(0,) no change <BR>default: LIMW(,R) no change <P>Limits the value in order to get the required number of characters by cutting excess characters from either side. <BR>LIMW removes the Left/Right side to the (str) string. 
<BLOCKQUOTE> <TABLE BORDER COLS=3 WIDTH="50%" NOSAVE > <TR> <TD>example</TD> <TD>input</TD> <TD>output</TD> </TR> <TR> <TD>LIM(4,L)</TD> <TD>sep_1999</TD> <TD>1999</TD> </TR> <TR> <TD>LIM(4,R)</TD> <TD>sep_1999</TD> <TD>sep_</TD> </TR> <TR> <TD>LIMW(_,R)</TD> <TD>sep_1999</TD> <TD>sep_</TD> </TR> </TABLE> </BLOCKQUOTE> </BLOCKQUOTE> <H4> <A NAME="WORDS"></A>WORDS(n,side) - limit to n words from L/R</H4> <BLOCKQUOTE>default: WORDS(0,R) <P>Keeps the number of words specified in the first parameter from either side. <BR> <BR> <BLOCKQUOTE> <TABLE BORDER COLS=3 WIDTH="50%" NOSAVE > <TR> <TD>example</TD> <TD>input</TD> <TD>output</TD> </TR> <TR> <TD>WORDS(1)</TD> <TD>sep_1999</TD> <TD>1999</TD> </TR> <TR> <TD>WORDS(1,L)</TD> <TD>sep_1999</TD> <TD>sep_</TD> </TR> </TABLE> </BLOCKQUOTE> </BLOCKQUOTE> <H4> <A NAME="MINL"></A>MINL(n) - exp. words shorter than n</H4> <BLOCKQUOTE>default: MINL(1) <P>All words shorter than the limit specified in the parameter are removed from the sentence. <BR>The words with length exactly n are kept. <BR> <BR> <BLOCKQUOTE> <TABLE BORDER COLS=3 WIDTH="50%" NOSAVE > <TR> <TD>example</TD> <TD>input</TD> <TD>output</TD> </TR> <TR> <TD>MINL(2)</TD> <TD>History of Physics</TD> <TD>History of Physics</TD> </TR> <TR> <TD>MINL(3)</TD> <TD>History of Physics</TD> <TD>History Physics</TD> </TR> </TABLE> </BLOCKQUOTE> </BLOCKQUOTE> <H4> MAXL(n) - exp. words longer than n</H4> <BLOCKQUOTE>default: MAXL(0) <P>All words greater in number of characters than the limit specified in the parameter are replaced. Words with length exactly n are kept. 
<BR> <BR> <BLOCKQUOTE> <TABLE BORDER COLS=3 WIDTH="50%" NOSAVE > <TR> <TD>example</TD> <TD>input</TD> <TD>output</TD> </TR> <TR> <TD>MAXL(2)</TD> <TD>History of Physics</TD> <TD>of</TD> </TR> <TR> <TD>MAXL(3)</TD> <TD>History of Physics</TD> <TD>of</TD> </TR> </TABLE> </BLOCKQUOTE> </BLOCKQUOTE> <H4> <A NAME="MINLW"></A>MINLW(n) - replacement of short values</H4> <BLOCKQUOTE>default: MINLW(1) (no change) <P>The entire value is deleted if shorter than the specified limit. <BR>This is used for the validation of created records, where we have 20 characters in the header. <BR>The default validation is MINLW(21), i.e. the record entry will not be considered as valid, unless it contains at least 21 characters including the header. This default setting can be overridden by the -l command line option. <P>In order to increase the necessary length of the output line in the configuration itself, apply the function on the total value: <P>AU::MINLW(25)---CER <:SYSNO:> AU L <:SURNAME:>, <:NAME:> <BR> <BR> </BLOCKQUOTE> <H4> <A NAME="EXPW"></A>EXP(str,1|0) - exp./approve word containing specified string</H4> <BLOCKQUOTE>default: EXP (,0) leave all value <P>The record is shortened by replacing words containing the specified string. <BR>The second parameter states whether the string approves the word (0) or disables it (1). <P>For example, to get the email address from the value, use the following <BR> <BR> <BLOCKQUOTE> <TABLE BORDER COLS=3 WIDTH="50%" NOSAVE > <TR> <TD>example</TD> <TD>input</TD> <TD>output</TD> </TR> <TR> <TD>EXP(@,0)</TD> <TD>mail to: libdesk@cern.ch</TD> <TD>libdesk@cern.ch</TD> </TR> <TR> <TD>EXP(:,1)</TD> <TD>mail to: libdesk@cern.ch</TD> <TD>mail libdesk@cern.ch</TD> </TR> <TR> <TD>EXP(@)</TD> <TD>mail to: libdesk@cern.ch</TD> <TD>libdesk@cern.ch</TD> </TR> </TABLE> </BLOCKQUOTE> </BLOCKQUOTE> <H4> EXPW(type) - exp. word from value if containing spec. 
type</H4> <BLOCKQUOTE>default: EXPW type not recognized <BR> <P>The sentence is shortened by replacing words containing specified type of character. <P>Types supported in EXPW function: <P>ALPHA .. alphabetic <BR>NALPHA .. not alphabetic <BR>NUM .. numeric <BR>NNUM .. not numeric <BR>ALNUM .. alphanumeric <BR>NALNUM .. non alphanumeric <BR>LOWER .. lower case <BR>UPPER .. upper case <BR>PUNCT .. punctuation <BR>NPUNCT .. non punctuation <P>Note: SPACE is not handled as a keyword, since all space characters are considered as word separators. <BR> <BR> <BLOCKQUOTE> <TABLE BORDER COLS=3 WIDTH="50%" NOSAVE > <TR> <TD>example</TD> <TD>input</TD> <TD>output</TD> </TR> <TR> <TD>EXPW(NNUM)</TD> <TD>sep_1999</TD> <TD>1999</TD> </TR> <TR> <TD>EXPW(NUM)</TD> <TD>sep_1999</TD> <TD>sep</TD> </TR> </TABLE> </BLOCKQUOTE> </BLOCKQUOTE> <H4> <A NAME="IF"></A>IF(value,valueT,valueF) - replace T/F value</H4> <BLOCKQUOTE>default: IF(,,) <P>Compares the value with the first parameter. In case the result is TRUE, the input value is replaced with the second parameter, otherwise the input value is replaced with the third parameter. 
<BR>In case the input value has to be kept, whatever it is, the keyword ORIG can be used (usually in the place of the third parameter) <BR> <BR> <BLOCKQUOTE> <TABLE BORDER COLS=3 WIDTH="50%" NOSAVE > <TR> <TD>example</TD> <TD>input</TD> <TD>output</TD> </TR> <TR> <TD>IF(sep_1999,sep)</TD> <TD>sep_1999</TD> <TD>sep</TD> </TR> <TR> <TD>IF(oct_1999,oct)</TD> <TD>sep_1999</TD> <TD></TD> </TR> <TR> <TD>IF(oct_1999,oct,ORIG)</TD> <TD>sep_1999</TD> <TD>sep_1999</TD> </TR> </TABLE> </BLOCKQUOTE> </BLOCKQUOTE> <H4> <A NAME="UP"></A>UP - upper case</H4> <BLOCKQUOTE>Convert all characters to upper case</BLOCKQUOTE> <H4> DOWN - lower case</H4> <BLOCKQUOTE>Convert all characters to lower case</BLOCKQUOTE> <H4> CAP - make capitals</H4> <BLOCKQUOTE>Convert the initial character of each word to upper case and the rest of characters to lower case</BLOCKQUOTE> <H4> SHAPE - format string</H4> <BLOCKQUOTE>Suppresses all invalid spaces</BLOCKQUOTE> <H4> <B>NUM - number</B></H4> <BLOCKQUOTE>If it contains at least one digit, convert it into a number by suppressing other characters. Leading zeroes are deleted.</BLOCKQUOTE> <H4> <A NAME="SPLIT"></A>SPLIT(n,h,str,from)</H4> <BLOCKQUOTE>Splits the input value into more lines, where each line contains at most (n+h+length of str) characters, (n) being the number of characters following the number of characters in the header, specified in (h). The header repeats at the beginning of each line. An additional string can be inserted as a separator between the header and the following value. This string is specified by the third parameter (str). It is possible to restrict the application of (str) so it does not appear on the first line by entering "2" for (from)</BLOCKQUOTE> <H4> SPLITW(sep,h,str,from)</H4> <BLOCKQUOTE>Splits the input value into more lines by replacing the line separator stated in (sep) with CR/LFs. Also, as in the case of the SPLIT function, the first (h) characters are taken as a header and repeat at the beginning of each line. 
An additional string can be inserted as a separator between the header and the following value. This string is specified by the third parameter (str). It is possible to restrict the application of (str) so it does not appear on the first line by entering "2" for (from)</BLOCKQUOTE> <H4> <A NAME="CONF"></A>CONF(field,value,1/0) - confirm validity of a field</H4> <BLOCKQUOTE>The input value is taken as it is, or refused depending on the value of some other field. In case the other (field) contains the string specified in (value), then the input value is confirmed (1) or refused (0).</BLOCKQUOTE> <H4> CONFL(str,1|0) - confirm validity of a field</H4> <BLOCKQUOTE>The input value is confirmed if it contains (<B>1</B>)/misses(<B>0</B>) the specified string (<B>str</B>)</BLOCKQUOTE> <H4> <A NAME="RANGE"></A>RANGE(from,to) - confirm only entries in the specified range</H4> <BLOCKQUOTE>Left side function of target template configuration section to select the desired entries from the repetitive field. <BR>The range can only be continuous. <P>The entry is confirmed in case its input falls into the range from-to specified in the parameter, border values included. As an upper limit it is possible to use the keyword MAX. <P>This is useful in case of AU code, where the first entry has a different definition from other entries: <P>AU::RANGE(1,1)---CER <:SYSNO:> AU2 L <:AU::SURNAME:>, <:AU::NAME:> ... takes the first name from the defined AU field <BR>AU::RANGE(2,MAX)---CER <:SYSNO:> AU L <:AU::SURNAME:> , <:AU::NAME:> ... 
takes the rest of names from the AU field <BR> </BLOCKQUOTE> <H4><A NAME="DEFP"></A>DEFP() - default print</H4> <BLOCKQUOTE>The value is printed by default even if it does not contain any variable input from the source file.</BLOCKQUOTE> -<H3> -<A NAME="4.4.2"></A>4.4.2 Generated values</H3> +<H4> +<A NAME="C.3.4.2"></A>3.4.2 Generated values</H4> <BLOCKQUOTE>In the template configurations, values can be either taken from the source or generated in the process itself. This is mainly useful for evaluating constant values. <P>Currently, the following date values are generated: <BR> </BLOCKQUOTE> <H4> DATE(format,n)</H4> <BLOCKQUOTE>default: DATE(,10) <P>where n is the number of digits required. <P>Generates the current date in the form given as a parameter. The format has to be given according to the ANSI C notation, i.e. the string is composed out of following components: <P><A NAME="date"></A> %a abbreviated weekday name <BR> %A full weekday name <BR> %b abbreviated month name <BR> %B full month name <BR> %c date and time representation <BR> %d decimal day of month number (01-31) <BR> %H hour (00-23)(24 hour format) <BR> %I hour (01-12)(12 hour format) <BR> %j day of year(001-366) <BR> %m month (01-12) <BR> %M minute (00-59) <BR> %p local equivalent of a.m. or p.m. <BR> %S second (00-59) <BR> %U week number in year (00-53)(starting with Sunday) <BR> %V week number in year <BR> %w weekday (0-6)(starting with Sunday) <BR> %W week number in year (00-53)(starting with Monday) <BR> %x local date representation <BR> %X local time representation <BR> %y year (no century prefix) <BR> %Y year (with century prefix) <BR> %Z time zone name <BR> %% % <BR> </BLOCKQUOTE> <H4> WEEK(diff)</H4> <BLOCKQUOTE>Enters the two-digit number of the current week (%V) increased by specified difference. <BR>If the resulting number is negative, the returned value is zero (00). <BR>Values are kept up to 99, three digit values are shortened from the left. 
<P>WEEK(-4) returns 48, if current week is 52 <BR>WEEK current week <BR> </BLOCKQUOTE> <H4> SYSNO</H4> <BLOCKQUOTE> <BR>Works the same as DATE, however the format of the resulting value is fixed so it complies with the requirements of further record handling. The format is 'whhmmss', where: <P>w current weekday <BR>hh current hour <BR>mm current minute <BR>ss current second <P>The system number, if generated like this, contains a variable value changing every second. Since the system number is an identifier of the record, it is necessary to ensure it will be unique for the entire record processed. Unlike the function DATE, which simply generates the value of format given, SYSNO keeps the value persistent throughout the entire record and excludes collision with other records that are generated in period of one week with one second granularity. <P>It is not possible to use the DATE function for generating a system number instead. <P>The system number is unique in range of one week only, according to the current definition. <BR> <BR> </BLOCKQUOTE><H4> OAI</H4> <BLOCKQUOTE> <p/>Inserts OAI identifier incremented by one for each record. Starting value that is used in the first record in the batch job can be specified on the command line using the -o<starting_value> option. </BLOCKQUOTE>