# Per-format count of last-version files attached to the records in sqlreclist.
formats_sql = (
    "SELECT COUNT(format) as c, format "
    "FROM bibrec_bibdoc AS bb "
    "JOIN bibdocfsinfo AS fs ON bb.id_bibdoc=fs.id_bibdoc "
    "WHERE id_bibrec in %s AND last_version=true "
    "GROUP BY format ORDER BY c DESC"
) % sqlreclist  # kwalitee: disable=sql
print_table("Formats", run_sql(formats_sql))  # kwalitee: disable=sql

# Same breakdown, grouped by MIME type instead of format.
mimetypes_sql = (
    "SELECT COUNT(mime) as c, mime "
    "FROM bibrec_bibdoc AS bb "
    "JOIN bibdocfsinfo AS fs ON bb.id_bibdoc=fs.id_bibdoc "
    "WHERE id_bibrec in %s AND last_version=true "
    "GROUP BY mime ORDER BY c DESC"
) % sqlreclist  # kwalitee: disable=sql
print_table("Mimetypes", run_sql(mimetypes_sql))  # kwalitee: disable=sql

# Sum of filesize over the same set of last-version files.
sizes_sql = (
    "SELECT SUM(filesize) AS c "
    "FROM bibrec_bibdoc AS bb "
    "JOIN bibdocfsinfo AS fs ON bb.id_bibdoc=fs.id_bibdoc "
    "WHERE id_bibrec in %s AND last_version=true"
) % sqlreclist  # kwalitee: disable=sql
print_table("Sizes", run_sql(sizes_sql))  # kwalitee: disable=sql
#epilog="""With <query> you select the range of record/docnames/single files to work on. Note that some actions (e.g. delete, append, revise) work at the docname level, others (like --set-comment and --set-description) work at the single-file level, and others can be applied iteratively to many records in a single run. Note that specifying docid(2) takes precedence over recid(2), which in turn takes precedence over pattern/collection search.""",
# to record 1 the file x.pdf specifying the PDF/A and OCRED flags
"""%{'site':CFG_SITE_URL}
# Query options: choose which records/documents the requested action targets.
query_options = OptionGroup(parser, 'Query options')

# Explicit id selectors; both hand their argument to _ids_ranges_callback.
query_options.add_option(
    '-r', '--recids',
    action="callback",
    callback=_ids_ranges_callback,
    type='string',
    dest='recids',
    help='matches records by recids, e.g.: --recids=1-3,5-7')
query_options.add_option(
    '-d', '--docids',
    action="callback",
    callback=_ids_ranges_callback,
    type='string',
    dest='docids',
    help='matches documents by docids, e.g.: --docids=1-3,5-7')
query_options.add_option(
    '-a', '--all',
    action='store_true',
    dest='all',
    help='Select all the records')

# Tri-state filters: each one accepts yes/no/only and defaults to 'no'.
for opt_flag, opt_dest, opt_help in (
        ("--with-deleted-recs", "deleted_recs",
         "'Yes' to also match deleted records, 'no' to exclude them, 'only' to match only deleted ones"),
        ("--with-deleted-docs", "deleted_docs",
         "'Yes' to also match deleted documents, 'no' to exclude them, 'only' to match only deleted ones (e.g. for undeletion)"),
        ("--with-empty-recs", "empty_recs",
         "'Yes' to also match records without attached documents, 'no' to exclude them, 'only' to consider only such records (e.g. for statistics)"),
        ("--with-empty-docs", "empty_docs",
         "'Yes' to also match documents without attached files, 'no' to exclude them, 'only' to consider only such documents (e.g. for sanity checking)"),
):
    query_options.add_option(
        opt_flag,
        choices=['yes', 'no', 'only'],
        type="choice",
        dest=opt_dest,
        help=opt_help,
        metavar="yes/no/only",
        default='no')

# Date-range filters: a single "date1,date2" argument handled by
# _date_range_callback; the default is an open (None, None) range.
# The first help text now says "between", like its three siblings.
for opt_flag, opt_dest, opt_help in (
        ("--with-record-modification-date", "md_rec",
         "matches records modified between date1 and date2; dates can be expressed relatively, e.g.:\"-5m,2030-2-23 04:40\" # matches records modified since 5 minutes ago until the 2030..."),
        ("--with-record-creation-date", "cd_rec",
         "matches records created between date1 and date2; dates can be expressed relatively"),
        ("--with-document-modification-date", "md_doc",
         "matches documents modified between date1 and date2; dates can be expressed relatively"),
        ("--with-document-creation-date", "cd_doc",
         "matches documents created between date1 and date2; dates can be expressed relatively"),
):
    query_options.add_option(
        opt_flag,
        action="callback",
        callback=_date_range_callback,
        dest=opt_dest,
        nargs=1,
        type="string",
        default=(None, None),
        help=opt_help,
        metavar="date1,date2")

# Selectors that address a document by location or by its attributes.
query_options.add_option(
    "--url",
    dest="url",
    help='matches the document referred by the URL, e.g. "%s/%s/1/files/foobar.pdf?version=2"' % (CFG_SITE_URL, CFG_SITE_RECORD))
query_options.add_option(
    "--path",
    dest="path",
    help='matches the document referred by the internal filesystem path, e.g. %s/g0/1/foobar.pdf\\;1' % CFG_BIBDOCFILE_FILEDIR)
query_options.add_option(
    "--with-docname",
    dest="docname",
    help='matches documents with the given docname (accept wildcards)')
query_options.add_option(
    "--with-doctype",
    dest="doctype",
    help='matches documents with the given doctype')

# Record selectors by search pattern or collection.
query_options.add_option(
    '-p', '--pattern',
    dest='pattern',
    help='matches records by pattern')
query_options.add_option(
    '-c', '--collection',
    dest='collection',
    help='matches records by collection')

query_options.add_option(
    '--force',
    dest='force',
    help='force an action even when it\'s not necessary e.g. textify on an already textified bibdoc.',
    action='store_true',
    default=False)

parser.add_option_group(query_options)
# Read-only actions. Every flag stores its own name (minus the leading
# dashes) into options.action.
getting_information_options = OptionGroup(parser, 'Actions for getting information')
for info_action, info_help in (
        ('get-info', 'print all the informations about the matched record/documents'),
        ('get-disk-usage', 'print disk usage statistics of the matched documents'),
        ('get-history', 'print the matched documents history'),
        ('get-stats', 'print some statistics of file properties grouped by collections'),
):
    getting_information_options.add_option(
        '--' + info_action,
        dest='action',
        action='store_const',
        const=info_action,
        help=info_help)
# An OptionGroup only takes effect once it is attached to the parser;
# without this call none of the --get-* flags would be recognised.
parser.add_option_group(getting_information_options)
# Actions that change document content, plus the modifiers they accept.
revising_options = OptionGroup(parser, 'Action for revising content')

# --append / --revise carry the file location as their own argument.
revising_options.add_option(
    "--append",
    dest='append_path',
    help='specify the URL/path of the file that will appended to the bibdoc (implies --with-empty-recs=yes)',
    metavar='PATH/URL')
revising_options.add_option(
    "--revise",
    dest='revise_path',
    help='specify the URL/path of the file that will revise the bibdoc',
    metavar='PATH/URL')

# Argument-less actions: each flag stores its own name into options.action.
for revising_action, revising_help in (
        ('revert', 'reverts a document to the specified version'),
        ('delete', 'soft-delete the matched documents'),
        ('hard-delete', 'hard-delete the single matched document with a specific format and a specific revision (this operation is not revertible)'),
        ('purge', 'purge (i.e. hard-delete any format of any version prior to the latest version of) the matched documents'),
        ('expunge', 'expunge (i.e. hard-delete any version and formats of) the matched documents'),
):
    revising_options.add_option(
        "--" + revising_action,
        dest='action',
        action='store_const',
        const=revising_action,
        help=revising_help)

# Modifiers consumed by the actions above.
revising_options.add_option(
    "--with-version",
    dest="version",
    help="specifies the version(s) to be used with hide, unhide, e.g.: 1-2,3 or ALL. Specifies the version to be used with hard-delete and revert, e.g. 2")
revising_options.add_option(
    "--with-format",
    dest="format",
    help='to specify a format when appending/revising/deleting/reverting a document, e.g. "pdf"',
    metavar='FORMAT')
available_flags = ', '.join(CFG_BIBDOCFILE_AVAILABLE_FLAGS)
revising_options.add_option(
    "--with-flags",
    dest='flags',
    help='comma-separated optional list of flags used when appending/revising a document. Valid flags are: %s' % available_flags,
    default=None)

parser.add_option_group(revising_options)
# Consistency checks and repairs. Every flag stores its own name (minus the
# leading dashes) into options.action, so the set is declared as a table.
housekeeping_options = OptionGroup(parser, 'Actions for housekeeping')
for housekeeping_action, housekeeping_help in (
        ('check-md5', 'check md5 checksum validity of files'),
        ('check-format', 'check if any format-related inconsistences exists'),
        ('check-duplicate-docnames', 'check for duplicate docnames associated with the same record'),
        ('update-md5', 'update md5 checksum of files'),
        ('fix-all', 'fix inconsistences in filesystem vs database vs MARC'),
        ('fix-marc', 'synchronize MARC after filesystem/database'),
        ('fix-format', 'fix format related inconsistences'),
        ('fix-duplicate-docnames', 'fix duplicate docnames associated with the same record'),
        ('fix-bibdocfsinfo-cache', 'fix bibdocfsinfo cache related inconsistences'),
):
    housekeeping_options.add_option(
        "--" + housekeeping_action,
        action='store_const',
        const=housekeeping_action,
        dest='action',
        help=housekeeping_help)
parser.add_option_group(housekeeping_options)
# Experimental switches: --textify is an action, --with-ocr modifies it.
experimental_options = OptionGroup(parser, 'Experimental options (do not expect to find them in the next release)')
experimental_options.add_option(
    '--textify',
    dest='action',
    action='store_const',
    const='textify',
    help='extract text from matched documents and store it for later indexing')
experimental_options.add_option(
    '--with-ocr',
    dest='perform_ocr',
    action='store_true',
    default=False,
    help='when used with --textify, wether to perform OCR')
# Attach the group to the parser; options of an unattached OptionGroup are
# not recognised on the command line.
parser.add_option_group(experimental_options)
# Options registered directly on the parser rather than in a group.
parser.add_option(
    '-H', '--human-readable',
    dest='human_readable',
    action='store_true',
    default=False,
    help='print sizes in human readable format (e.g., 1KB 234MB 2GB)')
# NOTE: the dest contains hyphens, so the parsed value is only reachable
# through getattr(options, 'yes-i-know'), not plain attribute access.
parser.add_option(
    '--yes-i-know',
    action='store_true',
    dest='yes-i-know',
    help='use with care!')
returnparser
defprint_info(recid,docid,info):
"""Nicely print info about a recid, docid pair."""
# Report the docname/format clash through wait_for_user, then carry on
# under the replacement docname.
clash_details = (repr(docname), repr(format), repr(recid), repr(new_docname))
wait_for_user("WARNING: a document with name %s and format %s already exists for recid %s. A new document with name %s will be created instead." % clash_details)
docname = new_docname

# One FFT entry for the file; ffts maps the recid to its list of entries.
fft_entry = {
    'docname': docname,
    'comment': comment,
    'description': description,
    'restriction': restriction,
    'doctype': doctype,
    'format': format,
    'url': url,
    'options': flags,
}
ffts = {recid: [fft_entry]}
returnbibupload_ffts(ffts,append=True)
defcli_revise(options,revise_path):
"""Create a bibupload FFT task submission for appending a format."""
"""Check if any format-related inconsistences exists."""
# Walk every selected record, count those with format problems and build a
# human-readable summary in ``result``.
count = 0          # records that need fixing
tot = 0            # records examined
duplicate = False  # set once any record reports duplicate docnames
for recid in cli_recids_iterator(options):
    tot += 1
    bibrecdocs = BibRecDocs(recid)
    if not bibrecdocs.check_duplicate_docnames():
        # Interpolate the recid: without the "% recid" the message would be
        # emitted with a literal "%s" and not say which record is affected.
        print >> sys.stderr, "recid %s has duplicate docnames!" % recid
        broken = True
        duplicate = True
    else:
        broken = False
    # A record is also broken if any of its docnames fails the format check.
    for docname in bibrecdocs.get_bibdoc_names():
        if not bibrecdocs.check_format(docname):
            print >> sys.stderr, "recid %s with docname %s need format fixing" % (recid, docname)
            broken = True
    if broken:
        count += 1
if count:
    result = "%d out of %d records need their formats to be fixed." % (count, tot)
else:
    result = "All records appear to be correct with respect to formats."
if duplicate:
    result += " Note however that at least one record appear to have duplicate docnames. You should better fix this situation by using --fix-duplicate-docnames."