File Metadata

Created: Sun, Aug 25, 04:25

wsm_generic_plugin.py
View Options

	## This file is part of CDS Invenio.
	## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
	##
	## CDS Invenio is free software; you can redistribute it and/or
	## modify it under the terms of the GNU General Public License as
	## published by the Free Software Foundation; either version 2 of the
	## License, or (at your option) any later version.
	##
	## CDS Invenio is distributed in the hope that it will be useful, but
	## WITHOUT ANY WARRANTY; without even the implied warranty of
	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	## General Public License for more details.
	##
	## You should have received a copy of the GNU General Public License
	## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
	## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
	"""
	WebSubmit Metadata Plugin - This is the generic metadata extraction
	plugin. Contains methods to extract metadata from many kinds of files.

	Dependencies: extractor
	"""

	__plugin_version__ = "WebSubmit File Metadata Plugin API 1.0"

	import extractor
	from invenio.bibdocfile import decompose_file

	def can_read_local(inputfile):
	"""
	Checks if inputfile is among metadata-readable file types

	@param inputfile: path to the image
	@type inputfile: string
	@rtype: boolean
	@return: True if file can be processed
	"""

	# Check file type (0 base, 1 name, 2 ext)
	ext = decompose_file(inputfile)[2]
	return ext.lower() in ['.html', '.doc', '.ps', '.xls', '.ppt',
	'.ps', '.sxw', '.sdw', '.dvi', '.man', '.flac',
	'.mp3', '.nsf', '.sid', '.ogg', '.wav', '.png',
	'.deb', '.rpm', '.tar.gz', '.zip', '.elf',
	'.s3m', '.xm', '.it', '.flv', '.real', '.avi',
	'.mpeg', '.qt', '.asf']

	def read_metadata_local(inputfile, verbose):
	"""
	Metadata extraction from many kind of files

	@param inputfile: path to the image
	@type inputfile: string
	@param verbose: verbosity
	@type verbose: int
	@rtype: dict
	@return: dictionary with metadata
	"""
	# Initialization dict
	meta_info = {}

	# Extraction
	xtract = extractor.Extractor()

	# Get the keywords
	keys = xtract.extract(inputfile)

	# Loop to dump data to the dict
	for keyword_type, keyword in keys:
	meta_info[keyword_type.encode('iso-8859-1')] = \
	keyword.encode('iso-8859-1')

	# Return the dictionary
	return meta_info

wsm_generic_plugin.py
No OneTemporary
Actions

File Metadata

wsm_generic_plugin.py
View Options

Event Timeline

wsm_generic_plugin.pyNo OneTemporaryActions

File Metadata

wsm_generic_plugin.pyView Options

Event Timeline

wsm_generic_plugin.py
No OneTemporary
Actions

wsm_generic_plugin.py
View Options