Page MenuHomec4science

i18n-extract-from-wml.py
No OneTemporary

File Metadata

Created
Sat, Jul 20, 22:55

i18n-extract-from-wml.py

## This file is part of the CERN Document Server Software (CDSware).
## Copyright (C) 2002, 2003, 2004, 2005 CERN.
##
## The CDSware is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## The CDSware is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDSware; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
This tool extracts sentences to be translated from HTML / WML
files and prints them to standard output as a gettext PO template.
The sentences to translate are marked with the following tag:
    Blah blah _(To be translated)_ blah.
These tags can span several lines. Extra whitespace is discarded.
"""
import sys, re
# Fixed preamble of the generated PO template: the licence notice as PO
# comments, followed by the empty-msgid entry that carries the catalogue
# metadata.  It is a raw string so that the "\n" sequences reach the output
# as a literal backslash + "n", which is what the PO format expects.
_po_header = r'''
# # This file is part of the CERN Document Server Software (CDSware).
# # Copyright (C) 2002, 2003, 2004, 2005 CERN.
# #
# # The CDSware is free software; you can redistribute it and/or
# # modify it under the terms of the GNU General Public License as
# # published by the Free Software Foundation; either version 2 of the
# # License, or (at your option) any later version.
# #
# # The CDSware is distributed in the hope that it will be useful, but
# # WITHOUT ANY WARRANTY; without even the implied warranty of
# # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# # General Public License for more details.
# #
# # You should have received a copy of the GNU General Public License
# # along with CDSware; if not, write to the Free Software Foundation, Inc.,
# # 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
msgid ""
msgstr ""
"Project-Id-Version: CDSware 0.7\n"
"POT-Creation-Date: Tue Nov 22 16:44:03 2005\n"
"PO-Revision-Date: 2005-11-22 11:20+0100\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: pygettext.py 1.5\n"
'''
# Emit the header followed by one newline, i.e. the same bytes a print
# statement would produce for this string.
sys.stdout.write(_po_header + '\n')
# A translatable sentence is whatever sits between "_(" and ")_".  DOTALL
# lets "." cross line breaks, so a tag may span several lines without first
# building a newline-free copy of the file.
_tag_re = re.compile(r'_\((.*?)\)_', re.S)
# Any run of whitespace (including line breaks) collapses to one space.
_ws_re = re.compile(r'\s+')

# Maps each normalized sentence to the list of "file:line" references
# where it was found, in the order the files were given and scanned.
db = {}

for f in sys.argv[1:]:
    # Close the file explicitly instead of relying on garbage collection.
    # try/finally is used because the rest of the script targets old
    # Python 2 interpreters.
    source = open(f)
    try:
        data = source.read()
    finally:
        source.close()

    # Matches arrive in increasing start order, so the 1-based line number
    # can be maintained incrementally by counting only the newlines between
    # the previous match and the current one.  A tag that begins exactly at
    # the start of a line is attributed to that line (the first line of the
    # file is line 1).
    line = 1
    scanned = 0
    for m in _tag_re.finditer(data):
        pos = m.start()
        line += data.count('\n', scanned, pos)
        scanned = pos
        ref = '%s:%d' % (f, line)
        # normalize the word a bit, as it comes from a file where
        # whitespace is not too significant.
        word = _ws_re.sub(' ', m.group(1).strip())
        db.setdefault(word, []).append(ref)
def quote(text):
    """Escape a string for inclusion in a double-quoted PO file string.

    Backslash, newline, tab and double quote are replaced by the escape
    sequences ``\\\\``, ``\\n``, ``\\t`` and ``\\"`` respectively.

    text -- the message text to escape.

    Returns the escaped text, without the surrounding double quotes.
    """
    # Backslashes must be escaped first; otherwise the backslashes
    # introduced by the later replacements would be doubled again.
    for raw, escaped in (('\\', '\\\\'),
                         ('\n', '\\n'),
                         ('\t', '\\t'),
                         ('"', '\\"')):
        text = text.replace(raw, escaped)
    return text
# Write one PO entry per distinct sentence: a "#:" reference comment for
# every place it occurs, the escaped msgid, an empty msgstr, and a blank
# separator line.  Entries are sorted by sentence so that the generated
# template is identical from run to run instead of following the arbitrary
# iteration order of the dictionary.
for text, refs in sorted(db.items()):
    for ref in refs:
        sys.stdout.write("#: %s\n" % ref)
    sys.stdout.write('msgid "%s"\n' % quote(text))
    sys.stdout.write('msgstr ""\n')
    sys.stdout.write('\n')

Event Timeline