forked from Mirrors/apostrophe
294 lines
12 KiB
Python
294 lines
12 KiB
Python
# -*- coding:utf-8 -*-
|
|
#
|
|
# Copyright (C) 2012, Carlos Jenkins <carlos@jenkins.co.cr>
|
|
# Copyright (C) 2012-2016, Maximilian Köhl <mail@koehlma.de>
|
|
#
|
|
# This program is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
"""
This module extracts the .dic and .aff (Hunspell) dictionaries from any given
.oxt extension.

Extensions can be found at:

    http://extensions.services.openoffice.org/dictionary
"""
|
|
|
|
import functools
|
|
import gettext
|
|
import logging
|
|
import os
|
|
import shutil
|
|
import sys
|
|
import warnings
|
|
import xml.dom.minidom
|
|
import xml.parsers.expat
|
|
import zipfile
|
|
|
|
# enable deprecation warnings
# NOTE(review): this changes the process-wide warning filter as a side effect
# of importing the module; kept because existing users may rely on it.
warnings.simplefilter('always', DeprecationWarning)

# public objects; every name listed here has to exist in this module,
# otherwise ``from <module> import *`` fails with an AttributeError (the
# extraction function is called ``extract``, there is no ``extract_oxt``)
__all__ = ['extract', 'batch_extract', 'BadXml', 'BadExtensionFile',
           'ExtractPathIsNoDirectory', 'BATCH_SUCCESS', 'BATCH_ERROR',
           'BATCH_WARNING']

# logger
logger = logging.getLogger(__name__)

# translation; with ``fallback=True`` a missing catalog results in
# untranslated messages instead of an exception
locale_name = 'py{}gtkspellcheck'.format(sys.version_info.major)
_ = gettext.translation(locale_name, fallback=True).gettext
|
|
|
|
class BadXml(Exception):
    """Raised when the XML dictionary registry is not valid XML."""
|
|
|
|
class BadExtensionFile(Exception):
    """Raised when the extension is not in the expected ZIP file format."""
|
|
|
|
class ExtractPathIsNoDirectory(Exception):
    """Raised when the given `extract_path` is not a directory."""
|
|
|
|
|
|
def find_dictionaries(registry):
    """
    Collect the Hunspell spelling dictionaries declared in a registry.

    :param registry: parsed dictionary registry (``xml.dom.minidom`` document
        or element)
    :rtype: list with one entry per spelling dictionary, each entry being the
        list of that dictionary's file locations with the ``%origin%/``
        prefix removed

    Only dictionaries whose ``Format`` property is ``DICT_SPELL`` are
    reported. Dictionaries without a ``Format`` or ``Locations`` value, or
    without any location, are skipped instead of raising an error.
    """
    def oor_name(name, element):
        # getAttribute() yields '' for a missing attribute, so elements
        # without "oor:name" simply do not match
        return element.getAttribute('oor:name').lower() == name

    def get_property(name, properties):
        # first "value" element of the first property named `name`, or None
        for candidate in properties:
            if oor_name(name, candidate):
                values = candidate.getElementsByTagName('value')
                return values[0] if values else None
        return None

    def text_of(element):
        # character data directly below `element`, child elements ignored
        return ''.join(child.data for child in element.childNodes
                       if child.nodeType in (xml.dom.Node.TEXT_NODE,
                                             xml.dom.Node.CDATA_SECTION_NODE))

    result = []

    # find all "node" elements which have "dictionaries" as "oor:name" attribute
    for dictionaries in filter(functools.partial(oor_name, 'dictionaries'),
                               registry.getElementsByTagName('node')):
        # for all "node" elements in this dictionary nodes
        for dictionary in dictionaries.getElementsByTagName('node'):
            # get all "prop" elements
            properties = dictionary.getElementsByTagName('prop')

            # only spelling dictionaries are of interest
            format_value = get_property('format', properties)
            if (format_value is None
                    or text_of(format_value).strip() != 'DICT_SPELL'):
                continue

            # find the locations property
            locations = get_property('locations', properties)
            if locations is None:
                continue

            items = locations.getElementsByTagName('it')
            if items:
                # list form:
                #   <it>%origin%/dictionary.aff</it>
                #   <it>%origin%/dictionary.dic</it>
                # checked first so that whitespace in front of the first
                # item is not mistaken for the text form
                paths = [text_of(item).replace('%origin%/', '').strip()
                         for item in items]
            else:
                # text form:
                #   %origin%/dictionary.aff %origin%/dictionary.dic
                paths = text_of(locations).replace('%origin%/', '').split()

            paths = [path for path in paths if path]
            if paths:
                result.append(paths)

    return result
|
|
|
|
def extract(filename, target, override=False):
    """
    Extract Hunspell dictionaries out of LibreOffice ``.oxt`` extensions.

    :param filename: path to the ``.oxt`` extension
    :param target: path to extract Hunspell dictionaries to
    :param override: override existing files in the target directory
    :rtype: list of the extracted dictionaries (file names without
        directory); empty if the extension contains no dictionary registry
    :raises BadExtensionFile: if the extension is not a valid ZIP file
    :raises BadXml: if the dictionary registry is not valid XML

    This function extracts the Hunspell dictionaries (``.dic`` and ``.aff``
    files) from the given ``.oxt`` extension to ``target``.

    Extensions can be found at:

        http://extensions.services.openoffice.org/dictionary
    """
    # TODO 5.0: remove this function
    warnings.warn(('call to deprecated function "{}", '
                   'moved to separate package "oxt_extract", '
                   'will be removed in pygtkspellcheck 5.0'
                   ).format(extract.__name__),
                  category=DeprecationWarning)

    extracted = []
    try:
        with zipfile.ZipFile(filename, 'r') as extension:
            members = extension.namelist()

            # the registry is normally stored at the top level of the
            # archive, otherwise fall back to a registry in a sub-directory
            registry_name = 'dictionaries.xcu'
            if registry_name not in members:
                for member in members:
                    if member.lower().endswith(registry_name):
                        registry_name = member

            # no registry at all: nothing to extract
            if registry_name not in members:
                return extracted

            with extension.open(registry_name) as registry_file:
                registry = xml.dom.minidom.parse(registry_file)

            # "%origin%" denotes the directory holding the registry, which
            # matters if the registry is stored in a sub-directory
            origin = registry_name.rpartition('/')[0]

            for dictionary in find_dictionaries(registry):
                for location in dictionary:
                    base_name = os.path.basename(location)
                    dict_file = os.path.join(target, base_name)

                    if (os.path.exists(dict_file)
                            and not (override and os.path.isfile(dict_file))):
                        logger.warning(('dictionary file "{}" already exists '
                                        'and not overriding it'
                                        ).format(dict_file))
                        continue

                    member = location
                    if member not in members and origin:
                        member = '{}/{}'.format(origin, location)
                    if member not in members:
                        logger.warning('dictionary exists in registry '
                                       'but not in the extension zip')
                        continue

                    # read before creating the destination file so that a
                    # failing read does not leave an empty file behind
                    with extension.open(member, 'r') as _source:
                        data = _source.read()
                    with open(dict_file, 'wb') as _target:
                        _target.write(data)
                    extracted.append(base_name)
    except zipfile.BadZipfile:
        raise BadExtensionFile('extension is not a valid ZIP file')
    except xml.parsers.expat.ExpatError:
        raise BadXml('dictionary registry is not valid XML')
    return extracted
|
|
|
|
# result codes yielded by ``batch_extract`` as first element of each tuple
BATCH_SUCCESS, BATCH_ERROR, BATCH_WARNING = 'success', 'error', 'warning'
|
|
|
|
def batch_extract(oxt_path, extract_path, override=False, move_path=None):
    """
    Uncompress, read and install LibreOffice ``.oxt`` dictionaries extensions.

    :param oxt_path: path to a directory containing the ``.oxt`` extensions
    :param extract_path: path to extract Hunspell dictionaries files to
    :param override: override already existing files
    :param move_path: optional path to move the ``.oxt`` files after processing
    :rtype: generator over all extensions, yielding result, extension name,
        error, extracted dictionaries and translated error message - result
        would be :const:`BATCH_SUCCESS` for success, :const:`BATCH_ERROR` if
        some error happened or :const:`BATCH_WARNING` which contain some
        warning messages instead of errors
    :raises ExtractPathIsNoDirectory: if ``extract_path`` exists but is not a
        directory

    This function extracts the Hunspell dictionaries (``.dic`` and ``.aff``
    files) from all the ``.oxt`` extensions found on ``oxt_path`` directory to
    the ``extract_path`` directory. If ``oxt_path`` is not a directory the
    generator yields nothing.

    Extensions can be found at:

        http://extensions.services.openoffice.org/dictionary

    In detail, this function does the following:

    1. find all the ``.oxt`` extension files within ``oxt_path``
    2. open (unzip) each extension
    3. find the dictionary definition file within (*dictionaries.xcu*)
    4. parse the dictionary definition file and locate the dictionaries files
    5. uncompress those files to ``extract_path``


    By default file overriding is disabled, set ``override`` parameter to True
    if you want to enable it. As additional option, each processed extension
    can be moved to ``move_path``.

    Example::

        for result, name, error, dictionaries, message in oxt_extract.batch_extract(...):
            if result == oxt_extract.BATCH_SUCCESS:
                print('successfully extracted extension "{}"'.format(name))
            elif result == oxt_extract.BATCH_ERROR:
                print('could not extract extension "{}"'.format(name))
                print(message)
                print('error {}'.format(error))
            elif result == oxt_extract.BATCH_WARNING:
                print('warning during processing extension "{}"'.format(name))
                print(message)
                print(error)

    """

    # TODO 5.0: remove this function
    warnings.warn(('call to deprecated function "{}", '
                   'moved to separate package "oxt_extract", '
                   'will be removed in pygtkspellcheck 5.0'
                   ).format(batch_extract.__name__),
                  category=DeprecationWarning)

    # get the real, absolute and normalized path
    oxt_path = os.path.normpath(os.path.abspath(os.path.realpath(oxt_path)))

    # check that the input directory exists
    if not os.path.isdir(oxt_path):
        return

    # create extract directory if not exists
    if not os.path.exists(extract_path):
        os.makedirs(extract_path)

    # check that the extract path is a directory
    if not os.path.isdir(extract_path):
        raise ExtractPathIsNoDirectory('extract path is not a valid directory')

    # get all .oxt extension at given path
    oxt_files = [extension for extension in os.listdir(oxt_path)
                 if extension.lower().endswith('.oxt')]

    for extension_name in oxt_files:
        extension_path = os.path.join(oxt_path, extension_name)

        try:
            dictionaries = extract(extension_path, extract_path, override)
            yield BATCH_SUCCESS, extension_name, None, dictionaries, ''
        except BadExtensionFile as error:
            logger.error(('extension "{}" is not a valid ZIP file'
                          ).format(extension_name))
            yield (BATCH_ERROR, extension_name, error, [],
                   _('extension "{}" is not a valid ZIP file'
                     ).format(extension_name))
        except BadXml as error:
            logger.error(('extension "{}" has no valid XML dictionary registry'
                          ).format(extension_name))
            yield (BATCH_ERROR, extension_name, error, [],
                   _('extension "{}" has no valid XML dictionary registry'
                     ).format(extension_name))

        # move the extension after processing if user requires it
        if move_path is None:
            continue

        # create move path if it doesn't exist
        if not os.path.exists(move_path):
            os.makedirs(move_path)

        # move to the given path only if it is a directory and the target
        # doesn't exist (or may be overridden)
        if not os.path.isdir(move_path):
            logger.warning(('unable to move extension, move_path is not a '
                            'directory'))
            yield (BATCH_WARNING, extension_name,
                   ('unable to move extension, move_path is not a '
                    'directory'), [],
                   _('unable to move extension, move_path is not a '
                     'directory'))
            continue

        destination = os.path.join(move_path, extension_name)
        if not os.path.exists(destination) or override:
            # the full destination path is passed on purpose: when given
            # only the directory, shutil.move() raises shutil.Error if a
            # file of the same name already exists, which would make
            # overriding impossible
            shutil.move(extension_path, destination)
        else:
            logger.warning(('unable to move extension, file with same '
                            'name exists within move_path'))
            yield (BATCH_WARNING, extension_name,
                   ('unable to move extension, file with same name '
                    'exists within move_path'), [],
                   _('unable to move extension, file with same name '
                     'exists within move_path'))