forked from Mirrors/apostrophe
256 lines
7.4 KiB
Python
256 lines
7.4 KiB
Python
|
#!/usr/bin/env python
|
|||
|
# -*- coding: utf-8 -*-
|
|||
|
|
|||
|
"""
|
|||
|
A set of functions useful for customizing bibtex fields.
|
|||
|
You can find inspiration from these functions to design yours.
|
|||
|
Each of them takes a record and returns the modified record.
|
|||
|
"""
|
|||
|
|
|||
|
import itertools
|
|||
|
import re
|
|||
|
import logging
|
|||
|
|
|||
|
from .latexenc import unicode_to_latex, unicode_to_crappy_latex1, unicode_to_crappy_latex2, string_to_latex, protect_uppercase
|
|||
|
|
|||
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Public names exported by a star-import of this module.
__all__ = [
    'getnames',
    'author',
    'editor',
    'journal',
    'keyword',
    'link',
    'page_double_hyphen',
    'doi',
    'type',
    'convert_to_unicode',
    'homogeneize_latex_encoding',
]
|
|||
|
|
|||
|
|
|||
|
def getnames(names):
    """Normalise people names to the ``Surname, Firstnames`` form.

    Names already written as ``Surname, Firstnames`` are only cleaned of
    extra whitespace.  Names written as ``Firstnames Surname`` are
    reordered, and initials glued together (``F.B.``) are spaced out
    (``F. B.``).  Blank entries are skipped.

    Lower-case particles that directly precede the surname (``de``,
    ``van``, ``der``, ...) are attached to it, however many there are, so
    ``Jean de la Tour`` becomes ``de la Tour, Jean``.  A trailing
    ``jr``/``jnr``/``junior`` is dropped in favour of the word before it.

    :param names: a list of names
    :type names: list
    :returns: list -- Correctly formatted names
    """
    # Generational suffixes that must not be taken for the surname.
    suffixes = ('jnr', 'jr', 'junior')
    # Particles that belong to the surname rather than to the first names.
    particles = ('ben', 'van', 'der', 'de', 'la', 'le')

    tidynames = []
    for namestring in names:
        namestring = namestring.strip()
        if not namestring:
            continue
        if ',' in namestring:
            # Already "Surname, Firstnames": split on the first comma only.
            last, _, rest = namestring.partition(',')
            last = last.strip()
            firsts = rest.split()
        else:
            tokens = namestring.split()
            last = tokens.pop()
            # "F.B." -> "F. B." so initials are spaced consistently.
            firsts = [token.replace('.', '. ').strip() for token in tokens]
        # Only swap the suffix for the preceding word when there is one;
        # a name consisting solely of "jr" is kept as the surname.
        if last in suffixes and firsts:
            last = firsts.pop()
        # Consume particles from the end instead of popping while iterating,
        # which skipped elements and mis-assembled multi-particle surnames.
        while firsts and firsts[-1] in particles:
            last = firsts.pop() + ' ' + last
        tidynames.append(last + ", " + ' '.join(firsts))
    return tidynames
|
|||
|
|
|||
|
|
|||
|
def author(record):
    """
    Replace the author field by a list of names formatted by ``getnames``.

    The field is split on the literal ``" and "`` after newlines have been
    turned into spaces.  An empty author field is removed from the record.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.

    """
    if "author" not in record:
        return record

    raw_authors = record["author"]
    if not raw_authors:
        # Nothing to split: drop the empty field altogether.
        del record["author"]
        return record

    single_line = raw_authors.replace('\n', ' ')
    candidates = [chunk.strip() for chunk in single_line.split(" and ")]
    record["author"] = getnames(candidates)
    return record
|
|||
|
|
|||
|
|
|||
|
def editor(record):
    """
    Replace the editor field by a list of ``{"name": ..., "ID": ...}`` dicts.

    Names are split on the literal ``" and "`` and formatted by
    ``getnames``; the ID is the formatted name with commas, spaces and
    dots removed.  An empty editor field is removed from the record.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.

    """
    if "editor" not in record:
        return record

    raw_editors = record["editor"]
    if not raw_editors:
        # Nothing to split: drop the empty field altogether.
        del record["editor"]
        return record

    single_line = raw_editors.replace('\n', ' ')
    formatted = getnames([chunk.strip() for chunk in single_line.split(" and ")])

    # Convert each editor name to an object carrying a compact identifier.
    entries = []
    for name in formatted:
        ident = name.replace(',', '').replace(' ', '').replace('.', '')
        entries.append({"name": name, "ID": ident})
    record["editor"] = entries
    return record
|
|||
|
|
|||
|
|
|||
|
def page_double_hyphen(record):
    """
    Rewrite the pages field as ``first--last``.

    For every known dash character found in the field, the field is split
    on it and rebuilt from its first and last pieces joined by ``--``.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.

    """
    if "pages" not in record:
        return record

    # hyphen, non-breaking hyphen, en dash, em dash, hyphen-minus, minus sign
    for dash in (u'‐', u'‑', u'–', u'—', u'-', u'−'):
        pages = record["pages"]
        if dash not in pages:
            continue
        pieces = [piece.strip().strip(dash) for piece in pages.split(dash)]
        # Only the outermost pieces are kept.
        record["pages"] = '--'.join((pieces[0], pieces[-1]))
    return record
|
|||
|
|
|||
|
|
|||
|
def type(record):
    """
    Lower-case the type field, when the record has one.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.

    """
    if "type" not in record:
        return record
    lowered = record["type"].lower()
    record["type"] = lowered
    return record
|
|||
|
|
|||
|
|
|||
|
def journal(record):
    """
    Replace a non-empty journal field by a ``{"name": ..., "ID": ...}`` dict.

    The name is the original field value; the ID is that value with
    commas, spaces and dots removed.  A missing or empty journal field is
    left untouched.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.

    """
    if "journal" not in record:
        return record

    name = record["journal"]
    if not name:
        return record

    # Switch journal to an object with a compact identifier.
    ident = name
    for char in (',', ' ', '.'):
        ident = ident.replace(char, '')
    record["journal"] = {"name": name, "ID": ident}
    return record
|
|||
|
|
|||
|
|
|||
|
def keyword(record, sep=',|;'):
    """
    Replace the keyword field by a list of stripped keywords.

    Newlines are removed from the field before it is split with the
    regular expression ``sep``.

    :param record: the record.
    :type record: dict
    :param sep: pattern used for the splitting regexp.
    :type sep: string, optional
    :returns: dict -- the modified record.

    """
    if "keyword" not in record:
        return record

    flattened = record["keyword"].replace('\n', '')
    record["keyword"] = [chunk.strip() for chunk in re.split(sep, flattened)]
    return record
|
|||
|
|
|||
|
|
|||
|
def link(record):
    """
    Replace the link field by a list of link objects.

    Each non-blank line of the field is read as whitespace-separated
    ``url [anchor [format]]`` and becomes a dict with a ``"url"`` key and,
    when present, ``"anchor"`` and ``"format"`` keys.  Any further words on
    the line are ignored.  Blank lines are skipped.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.

    """
    if "link" not in record:
        return record

    entries = []
    for line in record["link"].split('\n'):
        # split() without argument collapses runs of whitespace, so repeated
        # spaces between the parts no longer yield empty anchors/formats.
        parts = line.split()
        if not parts:
            continue
        linkobj = {"url": parts[0]}
        if len(parts) > 1:
            linkobj["anchor"] = parts[1]
        if len(parts) > 2:
            linkobj["format"] = parts[2]
        entries.append(linkobj)

    record["link"] = entries
    return record
|
|||
|
|
|||
|
|
|||
|
def doi(record):
    """
    Add a link object for the doi field to the record's link list.

    When the record has a ``doi`` field, a ``{"url": ..., "anchor": "doi"}``
    entry is appended to ``record['link']`` (created as an empty list if
    missing), unless a DOI entry is already there.  A DOI starting with
    ``10`` is prefixed with ``http://dx.doi.org/``; any other value is used
    as the URL unchanged.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.

    """
    if 'doi' not in record:
        return record

    links = record.setdefault('link', [])

    def _is_doi_link(item):
        # Original check, kept for backward compatibility.
        if 'doi' in item:
            return True
        # The entries appended below carry the marker in "anchor", not as a
        # key; recognising it keeps repeated calls from duplicating the link.
        return isinstance(item, dict) and item.get('anchor') == 'doi'

    if not any(_is_doi_link(item) for item in links):
        url = record['doi']
        if url.startswith('10'):
            url = 'http://dx.doi.org/' + url
        links.append({"url": url, "anchor": "doi"})
    return record
|
|||
|
|
|||
|
|
|||
|
def convert_to_unicode(record):
    """
    Replace latex sequences found in the record's fields by unicode.

    Two passes are applied to every field, using the lookup tables
    imported from ``latexenc`` (iterated here as ``(unicode, latex)``
    pairs): a plain substring replacement, then a second pass for the
    sequences still containing a backslash.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    for field in record:
        value = record[field]
        if '\\' in value or '{' in value:
            # First pass: direct latex -> unicode substitution.
            for uni, latex in itertools.chain(unicode_to_crappy_latex1, unicode_to_latex):
                if latex in record[field]:
                    record[field] = record[field].replace(latex, uni)

        # Second pass, only if a backslash survived the first one.
        if '\\' not in record[field]:
            continue
        for uni, latex in unicode_to_crappy_latex2:
            if latex not in record[field]:
                continue
            pieces = record[field].split(str(latex))
            for idx in range(len(pieces) - 1):
                follower = pieces[idx + 1]
                if follower:
                    # Change order to display accents: the character that
                    # followed the latex sequence is moved in front of the
                    # join point, so the replacement lands after it
                    # (presumably a combining mark -- confirm in latexenc).
                    pieces[idx] += follower[0]
                    pieces[idx + 1] = follower[1:]
            record[field] = uni.join(pieces)
    return record
|
|||
|
|
|||
|
|
|||
|
def homogeneize_latex_encoding(record):
    """
    Homogenize the latex encoding style of a bibtex record.

    Every field is first passed through ``convert_to_unicode``; then every
    field except ``ID`` is passed through ``string_to_latex``, and the
    ``title`` field additionally through ``protect_uppercase``.

    This function is experimental.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    # Start from a unicode version of every field...
    record = convert_to_unicode(record)

    # ...then fall back to latex, leaving the entry ID alone.
    for field in record:
        if field == 'ID':
            continue
        logger.debug('Apply string_to_latex to: %s', field)
        record[field] = string_to_latex(record[field])
        if field != 'title':
            continue
        logger.debug('Protect uppercase in title')
        logger.debug('Before: %s', record[field])
        record[field] = protect_uppercase(record[field])
        logger.debug('After: %s', record[field])
    return record
|