#!/usr/bin/env python
"""Generate a Doxygen Markdown page listing every entry of a BibTeX database.

The script feeds a dummy LaTeX document that cites everything to ``latex``,
runs ``bibtex`` with the ``doxybib`` style, and then rewrites the resulting
``.bbl`` file into Markdown using the regular-expression table `subs`.
"""

import re
from os import popen, system

# NOTE: `input` shadows the builtin of the same name; the name is kept so that
# anything referring to these module-level settings keeps working.
input = "ice_bib"       # BibTeX database name (without the .bib extension)
bbl = "texput.bbl"      # LaTeX uses the job name "texput" when reading stdin
output = "references.md"

header = """References {#references}
==========

@par Notes
This large list collects all references which the PISM authors have found
convenient. There is no claim that all of these references get direct use,
or even mention, in the PISM project files.



"""

# dummy LaTeX document that \cites everything and uses a special BibTeX style
# file:
latexdummy = """\\documentclass{article}
\\begin{document}
\\cite{*}\\bibliography{%s}\\bibliographystyle{doxybib}
\\end{document}
""" % input

# (pattern, replacement) pairs applied to the whole .bbl text.
# NB! The order of substitutions is important.
subs = [
    (r"%\n", r""),                                # lines wrapped by BibTeX
    (r"\\href{([^}]*)}{([^}]*)}", r'[\2](\1)'),   # hyperref href command
    (r"\\url{([^}]*)}", r'[\1](\1)'),             # hyperref url command
    (r"\\\w*{([^}]*)}", r" \1 "),                 # ignore other LaTeX commands
    (r"[}{]", r""),                               # curly braces
    (r"\$\\sim\$", r"~"),                         # LaTeX \sim used to represent ~
    (r"---", r"—"),                               # em-dash
    (r"--", r"–"),                                # en-dash
    (r"([^/])~", r"\1 "),                         # tildes that are not in URLs
    (r'\\"([a-zA-Z])', r"&\1uml;"),               # umlaut
    (r"\\'([a-zA-Z])", r"&\1acute;"),             # acute: LaTeX \' is the acute accent
    (r'\\`([a-zA-Z])', r"&\1grave;"),             # grave: LaTeX \` is the grave accent
    # The caret has to be escaped: a bare ^ is a start-of-string anchor, so
    # the pattern could never match a backslash-caret sequence.
    (r'\\\^([a-zA-Z])', r"&\1circ;"),             # circumflex
    (r'``', r'"'),                                # opening quotes
    (r"''", r'"'),                                # closing quotes
    (r"\\,", r""),                                # \, LaTeX math spacing command
    (r"\\ae", r"æ"),                              # ae ligature
    (r"\\tt", r"\\c"),                            # \tt (in the 'siple' entry)
]


def bbl_to_markdown(text):
    """Return *text* with every substitution from `subs` applied in order.

    *text* is the content of a BibTeX-generated ``.bbl`` file; the result is
    the Markdown body written after `header`.
    """
    for regex, substitution in subs:
        text = re.sub(regex, substitution, text)
    return text


def main():
    """Run LaTeX and BibTeX, then write the converted bibliography to `output`."""
    # Remove an old .bbl so that LaTeX does not choke on it:
    system("rm -f %s" % bbl)

    # Run LaTeX, feeding it the dummy document on standard input; leaving the
    # `with` block closes the pipe and waits for latex to finish.
    with popen("latex", 'w') as latex:
        latex.write(latexdummy)

    # Run BibTeX:
    system("bibtex texput")

    # Read everything from the .bbl generated by BibTeX
    with open(bbl) as bbl_file:
        body = bbl_file.read()

    # The substitutions insert non-ASCII characters (dashes, ae ligature), so
    # the output encoding is pinned instead of being left to the locale.
    with open(output, 'w', encoding="utf-8") as out:
        out.write(header)
        out.write(bbl_to_markdown(body))


if __name__ == "__main__":
    main()