diff --git a/.gitignore b/.gitignore
index b9c89215748..f348631f4f3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@ pep-0000.txt
 pep-0000.rst
 pep-????.html
 peps.rss
+peps.bib
 __pycache__
 *.pyc
 *.pyo
diff --git a/Makefile b/Makefile
index 0f201b0c04a..e5858d29c64 100644
--- a/Makefile
+++ b/Makefile
@@ -27,12 +27,16 @@ pep-0000.rst: $(wildcard pep-????.txt) $(wildcard pep-????.rst) $(wildcard pep0/
 rss:
 	$(PYTHON) pep2rss.py .
 
+bib: pep-0000.rst
+	$(PYTHON) pep2bib.py .
+
 install:
 	echo "Installing is not necessary anymore. It will be done in post-commit."
 
 clean:
 	-rm pep-0000.rst
 	-rm *.html
+	-rm *.bib
 	-rm -rf build
 
 update:
diff --git a/pep2bib.py b/pep2bib.py
new file mode 100755
index 00000000000..00150cc1738
--- /dev/null
+++ b/pep2bib.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+
+# usage: python3 pep2bib.py .
+
+import glob
+import os
+import re
+import sys
+
+from pybtex.database import Entry, BibliographyData
+
+from pep_parsing_helpers import pep_number, pep_creation_dt, first_line_starting_with, parse_authors
+
+BIB_PATH = os.path.join(sys.argv[1], 'peps.bib')
+
+
+# Author entries look like "Name <mail>", "mail (Name)" or just "Name".
+name_first_regex = re.compile(r'(.*)<.*>')
+mail_first_regex = re.compile(r'.*\((.*)\)')
+name_only_regex = re.compile(r'(.*)')
+
+
+# BibTeX's standard three-letter month macros, keyed by month number.
+months = {
+    1: 'jan',
+    2: 'feb',
+    3: 'mar',
+    4: 'apr',
+    5: 'may',
+    6: 'jun',
+    7: 'jul',
+    8: 'aug',
+    9: 'sep',
+    10: 'oct',
+    11: 'nov',
+    12: 'dec',
+}
+
+
+def authors_to_bib(authors):
+    """Return the author names joined with " and ", as BibTeX expects.
+
+    Each entry is reduced to the bare name by dropping any e-mail address.
+    """
+    cleaned = []
+    for author in authors:
+        match = name_first_regex.match(author)
+        if match is None:
+            match = mail_first_regex.match(author)
+        if match is None:
+            # name_only_regex matches any string, so match is never None here
+            match = name_only_regex.match(author)
+        cleaned.append(match.group(1).strip())
+    return " and ".join(cleaned)
+
+
+def main():
+    """Write a BibTeX entry for every PEP in the current directory to BIB_PATH."""
+    # get list of peps with creation time
+    # (from "Created:" string in pep .rst or .txt)
+    peps = glob.glob('pep-*.txt')
+    peps.extend(glob.glob('pep-*.rst'))
+
+    peps_with_dt = [(pep_number(full_path), pep_creation_dt(full_path), full_path) for full_path in peps]
+    # sort peps by number
+    peps_with_dt.sort()
+
+    items = {}
+    for n, dt, full_path in peps_with_dt:
+        title = first_line_starting_with(full_path, 'Title:')
+        author_string = first_line_starting_with(full_path, 'Author:')
+        authors = parse_authors(author_string)
+        authors = authors_to_bib(authors)
+        url = 'https://www.python.org/dev/peps/pep-%0.4d/' % n
+        item = Entry('techreport', [
+            ('author', authors),
+            ('title', 'PEP %d: %s' % (n, title)),
+            ('institution', "Python Software Foundation"),
+            ('year', str(dt.year)),
+            ('month', months[dt.month]),
+            ('type', 'PEP'),
+            ('number', str(n)),
+            ('url', url)
+        ])
+        items['pep%d' % n] = item
+
+    bib = BibliographyData(items)
+    bib_str = bib.to_string('bibtex')
+
+    # pybtex always quotes strings, but we want month strings unquoted, so bib styles can replace it
+    bib_str = re.sub(r'month = "([a-z]{3})"', r'month = \1', bib_str)
+
+    with open(BIB_PATH, 'w', encoding="utf-8") as fp:
+        fp.write(bib_str)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/pep2rss.py b/pep2rss.py
index 52b532f51d6..9337fe29f8d 100755
--- a/pep2rss.py
+++ b/pep2rss.py
@@ -5,15 +5,15 @@
 import datetime
 import glob
 import os
-import re
 import sys
-import time
 import PyRSS2Gen as rssgen
 import docutils.frontend
 import docutils.nodes
 import docutils.parsers.rst
 import docutils.utils
 
+from pep_parsing_helpers import pep_creation_dt, first_line_starting_with, parse_authors
+
 
 RSS_PATH = os.path.join(sys.argv[1], 'peps.rss')
 
@@ -53,38 +53,12 @@ def pep_abstract(full_path: str) -> str:
     return abstract
 
 
-def firstline_startingwith(full_path, text):
-    for line in open(full_path, encoding="utf-8"):
-        if line.startswith(text):
-            return line[len(text):].strip()
-    return None
-
-
 # get list of peps with creation time
 # (from "Created:" string in pep .rst or .txt)
 peps = glob.glob('pep-*.txt')
 peps.extend(glob.glob('pep-*.rst'))
 
 
-def pep_creation_dt(full_path):
-    created_str = firstline_startingwith(full_path, 'Created:')
-    # bleh, I was hoping to avoid re but some PEPs editorialize
-    # on the Created line
-    m = re.search(r'''(\d+-\w+-\d{4})''', created_str)
-    if not m:
-        # some older ones have an empty line, that's okay, if it's old
-        # we ipso facto don't care about it.
-        # "return None" would make the most sense but datetime objects
-        # refuse to compare with that. :-|
-        return datetime.datetime(*time.localtime(0)[:6])
-    created_str = m.group(1)
-    try:
-        t = time.strptime(created_str, '%d-%b-%Y')
-    except ValueError:
-        t = time.strptime(created_str, '%d-%B-%Y')
-    return datetime.datetime(*t[:6])
-
-
 peps_with_dt = [(pep_creation_dt(full_path), full_path) for full_path in peps]
 # sort peps by date, newest first
 peps_with_dt.sort(reverse=True)
@@ -96,8 +70,9 @@ def pep_creation_dt(full_path):
         n = int(full_path.split('-')[-1].split('.')[0])
     except ValueError:
         pass
-    title = firstline_startingwith(full_path, 'Title:')
-    author = firstline_startingwith(full_path, 'Author:')
+    title = first_line_starting_with(full_path, 'Title:')
+    authors = parse_authors(first_line_starting_with(full_path, 'Author:'))
+    author = authors[0] if authors else None  # RSS only supports one author
     abstract = pep_abstract(full_path)
     url = 'https://www.python.org/dev/peps/pep-%0.4d/' % n
     item = rssgen.RSSItem(
diff --git a/pep_parsing_helpers.py b/pep_parsing_helpers.py
new file mode 100644
index 00000000000..e94159afb8a
--- /dev/null
+++ b/pep_parsing_helpers.py
@@ -0,0 +1,76 @@
+"""Helpers for reading header fields from PEP source files."""
+
+import datetime
+import re
+import time
+
+
+def first_line_starting_with(full_path, text):
+    """Return the value of the first line that starts with *text*.
+
+    Indented lines directly after the match are continuation lines (e.g. a
+    multi-line ``Author:`` field); they are joined to the value with single
+    spaces.  Returns ``None`` when no line starts with *text*.
+    """
+    parts = None
+    with open(full_path, encoding="utf-8") as fp:
+        for line in fp:
+            if parts is None:
+                if line.startswith(text):
+                    parts = [line[len(text):].strip()]
+            elif line[:1] in (" ", "\t") and line.strip():
+                parts.append(line.strip())
+            else:
+                # A blank or unindented line ends the field.
+                break
+    if parts is None:
+        return None
+    return " ".join(part for part in parts if part)
+
+
+def pep_creation_dt(full_path):
+    """Return the date on the ``Created:`` line as a ``datetime``.
+
+    Falls back to the Unix epoch (in local time) when the line is missing
+    or holds no ``DD-Mon-YYYY`` date.
+    """
+    created_str = first_line_starting_with(full_path, 'Created:')
+    # bleh, I was hoping to avoid re but some PEPs editorialize
+    # on the Created line
+    m = re.search(r'''(\d+-\w+-\d{4})''', created_str or '')
+    if not m:
+        # some older ones have an empty line, that's okay, if it's old
+        # we ipso facto don't care about it.
+        # "return None" would make the most sense but datetime objects
+        # refuse to compare with that. :-|
+        return datetime.datetime(*time.localtime(0)[:6])
+    created_str = m.group(1)
+    try:
+        t = time.strptime(created_str, '%d-%b-%Y')
+    except ValueError:
+        t = time.strptime(created_str, '%d-%B-%Y')
+    return datetime.datetime(*t[:6])
+
+
+def pep_number(full_path):
+    """Return the number in a ``pep-NNNN.ext`` file name as an int.
+
+    Raises ``ValueError`` when the part after the last ``-`` is not a number.
+    """
+    n_str = full_path.split('-')[-1].split('.')[0]
+    try:
+        return int(n_str)
+    except ValueError as err:
+        raise ValueError("Can't parse pep number %s" % n_str) from err
+
+
+def parse_authors(authors_str):
+    """Split a comma-separated ``Author:`` value into a list of authors.
+
+    Entries are stripped of surrounding whitespace and empty entries (e.g.
+    from a trailing comma) are dropped.  ``None`` or ``''`` gives ``[]``.
+    """
+    if not authors_str:
+        return []
+    stripped = (author.strip() for author in authors_str.split(','))
+    return [author for author in stripped if author]
diff --git a/requirements.txt b/requirements.txt
index 837f41b3ef7..66436c1b78b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,6 @@ docutils >= 0.16
 
 # For RSS
 feedgen >= 0.9.0  # For RSS feed
+
+# For bibliography
+pybtex >= 0.24.0