Mercurial > public > mercurial-scm > hg-stable
diff mercurial/minirst.py @ 9156:c9c7e8cdac9c
minimal reStructuredText parser
author | Martin Geisler <mg@lazybytes.net> |
---|---|
date | Thu, 16 Jul 2009 23:25:25 +0200 |
parents | |
children | cd5b6a11b607 |
line wrap: on
line diff
# minirst.py - minimal reStructuredText parser
#
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2, incorporated herein by reference.

"""simplified reStructuredText parser.

This parser knows just enough about reStructuredText to parse the
Mercurial docstrings.

It cheats in a major way: nested blocks are not really nested. They
are just indented blocks that look like they are nested. This relies
on the user to keep the right indentation for the blocks.

It only supports a small subset of reStructuredText:

- paragraphs

- definition lists (definitions must be indented with spaces)

- lists (items must start with '-')

- literal blocks

- option lists (long options only; at least two spaces must separate
  the option from its description)

- inline markup is not recognized at all.

The parser works on a flat list of "blocks". Each block is a dictionary
with at least an 'indent' field (number of leading spaces shared by all
lines) and a 'lines' field (the lines with that indentation removed).
The find* functions below add a 'type' field and split paragraphs into
more specific blocks.
"""

import re
import sys
import textwrap


def findblocks(text):
    """Find continuous blocks of lines in text.

    Blocks are separated by one or more blank (whitespace-only) lines.

    Returns a list of dictionaries representing the blocks. Each block
    has an 'indent' field and a 'lines' field. The common indentation
    is stripped from the lines. Empty input yields an empty list.
    """
    blocks = [[]]
    for line in text.splitlines():
        if line.strip():
            blocks[-1].append(line)
        elif blocks[-1]:
            # A blank line only starts a new block if the current one
            # has content, so runs of blank lines collapse.
            blocks.append([])
    if not blocks[-1]:
        # Drop the empty block left by trailing blank lines (or by
        # completely empty input).
        del blocks[-1]

    for i, block in enumerate(blocks):
        indent = min((len(l) - len(l.lstrip())) for l in block)
        blocks[i] = dict(indent=indent, lines=[l[indent:] for l in block])
    return blocks


def findliteralblocks(blocks):
    """Finds literal blocks and adds a 'type' field to the blocks.

    Literal blocks are given the type 'literal', all other blocks are
    given the type 'paragraph'. The "::" marker that introduces a
    literal block is removed or shortened, and the indentation of the
    literal blocks is reduced by the distance between the introducing
    paragraph and the first literal block.

    The list is modified in place and also returned.
    """
    i = 0
    while i < len(blocks):
        # Searching for a block that looks like this:
        #
        #    +------------------------------+
        #    | paragraph                    |
        #    | (ends with "::")             |
        #    +------------------------------+
        #       +---------------------------+
        #       | indented literal block    |
        #       +---------------------------+
        blocks[i]['type'] = 'paragraph'
        if blocks[i]['lines'][-1].endswith('::') and i + 1 < len(blocks):
            indent = blocks[i]['indent']
            adjustment = blocks[i + 1]['indent'] - indent

            if blocks[i]['lines'] == ['::']:
                # Expanded form: remove block. After this, index i
                # refers to the block *before* the marker (or is -1 if
                # the marker was the first block), so that block must
                # not be consulted for the hanging indent below.
                del blocks[i]
                i -= 1
            else:
                if blocks[i]['lines'][-1].endswith(' ::'):
                    # Partially minimized form: remove space and both
                    # colons.
                    blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
                else:
                    # Fully minimized form: remove just one colon.
                    blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]

                # List items are formatted with a hanging indent. We
                # must correct for this here while we still have the
                # original information on the indentation of the
                # subsequent literal blocks available.
                if blocks[i]['lines'][0].startswith('- '):
                    indent += 2
                    adjustment -= 2

            # Mark the following indented blocks.
            while i + 1 < len(blocks) and blocks[i + 1]['indent'] > indent:
                blocks[i + 1]['type'] = 'literal'
                blocks[i + 1]['indent'] -= adjustment
                i += 1
        i += 1
    return blocks


def findsections(blocks):
    """Finds sections.

    A two-line paragraph whose second line consists of '-' characters
    and is exactly as long as the first line gets the type 'section'.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.
    """
    for block in blocks:
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | Section title                |
        # | -------------                |
        # +------------------------------+
        if (block['type'] == 'paragraph' and
            len(block['lines']) == 2 and
            block['lines'][1] == '-' * len(block['lines'][0])):
            block['type'] = 'section'
    return blocks


def findbulletlists(blocks):
    """Finds bullet lists.

    A paragraph whose first line starts with '- ' is replaced by one
    'bullet' block per item. The '- ' prefix is stripped from the item
    text and the item indentation is increased by two to account for it.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.
    """
    i = 0
    while i < len(blocks):
        # Searching for a paragraph that looks like this:
        #
        # +------+-----------------------+
        # | "- " | list item             |
        # +------| (body elements)+      |
        #        +-----------------------+
        if (blocks[i]['type'] == 'paragraph' and
            blocks[i]['lines'][0].startswith('- ')):
            items = []
            for line in blocks[i]['lines']:
                if line.startswith('- '):
                    items.append(dict(type='bullet', lines=[],
                                      indent=blocks[i]['indent'] + 2))
                    line = line[2:]
                # Lines without the prefix continue the current item.
                items[-1]['lines'].append(line)
            blocks[i:i + 1] = items
            # Skip over the blocks we just inserted.
            i += len(items) - 1
        i += 1
    return blocks


# A long option, optionally followed by an argument, then at least two
# spaces, then the description. Requiring two spaces keeps the first
# word of a description from being mistaken for an option argument.
_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)?  +)(.*)$')
def findoptionlists(blocks):
    """Finds option lists.

    A paragraph whose first line looks like an option is replaced by
    one 'option' block per option. Each option block has a 'width'
    field holding the width of the option (including its argument and
    the separating spaces), used as hanging indent when formatting.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.
    """
    i = 0
    while i < len(blocks):
        # Searching for a paragraph that looks like this:
        #
        # +----------------------------+-------------+
        # | "--" option "  "           | description |
        # +-------+--------------------+             |
        #         | (body elements)+                 |
        #         +----------------------------------+
        if (blocks[i]['type'] == 'paragraph' and
            _optionre.match(blocks[i]['lines'][0])):
            options = []
            for line in blocks[i]['lines']:
                m = _optionre.match(line)
                if m:
                    option, arg, rest = m.groups()
                    width = len(option) + len(arg)
                    options.append(dict(type='option', lines=[],
                                        indent=blocks[i]['indent'],
                                        width=width))
                # Non-matching lines continue the current option.
                options[-1]['lines'].append(line)
            blocks[i:i + 1] = options
            # Skip over the blocks we just inserted.
            i += len(options) - 1
        i += 1
    return blocks


def finddefinitionlists(blocks):
    """Finds definition lists.

    A paragraph whose first line is not indented and whose second line
    is indented is replaced by one 'definition' block per term. Each
    definition block has a 'hang' field with the indentation of its
    last line, used to indent the definition text when formatting.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.
    """
    i = 0
    while i < len(blocks):
        # Searching for a paragraph that looks like this:
        #
        # +----------------------------+
        # | term                       |
        # +--+-------------------------+--+
        #    | definition                 |
        #    | (body elements)+           |
        #    +----------------------------+
        if (blocks[i]['type'] == 'paragraph' and
            len(blocks[i]['lines']) > 1 and
            not blocks[i]['lines'][0].startswith(' ') and
            blocks[i]['lines'][1].startswith(' ')):
            definitions = []
            for line in blocks[i]['lines']:
                if not line.startswith(' '):
                    # An unindented line is a new term.
                    definitions.append(dict(type='definition', lines=[],
                                            indent=blocks[i]['indent']))
                definitions[-1]['lines'].append(line)
                definitions[-1]['hang'] = len(line) - len(line.lstrip())
            blocks[i:i + 1] = definitions
            # Skip over the blocks we just inserted.
            i += len(definitions) - 1
        i += 1
    return blocks


def addmargins(blocks):
    """Adds empty blocks for vertical spacing.

    This groups bullets, options, and definitions together with no vertical
    space between them, and adds an empty block between all other blocks.
    """
    i = 1
    while i < len(blocks):
        if (blocks[i]['type'] == blocks[i - 1]['type'] and
            blocks[i]['type'] in ('bullet', 'option', 'definition')):
            i += 1
        else:
            blocks.insert(i, dict(lines=[''], indent=0, type='margin'))
            # Step over both the margin and the block that follows it.
            i += 2
    return blocks


def formatblock(block, width):
    """Format a block according to width.

    Returns the block as a string without a trailing newline. Literal
    blocks and sections are emitted verbatim (only indented); all other
    blocks are re-wrapped to the given width.
    """
    indent = ' ' * block['indent']
    if block['type'] == 'margin':
        return ''
    elif block['type'] in ('literal', 'section'):
        return indent + ('\n' + indent).join(block['lines'])
    elif block['type'] == 'definition':
        term = indent + block['lines'][0]
        defindent = indent + block['hang'] * ' '
        text = ' '.join(l.strip() for l in block['lines'][1:])
        return "%s\n%s" % (term, textwrap.fill(text, width=width,
                                               initial_indent=defindent,
                                               subsequent_indent=defindent))
    else:
        initindent = subindent = indent
        text = ' '.join(l.strip() for l in block['lines'])
        if block['type'] == 'bullet':
            # The bullet indent already includes room for the "- "
            # prefix, so the prefix replaces the last two spaces.
            initindent = indent[:-2] + '- '
            subindent = indent
        elif block['type'] == 'option':
            # Continuation lines line up with the description.
            subindent = indent + block['width'] * ' '

        return textwrap.fill(text, width=width,
                             initial_indent=initindent,
                             subsequent_indent=subindent)


def format(text, width):
    """Parse and format the text according to width."""
    blocks = findblocks(text)
    blocks = findliteralblocks(blocks)
    blocks = findsections(blocks)
    blocks = findbulletlists(blocks)
    blocks = findoptionlists(blocks)
    blocks = finddefinitionlists(blocks)
    blocks = addmargins(blocks)
    return '\n'.join(formatblock(b, width) for b in blocks)


if __name__ == "__main__":
    from pprint import pprint

    def debug(func, blocks):
        """Run one parsing stage and dump the resulting blocks."""
        blocks = func(blocks)
        print("*** after %s:" % func.__name__)
        pprint(blocks)
        print("")
        return blocks

    # Close the input file explicitly instead of relying on garbage
    # collection.
    fp = open(sys.argv[1])
    try:
        text = fp.read()
    finally:
        fp.close()

    blocks = debug(findblocks, text)
    blocks = debug(findliteralblocks, blocks)
    blocks = debug(findsections, blocks)
    blocks = debug(findbulletlists, blocks)
    blocks = debug(findoptionlists, blocks)
    blocks = debug(finddefinitionlists, blocks)
    blocks = debug(addmargins, blocks)
    print('\n'.join(formatblock(b, 30) for b in blocks))