Mercurial > public > mercurial-scm > hg
view hgext/highlight/highlight.py @ 7120:db7557359636
highlight: convert text to local before passing to pygmentize (issue1341)
Example case:
Display file written in iso-8859-1 with current HGENCODING utf-8.
At the moment only an Error page appears because pygmentize
chokes on the replacement chars.
Alternatives:
1) Turn off highlighting and avoid UnicodeDecodeError
for files that are not in HGENCODING.
2) [this patch] use util.tolocal to display these files.
Alternative 2) seems ok, as this only concerns display and
readability.
See also: fe38b0a3a928, apparently put aside during refactor of
highlight.
Add test for UnicodeDecodeError with iso-8859-1 file contents.
author | Christian Ebert <blacktrash@gmx.net> |
---|---|
date | Fri, 17 Oct 2008 12:12:33 +0200 |
parents | ce94b3236ea4 |
children | de377b1a9a84 |
line wrap: on
line source
# highlight extension implementation file
#
# This is the implementation half of the highlight extension.  The
# original module was split into an interface file and this file so that
# loading pygments can be deferred and extension setup stays fast.

from mercurial import demandimport
# These modules are added to demandimport's ignore list before pygments
# is imported below.
demandimport.ignore.extend(['pkgutil', 'pkg_resources', '__main__',])

from mercurial import util
from mercurial.templatefilters import filters

from pygments import highlight
from pygments.util import ClassNotFound
from pygments.lexers import guess_lexer, guess_lexer_for_filename, TextLexer
from pygments.formatters import HtmlFormatter

# Stylesheet link appended to the 'header' template by pygmentize().
SYNTAX_CSS = ('\n<link rel="stylesheet" href="{url}highlightcss" '
              'type="text/css" />')


def _pick_lexer(fctx, sample, encoding):
    """Return a pygments lexer for the file behind fctx.

    Tries a filename-based guess first, then a content-only guess on
    sample, and finally falls back to the plain TextLexer.  A guess is
    abandoned when it raises ClassNotFound or ValueError.
    """
    try:
        return guess_lexer_for_filename(fctx.path(), sample,
                                        encoding=encoding)
    except (ClassNotFound, ValueError):
        pass
    try:
        return guess_lexer(sample, encoding=encoding)
    except (ClassNotFound, ValueError):
        return TextLexer(encoding=encoding)


def pygmentize(field, fctx, style, tmpl):
    """Install syntax highlighting for the template entry named field.

    field -- key into tmpl.cache whose template text is rewritten
    fctx  -- object providing data() and path() for the file to show
    style -- pygments style, passed to HtmlFormatter
    tmpl  -- templater; called as tmpl('header') and edited via tmpl.cache

    The stylesheet link is always added to the cached header (once).
    If util.binary() reports the file data as binary, the function then
    returns None without highlighting anything.  Otherwise it registers
    a 'colorize' template filter and makes the field template use it in
    place of 'escape'.
    """
    # Make sure the header carries the <link ...> to the highlight css.
    header = ''.join(tmpl('header'))
    if SYNTAX_CSS not in header:
        tmpl.cache['header'] = header + SYNTAX_CSS

    data = fctx.data()
    if util.binary(data):
        return

    # Convert to the local encoding first; this avoids a
    # UnicodeDecodeError inside pygments.
    text = util.tolocal(data)
    encoding = util._encoding

    # Only the first 1024 characters are used for guessing the lexer.
    lexer = _pick_lexer(fctx, text[:1024], encoding)
    formatter = HtmlFormatter(style=style, encoding=encoding)

    # The whole text is highlighted in one go: formatting line-by-line
    # would get multi-line strings wrong.
    markup = highlight(text, lexer, formatter)

    # Strip the wrapping div: drop everything from the closing
    # '\n</pre>' onwards, then everything up to and including the
    # opening '<pre>'.
    markup = markup[:markup.find('\n</pre>')]
    markup = markup[markup.find('<pre>') + len('<pre>'):]
    pending = iter(markup.splitlines())

    def colorize(line):
        # The argument is ignored; each call hands out the next
        # highlighted line.  Presumably the template applies this filter
        # once per source line, in order -- verify against the templates.
        return pending.next()

    filters['colorize'] = colorize

    # Route each line through 'colorize' instead of 'escape'.
    tmpl.cache[field] = tmpl.cache[field].replace('line|escape',
                                                  'line|colorize')