Skip to content

Commit

Permalink
Merge branch 'release/2.3.15'
Browse files Browse the repository at this point in the history
  • Loading branch information
ikirudennis committed Jul 20, 2017
2 parents d7f5a75 + 33d960e commit 059e5e9
Show file tree
Hide file tree
Showing 11 changed files with 182 additions and 52 deletions.
4 changes: 2 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ python:
- "3.4"
- "3.5"
- "3.6"
- "pypy"
- "pypy-5.4"
# command to install dependencies
install:
- if [[ $REQUIREMENTS == true ]] ; then pip install -r requirements.txt ; fi
- if [[ $TRAVIS_PYTHON_VERSION == '3.2' ]] ; then pip install coverage==3.7.1; fi
- pip install coveralls pytest pytest-cov coverage codecov
- pip install -e .
- if [[ ! $TRAVIS_PYTHON_VERSION == 'pypy' ]] ; then pip install regex; fi
- if [[ ! $TRAVIS_PYTHON_VERSION == 'pypy-5.4' ]] ; then pip install regex; fi
# command to run tests
script: py.test
sudo: false
Expand Down
12 changes: 12 additions & 0 deletions CHANGELOG.textile
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
h1. Textile Changelog

h2. Version 2.3.15
* Bugfix: Don't break on Unicode characters in the fragment of a URL.

h2. Version 2.3.14
* Bugfix: Fix textile on Python 2.6 ("#48":https://github.com/textile/python-textile/issues/48)

h2. Version 2.3.13
* Remove extraneous arguments from the textile function. These were originally added long ago to work with Django, but markup-language support is long gone from Django.
* Bugfix: Don't mangle percent-encoded URLs so much. ("#45":https://github.com/textile/python-textile/issues/45)
* Bugfix: More fixes for poorly-formatted lists. ("#46":https://github.com/textile/python-textile/issues/46)
* Bugfix: Improve handling of whitespace in pre-formatted blocks. This now matches php-textile's handling of pre blocks much more closely. ("#47":https://github.com/textile/python-textile/issues/47)

h2. Version 2.3.12
* Bugfix: Don't die on pre blocks with unicode characters. ("#43":https://github.com/textile/python-textile/issues/43)
* Bugfix: Fix regressions introduced into the code between 2.2.2 and 2.3.11. (Special thanks to "@adam-iris":https://github.com/adam-iris for providing pull request "#44":https://github.com/textile/python-textile/pull/44)
Expand Down
27 changes: 27 additions & 0 deletions tests/test_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,30 @@ def test_blockcode_comment():
t = textile.Textile()
result = t.parse(input)
assert result == expect

def test_extended_pre_block_with_many_newlines():
"""Runs of newlines must survive block parsing.

Extra newlines in an extended pre block should not get cut down to only
two: the content is expected back unchanged between the <pre> tags. The
second case checks that the three newlines between a paragraph and a
following heading are kept in the output as well.
"""
# NOTE(review): the multi-line literals below show no blank lines in this
# view even though the test is about runs of newlines -- confirm their exact
# content against the repository copy before relying on them.
text = '''pre.. word
another
word
yet anothe word'''
expect = '''<pre>word
another
word
yet anothe word</pre>'''
result = textile.textile(text)
assert result == expect

text = 'p. text text\n\n\nh1. Hello\n'
expect = '\t<p>text text</p>\n\n\n\t<h1>Hello</h1>'
result = textile.textile(text)
assert result == expect
43 changes: 43 additions & 0 deletions tests/test_github_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,3 +130,46 @@ def test_github_issue_43():
result = textile.textile(text)
expect = '<pre>smart ‘quotes’ are not smart!</pre>'
assert result == expect

def test_github_issue_45():
    """Issue #45: a percent-encoded URL path is emitted unchanged in the href."""
    markup = '"test":https://myabstractwiki.ru/index.php/%D0%97%D0%B0%D0%B3%D0%BB%D0%B0%D0%B2%D0%BD%D0%B0%D1%8F_%D1%81%D1%82%D1%80%D0%B0%D0%BD%D0%B8%D1%86%D0%B0'
    wanted = '\t<p><a href="https://myabstractwiki.ru/index.php/%D0%97%D0%B0%D0%B3%D0%BB%D0%B0%D0%B2%D0%BD%D0%B0%D1%8F_%D1%81%D1%82%D1%80%D0%B0%D0%BD%D0%B8%D1%86%D0%B0">test</a></p>'
    assert textile.textile(markup) == wanted

def test_github_issue_46():
    """Issue #46: a malformed numbered list must render without a KeyError.

    Caution: both the markup and the resulting HTML are ugly.
    """
    markup = '# test\n### test\n## test'
    wanted = ('\t<ol>\n\t\t<li>test\n\t\t\t<ol>\n\t\t\t\t<li>test</li>'
              '\n\t\t\t</ol></li>\n\t\t<ol>\n\t\t\t<li>test</li>'
              '\n\t\t</ol></li>\n\t\t</ol>')
    assert textile.textile(markup) == wanted

def test_github_issue_47():
"""Issue #47 ("Incorrect wrap pre-formatted value").

The content of an extended pre block is expected back unchanged between
the <pre> tags.
"""
# NOTE(review): any blank lines inside the literals below may have been lost
# in this view -- confirm their exact content against the repository copy.
text = '''pre.. word
another
word
yet anothe word'''
result = textile.textile(text)
expect = '''<pre>word
another
word
yet anothe word</pre>'''
assert result == expect

def test_github_issue_49():
    """Issue #49: a link whose URL fragment holds Cyrillic text renders intact."""
    markup = '"link":https://ru.vuejs.org/v2/guide/components.html#Входные-параметры'
    wanted = '\t<p><a href="https://ru.vuejs.org/v2/guide/components.html#Входные-параметры">link</a></p>'
    assert textile.textile(markup) == wanted
4 changes: 0 additions & 4 deletions tests/test_glyphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,3 @@ def test_glyphs():
result = t.glyphs("<p><cite>Cat's Cradle</cite> by Vonnegut</p>")
expect = '<p><cite>Cat&#8217;s Cradle</cite> by Vonnegut</p>'
assert result == expect

result = t.glyphs('test"')
expect = 'test&#8221; '
assert result == expect
8 changes: 8 additions & 0 deletions tests/test_urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,11 @@ def test_rel_attribute():
result = t.parse('"$":http://domain.tld')
expect = '\t<p><a href="http://domain.tld" rel="nofollow">domain.tld</a></p>'
assert result == expect

def test_quotes_in_link_text():
    """Double quotes inside link text come out as curly-quote entities.

    The outer pair of quotes delimits the link; the inner pair belongs to
    the link text and is expected as &#8220; / &#8221; in the anchor.
    """
    markup = '""this is a quote in link text"":url'
    wanted = '\t<p><a href="url">&#8220;this is a quote in link text&#8221;</a></p>'
    assert Textile().parse(markup) == wanted
2 changes: 1 addition & 1 deletion tests/test_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@
('@monospaced text@, followed by text',
'\t<p><code>monospaced text</code>, followed by text</p>'),

('h2. A header\n\n\n\n\n\nsome text', '\t<h2>A header</h2>\n\n\t<p>some text</p>'),
('h2. A header\n\n\n\n\n\nsome text', '\t<h2>A header</h2>\n\n\n\n\n\n\t<p>some text</p>'),

('pre.. foo bar baz\nquux', '<pre>foo bar baz\nquux</pre>'),

Expand Down
126 changes: 85 additions & 41 deletions textile/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,12 @@

import uuid
import six
from six.moves.urllib_parse import (urlparse, urlsplit, urlunsplit, quote,
unquote)

from textile.tools import sanitizer, imagesize
from textile.regex_strings import (align_re_s, cls_re_s, halign_re_s,
pnct_re_s, regex_snippets, syms_re_s, table_span_re_s, valign_re_s)
from textile.regex_strings import (align_re_s, cls_re_s, pnct_re_s,
regex_snippets, syms_re_s, table_span_re_s)
from textile.utils import (decode_high, encode_high, encode_html, generate_tag,
has_raw_text, is_rel_url, is_valid_url, list_type, normalize_newlines,
parse_attributes, pba)
Expand All @@ -35,10 +37,6 @@
except ImportError:
from ordereddict import OrderedDict

from six.moves import urllib
urlparse, urlsplit, urlunsplit, quote, unquote = (urllib.parse.urlparse,
urllib.parse.urlsplit, urllib.parse.urlunsplit, urllib.parse.quote,
urllib.parse.unquote)

try:
import regex as re
Expand Down Expand Up @@ -277,6 +275,8 @@ def parse(self, text, rel=None, sanitize=False):
# a newline, replace it with a new style break tag and a newline.
text = re.sub(r'<br( /)?>(?!\n)', '<br />\n', text)

text = text.rstrip('\n')

return text

def table(self, text):
Expand Down Expand Up @@ -346,7 +346,14 @@ def fTextileList(self, match):
# This will only increment the count for list items, not
# definition items
if showitem:
self.olstarts[tl] = self.olstarts[tl] + 1
# Assume properly formatted input
try:
self.olstarts[tl] = self.olstarts[tl] + 1
# if we get here, we've got some poor textile formatting.
# add this type of list to olstarts and assume we'll start
# it at 1. expect screwy output.
except KeyError:
self.olstarts[tl] = 1

nm = re.match("^(?P<nextlistitem>[#\*;:]+)(_|[\d]+)?{0}"
"[ .].*".format(cls_re_s), nextline)
Expand Down Expand Up @@ -420,15 +427,29 @@ def block(self, text):
tre = '|'.join(self.btag)
else:
tre = '|'.join(self.btag_lite)
text = text.split('\n\n')

# split the text by two or more newlines, retaining the newlines in the
# split list
text = re.split(r'(\n{2,})', text)

# some blocks, when processed, will ask us to output nothing, if that's
# the case, we'd want to drop the whitespace which comes after it.
eat_whitespace = False

tag = 'p'
atts = cite = graf = ext = ''
atts = cite = ext = ''

last_item_is_a_shelf = False
out = []

for line in text:
# the line is just whitespace, add it to the output, and move on
if not line.strip():
if not eat_whitespace:
out.append(line)
continue

eat_whitespace = False

pattern = (r'^(?P<tag>{0})(?P<atts>{1}{2})\.(?P<ext>\.?)'
r'(?::(?P<cite>\S+))? (?P<content>.*)$'.format(tre,
align_re_s, cls_re_s))
Expand All @@ -437,14 +458,15 @@ def block(self, text):
if match:
# if we had a previous extended tag but not this time, close up
# the tag
if out:
last_item_is_a_shelf = out[-1] in self.shelf
if ext and match.group('tag') and last_item_is_a_shelf:
content = out.pop()
if ext and out:
# it's out[-2] because the last element in out is the
# whitespace that preceded this line
content = encode_html(out[-2], quotes=True)
content = generate_tag(block.inner_tag, content,
block.inner_atts)
out.append(generate_tag(block.outer_tag, content,
block.outer_atts))
content = generate_tag(block.outer_tag, content,
block.outer_atts)
out[-2] = content
tag, atts, ext, cite, content = match.groups()
block = Block(self, **match.groupdict())
inner_block = generate_tag(block.inner_tag, block.content,
Expand All @@ -463,40 +485,58 @@ def block(self, text):
# no tag specified
else:
# if we're inside an extended block, add the text from the
# previous extension to the front
# previous line to the front
if ext and out:
line = '{0}\n\n{1}'.format(out.pop(), line)
whitespace = ' \t\n\r\f\v'
if ext or not line[0] in whitespace:
line = '{0}{1}'.format(out.pop(), line)
# the logic in the if statement below is a bit confusing in
# php-textile. I'm still not sure I understand what the php
# code is doing. Something tells me it's a phpsadness. Anyway,
# this works, and is much easier to understand: if we're not in
# an extension, and the line doesn't begin with a space, treat
# it like a block to insert. Lines that begin with a space are
# not processed as a block.
if not ext and not line[0] == ' ':
block = Block(self, tag, atts, ext, cite, line)
# if the block contains html tags, generate_tag would
# mangle it, so process as is.
if block.tag == 'p' and not has_raw_text(block.content):
line = block.content
else:
line = generate_tag(block.outer_tag, block.content,
block.outer_atts)
if block.inner_tag == 'code':
line = block.content
if block.outer_tag != 'pre' and not has_raw_text(line):
line = "\t{0}".format(line)
line = "\t{0}".format(line)
else:
line = self.graf(line)

line = self.doPBr(line)
line = line.replace('<br>', '<br />')

if line.strip():
# if we're in an extended block, and we haven't specified a new
# tag, join this line to the last item of the output
if ext and not match:
last_item = out.pop()
out.append('{0}{1}'.format(last_item, line))
elif not block.eat:
# or if it's a type of block which indicates we shouldn't drop
# it, add it to the output.
out.append(line)

if not ext:
tag = 'p'
atts = ''
cite = ''
graf = ''

# if it's a block we should drop, don't keep the whitespace which
# will come after it.
if block.eat:
eat_whitespace = True

# at this point, we've gone through all the lines, and if there's still
# an extension in effect, we close it here.
if ext and out:
out.append(generate_tag(block.outer_tag, out.pop(),
block.outer_atts))
return '\n\n'.join(out)
final = generate_tag(block.outer_tag, out.pop(), block.outer_atts)
out.append(final)
return ''.join(out)

def footnoteRef(self, text):
# somehow php-textile gets away with not capturing the space.
Expand Down Expand Up @@ -537,10 +577,6 @@ def glyphs(self, text):
So, for the first pass, we use the glyph_search_initial set of
regexes. For all remaining passes, we use glyph_search
"""
# fix: hackish
if text.endswith('"'):
text = '{0} '.format(text)

text = text.rstrip('\n')
result = []
searchlist = self.glyph_search_initial
Expand Down Expand Up @@ -689,7 +725,7 @@ def markStartOfLinks(self, text):

try:
possibility = possible_start_quotes.pop()
except IndexError:
except IndexError: # pragma: no cover
# If out of possible starting segments we back the
# last one from the linkparts array
linkparts.pop()
Expand Down Expand Up @@ -942,11 +978,19 @@ def encode_url(self, url):
quote(netloc_parsed['password']))
host = netloc_parsed['host']
port = netloc_parsed['port'] and netloc_parsed['port']
path = '/'.join( # could be encoded slashes!
quote(unquote(pce).encode('utf8'), b'')
for pce in parsed.path.split('/')
)
fragment = quote(unquote(parsed.fragment))
# The code below splits the path portion of the URL on slashes, decodes any
# percent-encoded characters back into plain characters, then
# re-percent-encodes whatever needs it. It sounds screwy, but the URL could
# include encoded slashes, and this is a way to clean that up. It branches
# for PY2/PY3 because quote and unquote expect different input types: on PY2
# each piece is first encoded to a UTF-8 byte string, while on PY3 the str is
# passed through as-is.
if six.PY2:
path_parts = (quote(unquote(pce.encode('utf8')), b'') for pce in
parsed.path.split('/'))
else:
path_parts = (quote(unquote(pce), b'') for pce in
parsed.path.split('/'))
path = '/'.join(path_parts)

# put it back together
netloc = ''
Expand All @@ -958,7 +1002,7 @@ def encode_url(self, url):
netloc = '{0}{1}'.format(netloc, host)
if port:
netloc = '{0}:{1}'.format(netloc, port)
return urlunsplit((scheme, netloc, path, parsed.query, fragment))
return urlunsplit((scheme, netloc, path, parsed.query, parsed.fragment))

def span(self, text):
qtags = (r'\*\*', r'\*', r'\?\?', r'\-', r'__',
Expand Down Expand Up @@ -1356,7 +1400,7 @@ def _increment_link_index(self):
return self.linkIndex


def textile(text, html_type='xhtml', encoding=None, output=None):
def textile(text, html_type='xhtml'):
"""
Apply Textile to a block of text.
Expand Down
1 change: 1 addition & 0 deletions textile/objects/block.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def process(self):
# It will be empty if the regex matched and ate it.
if '' == notedef:
self.content = notedef
self.eat = True

fns = re.search(r'fn(?P<fnid>{0}+)'.format(regex_snippets['digit']),
self.tag, flags=re.U)
Expand Down
Loading

0 comments on commit 059e5e9

Please sign in to comment.