author     rsiddharth <s@ricketyspace.net>    2018-01-02 00:45:51 +0000
committer  rsiddharth <s@ricketyspace.net>    2018-01-02 00:45:51 +0000
commit     9011ef7c4e768955bd3a4355d86eda39469bc127 (patch)
tree       e6ebf0614049509409609598d416cf8618986a1c
parent     cad2e7b18e510a89e159148f1141691fd1fc6715 (diff)
md_tw.py: Add TWBlockLexer._process_list_item.
* md_tw.py (TWBlockLexer._process_list_item): New method.
* tests/test_md_tw.py (TestTWBlockLexer.test_parse_list_block): New test.
* tests/data/blexer-lists.md: New file.
-rw-r--r--    md_tw.py                       54
-rw-r--r--    tests/data/blexer-lists.md     66
-rw-r--r--    tests/test_md_tw.py           225
3 files changed, 345 insertions, 0 deletions
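For context, the new tokens can be exercised roughly as below. This is a minimal illustrative sketch, not part of the commit: it assumes TWBlockLexer keeps mistune's BlockLexer.parse() interface and that list parsing is (or will be) wired through the new _process_list_item(); neither is shown in this diff.

    # Illustrative sketch only -- assumes TWBlockLexer.parse() behaves like
    # mistune.BlockLexer.parse() and that list blocks are routed through the
    # new _process_list_item() method.
    from md_tw import TWBlockLexer

    lexer = TWBlockLexer()
    tokens = lexer.parse('+   Re: Your Brains\n+   Shop Vac\n')

    for tok in tokens:
        if tok['type'] in ('list_item_start', 'loose_item_start'):
            # Each item-start token carries the raw bullet ('text') and its
            # width ('spaces'), which a text wrapper can later use to indent
            # wrapped continuation lines under the bullet.
            print(tok['type'], repr(tok['text']), tok['spaces'])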
diff --git a/md_tw.py b/md_tw.py
index 238cbb2..7b20692 100644
--- a/md_tw.py
+++ b/md_tw.py
@@ -73,6 +73,60 @@ class TWBlockLexer(mistune.BlockLexer):
            'text': m.group(0)
        })
+    def _process_list_item(self, cap, bull):
+        cap = self.rules.list_item.findall(cap)
+
+        _next = False
+        length = len(cap)
+
+        for i in range(length):
+            item = cap[i][0]
+
+            # slurp and remove the bullet
+            space = len(item)
+            bullet = ''
+            bm = self.rules.list_bullet.match(item)
+            if bm:
+                bullet = bm.group(0)
+
+            item = self.rules.list_bullet.sub('', item)
+
+            # outdent
+            if '\n ' in item:
+                space = space - len(item)
+                pattern = re.compile(r'^ {1,%d}' % space, flags=re.M)
+                item = pattern.sub('', item)
+
+            # determine whether item is loose or not
+            loose = _next
+            if not loose and re.search(r'\n\n(?!\s*$)', item):
+                loose = True
+
+            rest = len(item)
+            if i != length - 1 and rest:
+                _next = item[rest-1] == '\n'
+                if not loose:
+                    loose = _next
+
+            if loose:
+                t = 'loose_item_start'
+            else:
+                t = 'list_item_start'
+
+            self.tokens.append({
+                'type': t,
+                'text': bullet,
+                'spaces': len(bullet)
+            })
+
+            # recurse
+            self.parse(item, self.list_rules)
+
+            self.tokens.append({
+                'type': 'list_item_end',
+                'spaces': len(bullet)
+            })
+
class TWInlineLexer(mistune.InlineLexer):
"""Text Wrap Inline level lexer for inline gramars."""
diff --git a/tests/data/blexer-lists.md b/tests/data/blexer-lists.md
new file mode 100644
index 0000000..2a9a0dd
--- /dev/null
+++ b/tests/data/blexer-lists.md
@@ -0,0 +1,66 @@
++   Re: Your Brains
++   Shop Vac
++   Flickr
+
+
+1.  First of May
+2.  You Ruined Everything
+3.  Sucker Punch
+
+
+*   Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
+    Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
+    viverra nec, fringilla in, laoreet vitae, risus.
+*   Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
+    Suspendisse id sem consectetuer libero luctus adipiscing.
+
+
+*   Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
+Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
+viverra nec, fringilla in, laoreet vitae, risus.
+*   Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
+Suspendisse id sem consectetuer libero luctus adipiscing.
+
+
+*   Codey Monkey
+
+*   Tom Cruise Crazy
+
+
+1.  This is a list item with two paragraphs. Lorem ipsum dolor
+    sit amet, consectetuer adipiscing elit. Aliquam hendrerit
+    mi posuere lectus.
+
+    Vestibulum enim wisi, viverra nec, fringilla in, laoreet
+    vitae, risus. Donec sit amet nisl. Aliquam semper ipsum
+    sit amet velit.
+
+2.  Suspendisse id sem consectetuer libero luctus adipiscing.
+
+
+*   This is a list item with two paragraphs.
+
+    This is the second paragraph in the list item. You're
+only required to indent the first line. Lorem ipsum dolor
+sit amet, consectetuer adipiscing elit.
+
+*   Another item in the same list.
+
+
+*   A list item with a blockquote:
+
+    > This is a blockquote
+    > inside a list item.
+
+
+*   A list item with a code block:
+
+        $ :(){:|:&};:
+
+
+1.  This is a list item has a nested list.
+
+    1.  Lorem ipsum dolor sit amet, consectetuer adipiscing
+        elit. Aliquam hendrerit mi posuere lectus.
+
+            $ :(){:|:&};:
diff --git a/tests/test_md_tw.py b/tests/test_md_tw.py
index da7981c..d90db2c 100644
--- a/tests/test_md_tw.py
+++ b/tests/test_md_tw.py
@@ -103,6 +103,231 @@ class TestTWBlockLexer(object):
        self._validate(tokens, 'hrule', expected_hrs)
+    def test_parse_list_block(self):
+        tokens = self._parse('blexer-lists.md')
+
+        def process(tokens):
+            token = tokens.pop(0)
+            while token:
+                type_ = token['type']
+
+                expected_token = None
+                if type_ in expected:
+                    expected_token = expected[type_].pop(0)
+
+                validate(token, expected_token)
+
+                if type_ == 'list_end':
+                    break
+                else:
+                    token = tokens.pop(0)
+
+            return tokens
+
+        def validate(token, expected_token=None):
+            type_ = token['type']
+
+            if type_ == 'list_item_start':
+                assert 'text' in token
+                assert 'spaces' in token
+            elif type_ == 'list_item_end':
+                assert 'spaces' in token
+
+            if not expected_token:
+                return
+
+            if 'text' in token:
+                assert_equal(token['text'], expected_token['text'])
+            if 'spaces' in token:
+                assert_equal(token['spaces'], expected_token['spaces'])
+
+            return
+
+        # test list 1
+        expected = {
+            'list_item_start': [
+                {'text': '+   ', 'spaces': 4},
+                {'text': '+   ', 'spaces': 4},
+                {'text': '+   ', 'spaces': 4}
+            ],
+            'text': [
+                {'text': 'Re: Your Brains'},
+                {'text': 'Shop Vac'},
+                {'text': 'Flickr'},
+            ],
+            'list_item_end': [
+                {'spaces': 4},
+                {'spaces': 4},
+                {'spaces': 4}
+            ]
+        }
+        tokens = process(tokens)
+
+        # test list 2
+        expected = {
+            'list_item_start': [
+                {'text': '1.  ', 'spaces': 4},
+                {'text': '2.  ', 'spaces': 4},
+                {'text': '3.  ', 'spaces': 4}
+            ],
+            'text': [
+                {'text': 'First of May'},
+                {'text': 'You Ruined Everything'},
+                {'text': 'Sucker Punch'},
+            ],
+            'list_item_end': [
+                {'spaces': 4},
+                {'spaces': 4},
+                {'spaces': 4}
+            ]
+        }
+        tokens = process(tokens)
+
+        # test list 3
+        expected = {
+            'list_item_start': [
+                {'text': '*   ', 'spaces': 4},
+                {'text': '*   ', 'spaces': 4},
+            ],
+            'text': [
+                {'text': 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit.'},
+                {'text': 'Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,'},
+                {'text': 'viverra nec, fringilla in, laoreet vitae, risus.'},
+                {'text': 'Donec sit amet nisl. Aliquam semper ipsum sit amet velit.'},
+                {'text': 'Suspendisse id sem consectetuer libero luctus adipiscing.'},
+            ],
+            'list_item_end': [
+                {'spaces': 4},
+                {'spaces': 4},
+            ]
+        }
+        tokens = process(tokens)
+
+        # test list 4
+        expected = {
+            'list_item_start': [
+                {'text': '*   ', 'spaces': 4},
+                {'text': '*   ', 'spaces': 4},
+            ],
+            'text': [
+                {'text': 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit.'},
+                {'text': 'Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,'},
+                {'text': 'viverra nec, fringilla in, laoreet vitae, risus.'},
+                {'text': 'Donec sit amet nisl. Aliquam semper ipsum sit amet velit.'},
+                {'text': 'Suspendisse id sem consectetuer libero luctus adipiscing.'},
+            ],
+            'list_item_end': [
+                {'spaces': 4},
+                {'spaces': 4},
+            ]
+        }
+        tokens = process(tokens)
+
+        # test list 5
+        expected = {
+            'loose_item_start': [
+                {'text': '*   ', 'spaces': 4},
+                {'text': '*   ', 'spaces': 4},
+            ],
+            'text': [
+                {'text': 'Codey Monkey'},
+                {'text': 'Tom Cruise Crazy'},
+            ],
+            'list_item_end': [
+                {'spaces': 4},
+                {'spaces': 4},
+            ]
+        }
+        tokens = process(tokens)
+
+        # test list 6
+        expected = {
+            'loose_item_start': [
+                {'text': '1.  ', 'spaces': 4},
+                {'text': '2.  ', 'spaces': 4},
+            ],
+            'text': [
+                {'text': 'This is a list item with two paragraphs. Lorem ipsum dolor'},
+                {'text': 'sit amet, consectetuer adipiscing elit. Aliquam hendrerit'},
+                {'text': 'mi posuere lectus.'},
+                {'text': 'Vestibulum enim wisi, viverra nec, fringilla in, laoreet'},
+                {'text': 'vitae, risus. Donec sit amet nisl. Aliquam semper ipsum'},
+                {'text': 'sit amet velit.'},
+                {'text': 'Suspendisse id sem consectetuer libero luctus adipiscing.'},
+            ],
+            'list_item_end': [
+                {'spaces': 4},
+                {'spaces': 4},
+            ]
+        }
+        tokens = process(tokens)
+
+        # test list 7
+        expected = {
+            'loose_item_start': [
+                {'text': '*   ', 'spaces': 4},
+                {'text': '*   ', 'spaces': 4},
+            ],
+            'text': [
+                {'text': 'This is a list item with two paragraphs.'},
+                {'text': 'This is the second paragraph in the list item. You\'re'},
+                {'text': 'only required to indent the first line. Lorem ipsum dolor'},
+                {'text': 'sit amet, consectetuer adipiscing elit.'},
+                {'text': 'Another item in the same list.'},
+            ],
+            'list_item_end': [
+                {'spaces': 4},
+                {'spaces': 4},
+            ]
+        }
+        tokens = process(tokens)
+
+        # test list 8
+        expected = {
+            'loose_item_start': [
+                {'text': '*   ', 'spaces': 4},
+            ],
+            'text': [
+                {'text': 'A list item with a blockquote:'},
+            ],
+            'list_item_end': [
+                {'spaces': 4},
+            ]
+        }
+        tokens = process(tokens)
+
+        # test list 9
+        expected = {
+            'loose_item_start': [
+                {'text': '*   ', 'spaces': 4},
+            ],
+            'text': [
+                {'text': 'A list item with a code block:'},
+            ],
+            'list_item_end': [
+                {'spaces': 4},
+            ]
+        }
+        tokens = process(tokens)
+
+        # test list 10
+        expected = {
+            'loose_item_start': [
+                {'text': '1.  ', 'spaces': 4},
+                {'text': '1.  ', 'spaces': 4},
+            ],
+            'text': [
+                {'text': 'This is a list item has a nested list.'},
+                {'text': 'Lorem ipsum dolor sit amet, consectetuer adipiscing'},
+                {'text': 'elit. Aliquam hendrerit mi posuere lectus.'},
+            ],
+            'list_item_end': [
+                {'spaces': 4},
+                {'spaces': 4},
+            ]
+        }
+        tokens = process(tokens)
+
def teardown(self):
pass