def _process_list_item(self, cap, bull):
    """Emit start/end tokens for every item of a captured list block.

    *cap* is split into items with ``self.rules.list_item``.  For each
    item the leading bullet (as matched by ``self.rules.list_bullet``)
    is removed, continuation lines are outdented by the bullet's width,
    and the item is classified as tight or loose.  The method then
    appends to ``self.tokens``:

    * a ``list_item_start`` or ``loose_item_start`` token whose
      ``text`` is the bullet and whose ``spaces`` is its length,
    * whatever ``self.parse(item, self.list_rules)`` appends,
    * a ``list_item_end`` token carrying the same ``spaces``.

    :param cap: text of the whole list block.
    :param bull: not used by this method; kept so the signature stays
        unchanged for callers (presumably the base lexer's list
        handling -- confirm against mistune).
    """
    items = self.rules.list_item.findall(cap)
    last = len(items) - 1

    # Set when the previous item ended with a newline; such a trailing
    # newline makes the following item loose as well.
    next_is_loose = False

    for index, groups in enumerate(items):
        item = groups[0]
        raw_length = len(item)

        # Slurp the bullet so it can be reported in the tokens, then
        # remove it from the item text.
        bullet_match = self.rules.list_bullet.match(item)
        bullet = bullet_match.group(0) if bullet_match else ''
        item = self.rules.list_bullet.sub('', item)

        # Outdent continuation lines by at most the removed width.
        # A zero width would build the invalid pattern ``^ {1,0}``
        # (re.error), so there is nothing to outdent in that case.
        width = raw_length - len(item)
        if width and '\n ' in item:
            pattern = re.compile(r'^ {1,%d}' % width, flags=re.M)
            item = pattern.sub('', item)

        # An item is loose when the previous item said so, when it
        # contains a blank line followed by more content, or when it
        # is not the last item and ends with a newline.
        loose = next_is_loose
        if not loose and re.search(r'\n\n(?!\s*$)', item):
            loose = True

        if index != last and item:
            next_is_loose = item.endswith('\n')
            if not loose:
                loose = next_is_loose

        self.tokens.append({
            'type': 'loose_item_start' if loose else 'list_item_start',
            'text': bullet,
            'spaces': len(bullet)
        })

        # recurse into the item body
        self.parse(item, self.list_rules)

        self.tokens.append({
            'type': 'list_item_end',
            'spaces': len(bullet)
        })
0000000..2a9a0dd --- /dev/null +++ b/tests/data/blexer-lists.md @@ -0,0 +1,66 @@ ++ Re: Your Brains ++ Shop Vac ++ Flickr + + +1. First of May +2. You Ruined Everything +3. Sucker Punch + + +* Lorem ipsum dolor sit amet, consectetuer adipiscing elit. + Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, + viverra nec, fringilla in, laoreet vitae, risus. +* Donec sit amet nisl. Aliquam semper ipsum sit amet velit. + Suspendisse id sem consectetuer libero luctus adipiscing. + + +* Lorem ipsum dolor sit amet, consectetuer adipiscing elit. +Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, +viverra nec, fringilla in, laoreet vitae, risus. +* Donec sit amet nisl. Aliquam semper ipsum sit amet velit. +Suspendisse id sem consectetuer libero luctus adipiscing. + + +* Codey Monkey + +* Tom Cruise Crazy + + +1. This is a list item with two paragraphs. Lorem ipsum dolor + sit amet, consectetuer adipiscing elit. Aliquam hendrerit + mi posuere lectus. + + Vestibulum enim wisi, viverra nec, fringilla in, laoreet + vitae, risus. Donec sit amet nisl. Aliquam semper ipsum + sit amet velit. + +2. Suspendisse id sem consectetuer libero luctus adipiscing. + + +* This is a list item with two paragraphs. + + This is the second paragraph in the list item. You're +only required to indent the first line. Lorem ipsum dolor +sit amet, consectetuer adipiscing elit. + +* Another item in the same list. + + +* A list item with a blockquote: + + > This is a blockquote + > inside a list item. + + +* A list item with a code block: + + $ :(){:|:&};: + + +1. This is a list item has a nested list. + + 1. Lorem ipsum dolor sit amet, consectetuer adipiscing + elit. Aliquam hendrerit mi posuere lectus. 
def test_parse_list_block(self):
    """Check the list tokens the block lexer emits for blexer-lists.md.

    The fixture is parsed once with ``self._parse``.  The token stream
    is then consumed one list at a time: tokens are popped up to and
    including the next ``list_end`` and every token whose type has an
    entry in that list's expectations is compared, in order, against
    the next expected value for that type.
    """
    tokens = self._parse('blexer-lists.md')

    def validate(token, expected_token=None):
        """Assert *token* is well formed and matches *expected_token*."""
        type_ = token['type']

        # Both tight and loose item starts carry the bullet text and
        # its width; item ends carry the width only.
        if type_ in ('list_item_start', 'loose_item_start'):
            assert 'text' in token
            assert 'spaces' in token
        elif type_ == 'list_item_end':
            assert 'spaces' in token

        if not expected_token:
            return

        if 'text' in token:
            assert_equal(token['text'], expected_token['text'])
        if 'spaces' in token:
            assert_equal(token['spaces'], expected_token['spaces'])

    def process(tokens, expected):
        """Consume *tokens* through the next ``list_end``.

        Returns the same (mutated) list holding the remaining tokens.
        """
        while True:
            assert tokens, 'token stream ended before list_end'
            token = tokens.pop(0)
            type_ = token['type']

            expected_token = None
            if type_ in expected:
                assert expected[type_], 'unexpected extra %s token' % type_
                expected_token = expected[type_].pop(0)

            validate(token, expected_token)

            if type_ == 'list_end':
                return tokens

    # One entry per list in the fixture, in document order.  'spaces'
    # is the length of the bullet text, so each bullet string below is
    # exactly that many characters long.
    expected_lists = [
        # list 1
        {
            'list_item_start': [
                {'text': '+   ', 'spaces': 4},
                {'text': '+   ', 'spaces': 4},
                {'text': '+   ', 'spaces': 4},
            ],
            'text': [
                {'text': 'Re: Your Brains'},
                {'text': 'Shop Vac'},
                {'text': 'Flickr'},
            ],
            'list_item_end': [
                {'spaces': 4},
                {'spaces': 4},
                {'spaces': 4},
            ],
        },
        # list 2
        {
            'list_item_start': [
                {'text': '1.  ', 'spaces': 4},
                {'text': '2.  ', 'spaces': 4},
                {'text': '3.  ', 'spaces': 4},
            ],
            'text': [
                {'text': 'First of May'},
                {'text': 'You Ruined Everything'},
                {'text': 'Sucker Punch'},
            ],
            'list_item_end': [
                {'spaces': 4},
                {'spaces': 4},
                {'spaces': 4},
            ],
        },
        # list 3
        {
            'list_item_start': [
                {'text': '*   ', 'spaces': 4},
                {'text': '*   ', 'spaces': 4},
            ],
            'text': [
                {'text': 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit.'},
                {'text': 'Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,'},
                {'text': 'viverra nec, fringilla in, laoreet vitae, risus.'},
                {'text': 'Donec sit amet nisl. Aliquam semper ipsum sit amet velit.'},
                {'text': 'Suspendisse id sem consectetuer libero luctus adipiscing.'},
            ],
            'list_item_end': [
                {'spaces': 4},
                {'spaces': 4},
            ],
        },
        # list 4
        {
            'list_item_start': [
                {'text': '*   ', 'spaces': 4},
                {'text': '*   ', 'spaces': 4},
            ],
            'text': [
                {'text': 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit.'},
                {'text': 'Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,'},
                {'text': 'viverra nec, fringilla in, laoreet vitae, risus.'},
                {'text': 'Donec sit amet nisl. Aliquam semper ipsum sit amet velit.'},
                {'text': 'Suspendisse id sem consectetuer libero luctus adipiscing.'},
            ],
            'list_item_end': [
                {'spaces': 4},
                {'spaces': 4},
            ],
        },
        # list 5
        {
            'loose_item_start': [
                {'text': '*   ', 'spaces': 4},
                {'text': '*   ', 'spaces': 4},
            ],
            'text': [
                {'text': 'Codey Monkey'},
                {'text': 'Tom Cruise Crazy'},
            ],
            'list_item_end': [
                {'spaces': 4},
                {'spaces': 4},
            ],
        },
        # list 6
        {
            'loose_item_start': [
                {'text': '1.  ', 'spaces': 4},
                {'text': '2.  ', 'spaces': 4},
            ],
            'text': [
                {'text': 'This is a list item with two paragraphs. Lorem ipsum dolor'},
                {'text': 'sit amet, consectetuer adipiscing elit. Aliquam hendrerit'},
                {'text': 'mi posuere lectus.'},
                {'text': 'Vestibulum enim wisi, viverra nec, fringilla in, laoreet'},
                {'text': 'vitae, risus. Donec sit amet nisl. Aliquam semper ipsum'},
                {'text': 'sit amet velit.'},
                {'text': 'Suspendisse id sem consectetuer libero luctus adipiscing.'},
            ],
            'list_item_end': [
                {'spaces': 4},
                {'spaces': 4},
            ],
        },
        # list 7
        {
            'loose_item_start': [
                {'text': '*   ', 'spaces': 4},
                {'text': '*   ', 'spaces': 4},
            ],
            'text': [
                {'text': 'This is a list item with two paragraphs.'},
                {'text': 'This is the second paragraph in the list item. You\'re'},
                {'text': 'only required to indent the first line. Lorem ipsum dolor'},
                {'text': 'sit amet, consectetuer adipiscing elit.'},
                {'text': 'Another item in the same list.'},
            ],
            'list_item_end': [
                {'spaces': 4},
                {'spaces': 4},
            ],
        },
        # list 8
        {
            'loose_item_start': [
                {'text': '*   ', 'spaces': 4},
            ],
            'text': [
                {'text': 'A list item with a blockquote:'},
            ],
            'list_item_end': [
                {'spaces': 4},
            ],
        },
        # list 9
        {
            'loose_item_start': [
                {'text': '*   ', 'spaces': 4},
            ],
            'text': [
                {'text': 'A list item with a code block:'},
            ],
            'list_item_end': [
                {'spaces': 4},
            ],
        },
        # list 10
        {
            'loose_item_start': [
                {'text': '1.  ', 'spaces': 4},
                {'text': '1.  ', 'spaces': 4},
            ],
            'text': [
                {'text': 'This is a list item has a nested list.'},
                {'text': 'Lorem ipsum dolor sit amet, consectetuer adipiscing'},
                {'text': 'elit. Aliquam hendrerit mi posuere lectus.'},
            ],
            'list_item_end': [
                {'spaces': 4},
                {'spaces': 4},
            ],
        },
    ]

    for expected in expected_lists:
        tokens = process(tokens, expected)


def teardown(self):
    pass