diff options
 md_tw.py                   |  54 ++++++++
 tests/data/blexer-lists.md |  66 ++++++++++
 tests/test_md_tw.py        | 225 ++++++++++++++++++++++++++++
 3 files changed, 345 insertions(+), 0 deletions(-)
| @@ -73,6 +73,60 @@ class TWBlockLexer(mistune.BlockLexer):              'text': m.group(0)              }) +    def _process_list_item(self, cap, bull): +        cap = self.rules.list_item.findall(cap) + +        _next = False +        length = len(cap) + +        for i in range(length): +            item = cap[i][0] + +            # slurp and remove the bullet +            space = len(item) +            bullet = '' +            bm = self.rules.list_bullet.match(item) +            if bm: +                bullet = bm.group(0) + +            item = self.rules.list_bullet.sub('', item) + +            # outdent +            if '\n ' in item: +                space = space - len(item) +                pattern = re.compile(r'^ {1,%d}' % space, flags=re.M) +                item = pattern.sub('', item) + +            # determine whether item is loose or not +            loose = _next +            if not loose and re.search(r'\n\n(?!\s*$)', item): +                loose = True + +            rest = len(item) +            if i != length - 1 and rest: +                _next = item[rest-1] == '\n' +                if not loose: +                    loose = _next + +            if loose: +                t = 'loose_item_start' +            else: +                t = 'list_item_start' + +            self.tokens.append({ +                'type': t, +                'text': bullet, +                'spaces': len(bullet) +                }) + +            # recurse +            self.parse(item, self.list_rules) + +            self.tokens.append({ +                'type': 'list_item_end', +                'spaces': len(bullet) +                }) +  class TWInlineLexer(mistune.InlineLexer):      """Text Wrap Inline level lexer for inline gramars.""" diff --git a/tests/data/blexer-lists.md b/tests/data/blexer-lists.md new file mode 100644 index 0000000..2a9a0dd --- /dev/null +++ b/tests/data/blexer-lists.md @@ -0,0 +1,66 @@ ++   Re: Your Brains ++   Shop Vac ++   Flickr + + +1.  
First of May +2.  You Ruined Everything +3.  Sucker Punch + + +*   Lorem ipsum dolor sit amet, consectetuer adipiscing elit. +    Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, +    viverra nec, fringilla in, laoreet vitae, risus. +*   Donec sit amet nisl. Aliquam semper ipsum sit amet velit. +    Suspendisse id sem consectetuer libero luctus adipiscing. + + +*   Lorem ipsum dolor sit amet, consectetuer adipiscing elit. +Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, +viverra nec, fringilla in, laoreet vitae, risus. +*   Donec sit amet nisl. Aliquam semper ipsum sit amet velit. +Suspendisse id sem consectetuer libero luctus adipiscing. + + +*   Codey Monkey + +*   Tom Cruise Crazy + + +1.  This is a list item with two paragraphs. Lorem ipsum dolor +    sit amet, consectetuer adipiscing elit. Aliquam hendrerit +    mi posuere lectus. + +    Vestibulum enim wisi, viverra nec, fringilla in, laoreet +    vitae, risus. Donec sit amet nisl. Aliquam semper ipsum +    sit amet velit. + +2.  Suspendisse id sem consectetuer libero luctus adipiscing. + + +*   This is a list item with two paragraphs. + +    This is the second paragraph in the list item. You're +only required to indent the first line. Lorem ipsum dolor +sit amet, consectetuer adipiscing elit. + +*   Another item in the same list. + + +*   A list item with a blockquote: + +    > This is a blockquote +    > inside a list item. + + +*   A list item with a code block: + +        $ :(){:|:&};: + + +1.  This is a list item has a nested list. + +    1.  Lorem ipsum dolor sit amet, consectetuer adipiscing +        elit. Aliquam hendrerit mi posuere lectus. 
+ +            $ :(){:|:&};: diff --git a/tests/test_md_tw.py b/tests/test_md_tw.py index da7981c..d90db2c 100644 --- a/tests/test_md_tw.py +++ b/tests/test_md_tw.py @@ -103,6 +103,231 @@ class TestTWBlockLexer(object):          self._validate(tokens, 'hrule', expected_hrs) +    def test_parse_list_block(self): +        tokens = self._parse('blexer-lists.md') + +        def process(tokens): +            token = tokens.pop(0) +            while token: +                type_ = token['type'] + +                expected_token = None +                if type_ in expected: +                    expected_token = expected[type_].pop(0) + +                validate(token, expected_token) + +                if type_ == 'list_end': +                    break +                else: +                    token = tokens.pop(0) + +            return tokens + +        def validate(token, expected_token=None): +            type_ = token['type'] + +            if type_ == 'list_item_start': +                assert 'text' in token +                assert 'spaces' in token +            elif type_ == 'list_item_end': +                assert 'spaces' in token + +            if not expected_token: +                return + +            if 'text' in token: +                assert_equal(token['text'], expected_token['text']) +            if 'spaces' in token: +                assert_equal(token['spaces'], expected_token['spaces']) + +            return + +        # test list 1 +        expected = { +            'list_item_start': [ +                {'text': '+   ', 'spaces': 4}, +                {'text': '+   ', 'spaces': 4}, +                {'text': '+   ', 'spaces': 4} +                ], +            'text': [ +                {'text': 'Re: Your Brains'}, +                {'text': 'Shop Vac'}, +                {'text': 'Flickr'}, +                ], +            'list_item_end': [ +                {'spaces': 4}, +                {'spaces': 4}, +                {'spaces': 4} +              
  ] +            } +        tokens = process(tokens) + +        # test list 2 +        expected = { +            'list_item_start': [ +                {'text': '1.  ', 'spaces': 4}, +                {'text': '2.  ', 'spaces': 4}, +                {'text': '3.  ', 'spaces': 4} +                ], +            'text': [ +                {'text': 'First of May'}, +                {'text': 'You Ruined Everything'}, +                {'text': 'Sucker Punch'}, +                ], +            'list_item_end': [ +                {'spaces': 4}, +                {'spaces': 4}, +                {'spaces': 4} +                ] +            } +        token = process(tokens) + +        # test list 3 +        expected = { +            'list_item_start': [ +                {'text': '*   ', 'spaces': 4}, +                {'text': '*   ', 'spaces': 4}, +                ], +            'text': [ +                {'text': 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit.'}, +                {'text': 'Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,'}, +                {'text': 'viverra nec, fringilla in, laoreet vitae, risus.'}, +                {'text': 'Donec sit amet nisl. Aliquam semper ipsum sit amet velit.'}, +                {'text': 'Suspendisse id sem consectetuer libero luctus adipiscing.'}, +                ], +            'list_item_end': [ +                {'spaces': 4}, +                {'spaces': 4}, +                ] +            } +        tokens = process(tokens) + +        # test list 4 +        expected = { +            'list_item_start': [ +                {'text': '*   ', 'spaces': 4}, +                {'text': '*   ', 'spaces': 4}, +                ], +            'text': [ +                {'text': 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit.'}, +                {'text': 'Aliquam hendrerit mi posuere lectus. 
Vestibulum enim wisi,'}, +                {'text': 'viverra nec, fringilla in, laoreet vitae, risus.'}, +                {'text': 'Donec sit amet nisl. Aliquam semper ipsum sit amet velit.'}, +                {'text': 'Suspendisse id sem consectetuer libero luctus adipiscing.'}, +                ], +            'list_item_end': [ +                {'spaces': 4}, +                {'spaces': 4}, +                ] +            } +        tokens = process(tokens) + +        # test list 5 +        expected = { +            'loose_item_start': [ +                {'text': '*   ', 'spaces': 4}, +                {'text': '*   ', 'spaces': 4}, +                ], +            'text': [ +                {'text': 'Codey Monkey'}, +                {'text': 'Tom Cruise Crazy'}, +                ], +            'list_item_end': [ +                {'spaces': 4}, +                {'spaces': 4}, +                ] +            } +        tokens = process(token) + +        # test list 5 +        expected = { +            'loose_item_start': [ +                {'text': '1.  ', 'spaces': 4}, +                {'text': '2.  ', 'spaces': 4}, +                ], +            'text': [ +                {'text': 'This is a list item with two paragraphs. Lorem ipsum dolor'}, +                {'text': 'sit amet, consectetuer adipiscing elit. Aliquam hendrerit'}, +                {'text': 'mi posuere lectus.'}, +                {'text': 'Vestibulum enim wisi, viverra nec, fringilla in, laoreet'}, +                {'text': 'vitae, risus. Donec sit amet nisl. 
Aliquam semper ipsum'}, +                {'text': 'sit amet velit.'}, +                {'text': 'Suspendisse id sem consectetuer libero luctus adipiscing.'}, +                ], +            'list_item_end': [ +                {'spaces': 4}, +                {'spaces': 4}, +                ] +            } +        tokens = process(tokens) + +        # test list 6 +        expected = { +            'loose_item_start': [ +                {'text': '*   ', 'spaces': 4}, +                {'text': '*   ', 'spaces': 4}, +                ], +            'text': [ +                {'text': 'This is a list item with two paragraphs.'}, +                {'text': 'This is the second paragraph in the list item. You\'re'}, +                {'text': 'only required to indent the first line. Lorem ipsum dolor'}, +                {'text': 'sit amet, consectetuer adipiscing elit.'}, +                {'text': 'Another item in the same list.'}, +                ], +            'list_item_end': [ +                {'spaces': 4}, +                {'spaces': 4}, +                ] +            } +        tokens = process(tokens) + +        # test list 7 +        expected = { +            'loose_item_start': [ +                {'text': '*   ', 'spaces': 4}, +                ], +            'text': [ +                {'text': 'A list item with a blockquote:'}, +                ], +            'list_item_end': [ +                {'spaces': 4}, +                ] +            } +        tokens = process(tokens) + +        # test list 7 +        expected = { +            'loose_item_start': [ +                {'text': '*   ', 'spaces': 4}, +                ], +            'text': [ +                {'text': 'A list item with a code block:'}, +                ], +            'list_item_end': [ +                {'spaces': 4}, +                ] +            } +        tokens = process(tokens) + +        # test list 7 +        expected = { +            'loose_item_start': [ +                
{'text': '1.  ', 'spaces': 4}, +                {'text': '1.  ', 'spaces': 4}, +                ], +            'text': [ +                {'text': 'This is a list item has a nested list.'}, +                {'text': 'Lorem ipsum dolor sit amet, consectetuer adipiscing'}, +                {'text': 'elit. Aliquam hendrerit mi posuere lectus.'}, +                ], +            'list_item_end': [ +                {'spaces': 4}, +                {'spaces': 4}, +                ] +            } +        tokens = process(tokens) +      def teardown(self):          pass | 
