diff options
Diffstat (limited to 'docs/bin/markdown')
| -rwxr-xr-x | docs/bin/markdown | 1475 | 
1 files changed, 1475 insertions, 0 deletions
| diff --git a/docs/bin/markdown b/docs/bin/markdown new file mode 100755 index 0000000..6329172 --- /dev/null +++ b/docs/bin/markdown @@ -0,0 +1,1475 @@ +#!/usr/bin/perl +# +# Markdown -- A text-to-HTML conversion tool for web writers +# +# Copyright (c) 2004, John Gruber +# <http://daringfireball.net/projects/markdown> +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright notice, +#   this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +#   notice, this list of conditions and the following disclaimer in the +#   documentation and/or other materials provided with the distribution. +# +# * Neither the name "Markdown" nor the names of its contributors may +#   be used to endorse or promote products derived from this software +#   without specific prior written permission. +# +# This software is provided by the copyright holders and contributors "as +# is" and any express or implied warranties, including, but not limited +# to, the implied warranties of merchantability and fitness for a +# particular purpose are disclaimed. In no event shall the copyright owner +# or contributors be liable for any direct, indirect, incidental, special, +# exemplary, or consequential damages (including, but not limited to, +# procurement of substitute goods or services; loss of use, data, or +# profits; or business interruption) however caused and on any theory of +# liability, whether in contract, strict liability, or tort (including +# negligence or otherwise) arising in any way out of the use of this +# software, even if advised of the possibility of such damage. + +package Markdown; +require 5.006_000; +use strict; +use warnings; + +use Digest::MD5 qw(md5_hex); +use vars qw($VERSION); +$VERSION = '1.0.1'; +# Tue 14 Dec 2004 + +## Disabled; causes problems under Perl 5.6.1: +# use utf8; +# binmode( STDOUT, ":utf8" );  # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html + + +# +# Global default settings: +# +my $g_empty_element_suffix = " />";     # Change to ">" for HTML output +my $g_tab_width = 4; + + +# +# Globals: +# + +# Regex to match balanced [brackets]. See Friedl's +# "Mastering Regular Expressions", 2nd Ed., pp. 328-331. +my $g_nested_brackets; +$g_nested_brackets = qr{ +	(?> 								# Atomic matching +	   [^\[\]]+							# Anything other than brackets +	 | +	   \[ +		 (??{ $g_nested_brackets })		# Recursive set of nested brackets +	   \] +	)* +}x; + + +# Table of hash values for escaped characters: +my %g_escape_table; +foreach my $char (split //, '\\`*_{}[]()>#+-.!') { +	$g_escape_table{$char} = md5_hex($char); +} + + +# Global hashes, used by various utility routines +my %g_urls; +my %g_titles; +my %g_html_blocks; + +# Used to track when we're inside an ordered or unordered list +# (see _ProcessListItems() for details): +my $g_list_level = 0; + + +#### Blosxom plug-in interface ########################################## + +# Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine +# which posts Markdown should process, using a "meta-markup: markdown" +# header. If it's set to 0 (the default), Markdown will process all +# entries. +my $g_blosxom_use_meta = 0; + +sub start { 1; } +sub story { +	my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_; + +	if ( (! $g_blosxom_use_meta) or +	     (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i)) +	     ){ +			$$body_ref  = Markdown($$body_ref); +     } +     1; +} + + +#### Movable Type plug-in interface ##################################### +eval {require MT};  # Test to see if we're running in MT. +unless ($@) { +    require MT; +    import  MT; +    require MT::Template::Context; +    import  MT::Template::Context; + +	eval {require MT::Plugin};  # Test to see if we're running >= MT 3.0. +	unless ($@) { +		require MT::Plugin; +		import  MT::Plugin; +		my $plugin = new MT::Plugin({ +			name => "Markdown", +			description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)", +			doc_link => 'http://daringfireball.net/projects/markdown/' +		}); +		MT->add_plugin( $plugin ); +	} + +	MT::Template::Context->add_container_tag(MarkdownOptions => sub { +		my $ctx	 = shift; +		my $args = shift; +		my $builder = $ctx->stash('builder'); +		my $tokens = $ctx->stash('tokens'); + +		if (defined ($args->{'output'}) ) { +			$ctx->stash('markdown_output', lc $args->{'output'}); +		} + +		defined (my $str = $builder->build($ctx, $tokens) ) +			or return $ctx->error($builder->errstr); +		$str;		# return value +	}); + +	MT->add_text_filter('markdown' => { +		label     => 'Markdown', +		docs      => 'http://daringfireball.net/projects/markdown/', +		on_format => sub { +			my $text = shift; +			my $ctx  = shift; +			my $raw  = 0; +		    if (defined $ctx) { +		    	my $output = $ctx->stash('markdown_output'); +				if (defined $output  &&  $output =~ m/^html/i) { +					$g_empty_element_suffix = ">"; +					$ctx->stash('markdown_output', ''); +				} +				elsif (defined $output  &&  $output eq 'raw') { +					$raw = 1; +					$ctx->stash('markdown_output', ''); +				} +				else { +					$raw = 0; +					$g_empty_element_suffix = " />"; +				} +			} +			$text = $raw ? $text : Markdown($text); +			$text; +		}, +	}); + +	# If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter: +	my $smartypants; + +	{ +		no warnings "once"; +		$smartypants = $MT::Template::Context::Global_filters{'smarty_pants'}; +	} + +	if ($smartypants) { +		MT->add_text_filter('markdown_with_smartypants' => { +			label     => 'Markdown With SmartyPants', +			docs      => 'http://daringfireball.net/projects/markdown/', +			on_format => sub { +				my $text = shift; +				my $ctx  = shift; +				if (defined $ctx) { +					my $output = $ctx->stash('markdown_output'); +					if (defined $output  &&  $output eq 'html') { +						$g_empty_element_suffix = ">"; +					} +					else { +						$g_empty_element_suffix = " />"; +					} +				} +				$text = Markdown($text); +				$text = $smartypants->($text, '1'); +			}, +		}); +	} +} +else { +#### BBEdit/command-line text filter interface ########################## +# Needs to be hidden from MT (and Blosxom when running in static mode). + +    # We're only using $blosxom::version once; tell Perl not to warn us: +	no warnings 'once'; +    unless ( defined($blosxom::version) ) { +		use warnings; + +		#### Check for command-line switches: ################# +		my %cli_opts; +		use Getopt::Long; +		Getopt::Long::Configure('pass_through'); +		GetOptions(\%cli_opts, +			'version', +			'shortversion', +			'html4tags', +		); +		if ($cli_opts{'version'}) {		# Version info +			print "\nThis is Markdown, version $VERSION.\n"; +			print "Copyright 2004 John Gruber\n"; +			print "http://daringfireball.net/projects/markdown/\n\n"; +			exit 0; +		} +		if ($cli_opts{'shortversion'}) {		# Just the version number string. +			print $VERSION; +			exit 0; +		} +		if ($cli_opts{'html4tags'}) {			# Use HTML tag style instead of XHTML +			$g_empty_element_suffix = ">"; +		} + + +		#### Process incoming text: ########################### +		my $text; +		{ +			local $/;               # Slurp the whole file +			$text = <>; +		} +        print Markdown($text); +    } +} + + + +sub Markdown { +# +# Main function. The order in which other subs are called here is +# essential. Link and image substitutions need to happen before +# _EscapeSpecialChars(), so that any *'s or _'s in the <a> +# and <img> tags get encoded. +# +	my $text = shift; + +	# Clear the global hashes. If we don't clear these, you get conflicts +	# from other articles when generating a page which contains more than +	# one article (e.g. an index page that shows the N most recent +	# articles): +	%g_urls = (); +	%g_titles = (); +	%g_html_blocks = (); + + +	# Standardize line endings: +	$text =~ s{\r\n}{\n}g; 	# DOS to Unix +	$text =~ s{\r}{\n}g; 	# Mac to Unix + +	# Make sure $text ends with a couple of newlines: +	$text .= "\n\n"; + +	# Convert all tabs to spaces. +	$text = _Detab($text); + +	# Strip any lines consisting only of spaces and tabs. +	# This makes subsequent regexen easier to write, because we can +	# match consecutive blank lines with /\n+/ instead of something +	# contorted like /[ \t]*\n+/ . +	$text =~ s/^[ \t]+$//mg; + +	# Turn block-level HTML blocks into hash entries +	$text = _HashHTMLBlocks($text); + +	# Strip link definitions, store in hashes. +	$text = _StripLinkDefinitions($text); + +	$text = _RunBlockGamut($text); + +	$text = _UnescapeSpecialChars($text); + +	return $text . "\n"; +} + + +sub _StripLinkDefinitions { +# +# Strips link definitions from text, stores the URLs and titles in +# hash references. +# +	my $text = shift; +	my $less_than_tab = $g_tab_width - 1; + +	# Link defs are in the form: ^[id]: url "optional title" +	while ($text =~ s{ +						^[ ]{0,$less_than_tab}\[(.+)\]:	# id = $1 +						  [ \t]* +						  \n?				# maybe *one* newline +						  [ \t]* +						<?(\S+?)>?			# url = $2 +						  [ \t]* +						  \n?				# maybe one newline +						  [ \t]* +						(?: +							(?<=\s)			# lookbehind for whitespace +							["(] +							(.+?)			# title = $3 +							[")] +							[ \t]* +						)?	# title is optional +						(?:\n+|\Z) +					} +					{}mx) { +		$g_urls{lc $1} = _EncodeAmpsAndAngles( $2 );	# Link IDs are case-insensitive +		if ($3) { +			$g_titles{lc $1} = $3; +			$g_titles{lc $1} =~ s/"/"/g; +		} +	} + +	return $text; +} + + +sub _HashHTMLBlocks { +	my $text = shift; +	my $less_than_tab = $g_tab_width - 1; + +	# Hashify HTML blocks: +	# We only want to do this for block-level HTML tags, such as headers, +	# lists, and tables. That's because we still want to wrap <p>s around +	# "paragraphs" that are wrapped in non-block-level tags, such as anchors, +	# phrase emphasis, and spans. The list of tags we're looking for is +	# hard-coded: +	my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/; +	my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/; + +	# First, look for nested blocks, e.g.: +	# 	<div> +	# 		<div> +	# 		tags for inner block must be indented. +	# 		</div> +	# 	</div> +	# +	# The outermost tags must start at the left margin for this to match, and +	# the inner nested divs must be indented. +	# We need to do this before the next, more liberal match, because the next +	# match will start at the first `<div>` and stop at the first `</div>`. +	$text =~ s{ +				(						# save in $1 +					^					# start of line  (with /m) +					<($block_tags_a)	# start tag = $2 +					\b					# word break +					(.*\n)*?			# any number of lines, minimally matching +					</\2>				# the matching end tag +					[ \t]*				# trailing spaces/tabs +					(?=\n+|\Z)	# followed by a newline or end of document +				) +			}{ +				my $key = md5_hex($1); +				$g_html_blocks{$key} = $1; +				"\n\n" . $key . "\n\n"; +			}egmx; + + +	# +	# Now match more liberally, simply from `\n<tag>` to `</tag>\n` +	# +	$text =~ s{ +				(						# save in $1 +					^					# start of line  (with /m) +					<($block_tags_b)	# start tag = $2 +					\b					# word break +					(.*\n)*?			# any number of lines, minimally matching +					.*</\2>				# the matching end tag +					[ \t]*				# trailing spaces/tabs +					(?=\n+|\Z)	# followed by a newline or end of document +				) +			}{ +				my $key = md5_hex($1); +				$g_html_blocks{$key} = $1; +				"\n\n" . $key . "\n\n"; +			}egmx; +	# Special case just for <hr />. It was easier to make a special case than +	# to make the other regex more complicated. +	$text =~ s{ +				(?: +					(?<=\n\n)		# Starting after a blank line +					|				# or +					\A\n?			# the beginning of the doc +				) +				(						# save in $1 +					[ ]{0,$less_than_tab} +					<(hr)				# start tag = $2 +					\b					# word break +					([^<>])*?			# +					/?>					# the matching end tag +					[ \t]* +					(?=\n{2,}|\Z)		# followed by a blank line or end of document +				) +			}{ +				my $key = md5_hex($1); +				$g_html_blocks{$key} = $1; +				"\n\n" . $key . "\n\n"; +			}egx; + +	# Special case for standalone HTML comments: +	$text =~ s{ +				(?: +					(?<=\n\n)		# Starting after a blank line +					|				# or +					\A\n?			# the beginning of the doc +				) +				(						# save in $1 +					[ ]{0,$less_than_tab} +					(?s: +						<! +						(--.*?--\s*)+ +						> +					) +					[ \t]* +					(?=\n{2,}|\Z)		# followed by a blank line or end of document +				) +			}{ +				my $key = md5_hex($1); +				$g_html_blocks{$key} = $1; +				"\n\n" . $key . "\n\n"; +			}egx; + + +	return $text; +} + + +sub _RunBlockGamut { +# +# These are all the transformations that form block-level +# tags like paragraphs, headers, and list items. +# +	my $text = shift; + +	$text = _DoHeaders($text); + +	# Do Horizontal Rules: +	$text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx; +	$text =~ s{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx; +	$text =~ s{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx; + +	$text = _DoLists($text); + +	$text = _DoCodeBlocks($text); + +	$text = _DoBlockQuotes($text); + +	# We already ran _HashHTMLBlocks() before, in Markdown(), but that +	# was to escape raw HTML in the original Markdown source. This time, +	# we're escaping the markup we've just created, so that we don't wrap +	# <p> tags around block-level tags. +	$text = _HashHTMLBlocks($text); + +	$text = _FormParagraphs($text); + +	return $text; +} + + +sub _RunSpanGamut { +# +# These are all the transformations that occur *within* block-level +# tags like paragraphs, headers, and list items. +# +	my $text = shift; + +	$text = _DoCodeSpans($text); + +	$text = _EscapeSpecialChars($text); + +	# Process anchor and image tags. Images must come first, +	# because ![foo][f] looks like an anchor. +	$text = _DoImages($text); +	$text = _DoAnchors($text); + +	# Make links out of things like `<http://example.com/>` +	# Must come after _DoAnchors(), because you can use < and > +	# delimiters in inline links like [this](<url>). +	$text = _DoAutoLinks($text); + +	$text = _EncodeAmpsAndAngles($text); + +	$text = _DoItalicsAndBold($text); + +	# Do hard breaks: +	$text =~ s/ {2,}\n/ <br$g_empty_element_suffix\n/g; + +	return $text; +} + + +sub _EscapeSpecialChars { +	my $text = shift; +	my $tokens ||= _TokenizeHTML($text); + +	$text = '';   # rebuild $text from the tokens +# 	my $in_pre = 0;	 # Keep track of when we're inside <pre> or <code> tags. +# 	my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!; + +	foreach my $cur_token (@$tokens) { +		if ($cur_token->[0] eq "tag") { +			# Within tags, encode * and _ so they don't conflict +			# with their use in Markdown for italics and strong. +			# We're replacing each such character with its +			# corresponding MD5 checksum value; this is likely +			# overkill, but it should prevent us from colliding +			# with the escape values by accident. +			$cur_token->[1] =~  s! \* !$g_escape_table{'*'}!gx; +			$cur_token->[1] =~  s! _  !$g_escape_table{'_'}!gx; +			$text .= $cur_token->[1]; +		} else { +			my $t = $cur_token->[1]; +			$t = _EncodeBackslashEscapes($t); +			$text .= $t; +		} +	} +	return $text; +} + + +sub _DoAnchors { +# +# Turn Markdown link shortcuts into XHTML <a> tags. +# +	my $text = shift; + +	# +	# First, handle reference-style links: [link text] [id] +	# +	$text =~ s{ +		(					# wrap whole match in $1 +		  \[ +		    ($g_nested_brackets)	# link text = $2 +		  \] + +		  [ ]?				# one optional space +		  (?:\n[ ]*)?		# one optional newline followed by spaces + +		  \[ +		    (.*?)		# id = $3 +		  \] +		) +	}{ +		my $result; +		my $whole_match = $1; +		my $link_text   = $2; +		my $link_id     = lc $3; + +		if ($link_id eq "") { +			$link_id = lc $link_text;     # for shortcut links like [this][]. +		} + +		if (defined $g_urls{$link_id}) { +			my $url = $g_urls{$link_id}; +			$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid +			$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold. +			$result = "<a href=\"$url\""; +			if ( defined $g_titles{$link_id} ) { +				my $title = $g_titles{$link_id}; +				$title =~ s! \* !$g_escape_table{'*'}!gx; +				$title =~ s!  _ !$g_escape_table{'_'}!gx; +				$result .=  " title=\"$title\""; +			} +			$result .= ">$link_text</a>"; +		} +		else { +			$result = $whole_match; +		} +		$result; +	}xsge; + +	# +	# Next, inline-style links: [link text](url "optional title") +	# +	$text =~ s{ +		(				# wrap whole match in $1 +		  \[ +		    ($g_nested_brackets)	# link text = $2 +		  \] +		  \(			# literal paren +		  	[ \t]* +			<?(.*?)>?	# href = $3 +		  	[ \t]* +			(			# $4 +			  (['"])	# quote char = $5 +			  (.*?)		# Title = $6 +			  \5		# matching quote +			)?			# title is optional +		  \) +		) +	}{ +		my $result; +		my $whole_match = $1; +		my $link_text   = $2; +		my $url	  		= $3; +		my $title		= $6; + +		$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid +		$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold. +		$result = "<a href=\"$url\""; + +		if (defined $title) { +			$title =~ s/"/"/g; +			$title =~ s! \* !$g_escape_table{'*'}!gx; +			$title =~ s!  _ !$g_escape_table{'_'}!gx; +			$result .=  " title=\"$title\""; +		} + +		$result .= ">$link_text</a>"; + +		$result; +	}xsge; + +	return $text; +} + + +sub _DoImages { +# +# Turn Markdown image shortcuts into <img> tags. +# +	my $text = shift; + +	# +	# First, handle reference-style labeled images: ![alt text][id] +	# +	$text =~ s{ +		(				# wrap whole match in $1 +		  !\[ +		    (.*?)		# alt text = $2 +		  \] + +		  [ ]?				# one optional space +		  (?:\n[ ]*)?		# one optional newline followed by spaces + +		  \[ +		    (.*?)		# id = $3 +		  \] + +		) +	}{ +		my $result; +		my $whole_match = $1; +		my $alt_text    = $2; +		my $link_id     = lc $3; + +		if ($link_id eq "") { +			$link_id = lc $alt_text;     # for shortcut links like ![this][]. +		} + +		$alt_text =~ s/"/"/g; +		if (defined $g_urls{$link_id}) { +			my $url = $g_urls{$link_id}; +			$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid +			$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold. +			$result = "<img src=\"$url\" alt=\"$alt_text\""; +			if (defined $g_titles{$link_id}) { +				my $title = $g_titles{$link_id}; +				$title =~ s! \* !$g_escape_table{'*'}!gx; +				$title =~ s!  _ !$g_escape_table{'_'}!gx; +				$result .=  " title=\"$title\""; +			} +			$result .= $g_empty_element_suffix; +		} +		else { +			# If there's no such link ID, leave intact: +			$result = $whole_match; +		} + +		$result; +	}xsge; + +	# +	# Next, handle inline images:   +	# Don't forget: encode * and _ + +	$text =~ s{ +		(				# wrap whole match in $1 +		  !\[ +		    (.*?)		# alt text = $2 +		  \] +		  \(			# literal paren +		  	[ \t]* +			<?(\S+?)>?	# src url = $3 +		  	[ \t]* +			(			# $4 +			  (['"])	# quote char = $5 +			  (.*?)		# title = $6 +			  \5		# matching quote +			  [ \t]* +			)?			# title is optional +		  \) +		) +	}{ +		my $result; +		my $whole_match = $1; +		my $alt_text    = $2; +		my $url	  		= $3; +		my $title		= ''; +		if (defined($6)) { +			$title		= $6; +		} + +		$alt_text =~ s/"/"/g; +		$title    =~ s/"/"/g; +		$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid +		$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold. +		$result = "<img src=\"$url\" alt=\"$alt_text\""; +		if (defined $title) { +			$title =~ s! \* !$g_escape_table{'*'}!gx; +			$title =~ s!  _ !$g_escape_table{'_'}!gx; +			$result .=  " title=\"$title\""; +		} +		$result .= $g_empty_element_suffix; + +		$result; +	}xsge; + +	return $text; +} + + +sub _DoHeaders { +	my $text = shift; + +	# Setext-style headers: +	#	  Header 1 +	#	  ======== +	# +	#	  Header 2 +	#	  -------- +	# +	$text =~ s{ ^(.+)[ \t]*\n=+[ \t]*\n+ }{ +		"<h1>"  .  _RunSpanGamut($1)  .  "</h1>\n\n"; +	}egmx; + +	$text =~ s{ ^(.+)[ \t]*\n-+[ \t]*\n+ }{ +		"<h2>"  .  _RunSpanGamut($1)  .  "</h2>\n\n"; +	}egmx; + + +	# atx-style headers: +	#	# Header 1 +	#	## Header 2 +	#	## Header 2 with closing hashes ## +	#	... +	#	###### Header 6 +	# +	$text =~ s{ +			^(\#{1,6})	# $1 = string of #'s +			[ \t]* +			(.+?)		# $2 = Header text +			[ \t]* +			\#*			# optional closing #'s (not counted) +			\n+ +		}{ +			my $h_level = length($1); +			"<h$h_level>"  .  _RunSpanGamut($2)  .  "</h$h_level>\n\n"; +		}egmx; + +	return $text; +} + + +sub _DoLists { +# +# Form HTML ordered (numbered) and unordered (bulleted) lists. +# +	my $text = shift; +	my $less_than_tab = $g_tab_width - 1; + +	# Re-usable patterns to match list item bullets and number markers: +	my $marker_ul  = qr/[*+-]/; +	my $marker_ol  = qr/\d+[.]/; +	my $marker_any = qr/(?:$marker_ul|$marker_ol)/; + +	# Re-usable pattern to match any entirel ul or ol list: +	my $whole_list = qr{ +		(								# $1 = whole list +		  (								# $2 +			[ ]{0,$less_than_tab} +			(${marker_any})				# $3 = first list item marker +			[ \t]+ +		  ) +		  (?s:.+?) +		  (								# $4 +			  \z +			| +			  \n{2,} +			  (?=\S) +			  (?!						# Negative lookahead for another list item marker +				[ \t]* +				${marker_any}[ \t]+ +			  ) +		  ) +		) +	}mx; + +	# We use a different prefix before nested lists than top-level lists. +	# See extended comment in _ProcessListItems(). +	# +	# Note: There's a bit of duplication here. My original implementation +	# created a scalar regex pattern as the conditional result of the test on +	# $g_list_level, and then only ran the $text =~ s{...}{...}egmx +	# substitution once, using the scalar as the pattern. This worked, +	# everywhere except when running under MT on my hosting account at Pair +	# Networks. There, this caused all rebuilds to be killed by the reaper (or +	# perhaps they crashed, but that seems incredibly unlikely given that the +	# same script on the same server ran fine *except* under MT. I've spent +	# more time trying to figure out why this is happening than I'd like to +	# admit. My only guess, backed up by the fact that this workaround works, +	# is that Perl optimizes the substition when it can figure out that the +	# pattern will never change, and when this optimization isn't on, we run +	# afoul of the reaper. Thus, the slightly redundant code to that uses two +	# static s/// patterns rather than one conditional pattern. + +	if ($g_list_level) { +		$text =~ s{ +				^ +				$whole_list +			}{ +				my $list = $1; +				my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol"; +				# Turn double returns into triple returns, so that we can make a +				# paragraph for the last item in a list, if necessary: +				$list =~ s/\n{2,}/\n\n\n/g; +				my $result = _ProcessListItems($list, $marker_any); +				$result = "<$list_type>\n" . $result . "</$list_type>\n"; +				$result; +			}egmx; +	} +	else { +		$text =~ s{ +				(?:(?<=\n\n)|\A\n?) +				$whole_list +			}{ +				my $list = $1; +				my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol"; +				# Turn double returns into triple returns, so that we can make a +				# paragraph for the last item in a list, if necessary: +				$list =~ s/\n{2,}/\n\n\n/g; +				my $result = _ProcessListItems($list, $marker_any); +				$result = "<$list_type>\n" . $result . "</$list_type>\n"; +				$result; +			}egmx; +	} + + +	return $text; +} + + +sub _ProcessListItems { +# +#	Process the contents of a single ordered or unordered list, splitting it +#	into individual list items. +# + +	my $list_str = shift; +	my $marker_any = shift; + + +	# The $g_list_level global keeps track of when we're inside a list. +	# Each time we enter a list, we increment it; when we leave a list, +	# we decrement. If it's zero, we're not in a list anymore. +	# +	# We do this because when we're not inside a list, we want to treat +	# something like this: +	# +	#		I recommend upgrading to version +	#		8. Oops, now this line is treated +	#		as a sub-list. +	# +	# As a single paragraph, despite the fact that the second line starts +	# with a digit-period-space sequence. +	# +	# Whereas when we're inside a list (or sub-list), that line will be +	# treated as the start of a sub-list. What a kludge, huh? This is +	# an aspect of Markdown's syntax that's hard to parse perfectly +	# without resorting to mind-reading. Perhaps the solution is to +	# change the syntax rules such that sub-lists must start with a +	# starting cardinal number; e.g. "1." or "a.". + +	$g_list_level++; + +	# trim trailing blank lines: +	$list_str =~ s/\n{2,}\z/\n/; + + +	$list_str =~ s{ +		(\n)?							# leading line = $1 +		(^[ \t]*)						# leading whitespace = $2 +		($marker_any) [ \t]+			# list marker = $3 +		((?s:.+?)						# list item text   = $4 +		(\n{1,2})) +		(?= \n* (\z | \2 ($marker_any) [ \t]+)) +	}{ +		my $item = $4; +		my $leading_line = $1; +		my $leading_space = $2; + +		if ($leading_line or ($item =~ m/\n{2,}/)) { +			$item = _RunBlockGamut(_Outdent($item)); +		} +		else { +			# Recursion for sub-lists: +			$item = _DoLists(_Outdent($item)); +			chomp $item; +			$item = _RunSpanGamut($item); +		} + +		"<li>" . $item . "</li>\n"; +	}egmx; + +	$g_list_level--; +	return $list_str; +} + + + +sub _DoCodeBlocks { +# +#	Process Markdown `<pre><code>` blocks. +# + +	my $text = shift; + +	$text =~ s{ +			(?:\n\n|\A) +			(	            # $1 = the code block -- one or more lines, starting with a space/tab +			  (?: +			    (?:[ ]{$g_tab_width} | \t)  # Lines must start with a tab or a tab-width of spaces +			    .*\n+ +			  )+ +			) +			((?=^[ ]{0,$g_tab_width}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc +		}{ +			my $codeblock = $1; +			my $result; # return value + +			$codeblock = _EncodeCode(_Outdent($codeblock)); +			$codeblock = _Detab($codeblock); +			$codeblock =~ s/\A\n+//; # trim leading newlines +			$codeblock =~ s/\s+\z//; # trim trailing whitespace + +			$result = "\n\n<pre><code>" . $codeblock . "\n</code></pre>\n\n"; + +			$result; +		}egmx; + +	return $text; +} + + +sub _DoCodeSpans { +# +# 	*	Backtick quotes are used for <code></code> spans. +# +# 	*	You can use multiple backticks as the delimiters if you want to +# 		include literal backticks in the code span. So, this input: +# +#         Just type ``foo `bar` baz`` at the prompt. +#      +#     	Will translate to: +#      +#         <p>Just type <code>foo `bar` baz</code> at the prompt.</p> +# +#		There's no arbitrary limit to the number of backticks you +#		can use as delimters. If you need three consecutive backticks +#		in your code, use four for delimiters, etc. +# +#	*	You can use spaces to get literal backticks at the edges: +# +#         ... type `` `bar` `` ... +# +#     	Turns to: +# +#         ... type <code>`bar`</code> ... +# + +	my $text = shift; + +	$text =~ s@ +			(`+)		# $1 = Opening run of ` +			(.+?)		# $2 = The code block +			(?<!`) +			\1			# Matching closer +			(?!`) +		@ + 			my $c = "$2"; + 			$c =~ s/^[ \t]*//g; # leading whitespace + 			$c =~ s/[ \t]*$//g; # trailing whitespace + 			$c = _EncodeCode($c); +			"<code>$c</code>"; +		@egsx; + +	return $text; +} + + +sub _EncodeCode { +# +# Encode/escape certain characters inside Markdown code runs. +# The point is that in code, these characters are literals, +# and lose their special Markdown meanings. +# +    local $_ = shift; + +	# Encode all ampersands; HTML entities are not +	# entities within a Markdown code span. +	s/&/&/g; + +	# Encode $'s, but only if we're running under Blosxom. +	# (Blosxom interpolates Perl variables in article bodies.) +	{ +		no warnings 'once'; +    	if (defined($blosxom::version)) { +    		s/\$/$/g; +    	} +    } + + +	# Do the angle bracket song and dance: +	s! <  !<!gx; +	s! >  !>!gx; + +	# Now, escape characters that are magic in Markdown: +	s! \* !$g_escape_table{'*'}!gx; +	s! _  !$g_escape_table{'_'}!gx; +	s! {  !$g_escape_table{'{'}!gx; +	s! }  !$g_escape_table{'}'}!gx; +	s! \[ !$g_escape_table{'['}!gx; +	s! \] !$g_escape_table{']'}!gx; +	s! \\ !$g_escape_table{'\\'}!gx; + +	return $_; +} + + +sub _DoItalicsAndBold { +	my $text = shift; + +	# <strong> must go first: +	$text =~ s{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 } +		{<strong>$2</strong>}gsx; + +	$text =~ s{ (\*|_) (?=\S) (.+?) (?<=\S) \1 } +		{<em>$2</em>}gsx; + +	return $text; +} + + +sub _DoBlockQuotes { +	my $text = shift; + +	$text =~ s{ +		  (								# Wrap whole match in $1 +			( +			  ^[ \t]*>[ \t]?			# '>' at the start of a line +			    .+\n					# rest of the first line +			  (.+\n)*					# subsequent consecutive lines +			  \n*						# blanks +			)+ +		  ) +		}{ +			my $bq = $1; +			$bq =~ s/^[ \t]*>[ \t]?//gm;	# trim one level of quoting +			$bq =~ s/^[ \t]+$//mg;			# trim whitespace-only lines +			$bq = _RunBlockGamut($bq);		# recurse + +			$bq =~ s/^/  /g; +			# These leading spaces screw with <pre> content, so we need to fix that: +			$bq =~ s{ +					(\s*<pre>.+?</pre>) +				}{ +					my $pre = $1; +					$pre =~ s/^  //mg; +					$pre; +				}egsx; + +			"<blockquote>\n$bq\n</blockquote>\n\n"; +		}egmx; + + +	return $text; +} + + +sub _FormParagraphs { +# +#	Params: +#		$text - string to process with html <p> tags +# +	my $text = shift; + +	# Strip leading and trailing lines: +	$text =~ s/\A\n+//; +	$text =~ s/\n+\z//; + +	my @grafs = split(/\n{2,}/, $text); + +	# +	# Wrap <p> tags. +	# +	foreach (@grafs) { +		unless (defined( $g_html_blocks{$_} )) { +			$_ = _RunSpanGamut($_); +			s/^([ \t]*)/<p>/; +			$_ .= "</p>"; +		} +	} + +	# +	# Unhashify HTML blocks +	# +	foreach (@grafs) { +		if (defined( $g_html_blocks{$_} )) { +			$_ = $g_html_blocks{$_}; +		} +	} + +	return join "\n\n", @grafs; +} + + +sub _EncodeAmpsAndAngles { +# Smart processing for ampersands and angle brackets that need to be encoded. + +	my $text = shift; + +	# Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: +	#   http://bumppo.net/projects/amputator/ + 	$text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&/g; + +	# Encode naked <'s + 	$text =~ s{<(?![a-z/?\$!])}{<}gi; + +	return $text; +} + + +sub _EncodeBackslashEscapes { +# +#   Parameter:  String. +#   Returns:    The string, with after processing the following backslash +#               escape sequences. +# +    local $_ = shift; + +    s! \\\\  !$g_escape_table{'\\'}!gx;		# Must process escaped backslashes first. +    s! \\`   !$g_escape_table{'`'}!gx; +    s! \\\*  !$g_escape_table{'*'}!gx; +    s! \\_   !$g_escape_table{'_'}!gx; +    s! \\\{  !$g_escape_table{'{'}!gx; +    s! \\\}  !$g_escape_table{'}'}!gx; +    s! \\\[  !$g_escape_table{'['}!gx; +    s! \\\]  !$g_escape_table{']'}!gx; +    s! \\\(  !$g_escape_table{'('}!gx; +    s! \\\)  !$g_escape_table{')'}!gx; +    s! \\>   !$g_escape_table{'>'}!gx; +    s! \\\#  !$g_escape_table{'#'}!gx; +    s! \\\+  !$g_escape_table{'+'}!gx; +    s! \\\-  !$g_escape_table{'-'}!gx; +    s! \\\.  !$g_escape_table{'.'}!gx; +    s{ \\!  }{$g_escape_table{'!'}}gx; + +    return $_; +} + + +sub _DoAutoLinks { +	my $text = shift; + +	$text =~ s{<((https?|ftp):[^'">\s]+)>}{<a href="$1">$1</a>}gi; + +	# Email addresses: <address@domain.foo> +	$text =~ s{ +		< +        (?:mailto:)? +		( +			[-.\w]+ +			\@ +			[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+ +		) +		> +	}{ +		_EncodeEmailAddress( _UnescapeSpecialChars($1) ); +	}egix; + +	return $text; +} + + +sub _EncodeEmailAddress { +# +#	Input: an email address, e.g. "foo@example.com" +# +#	Output: the email address as a mailto link, with each character +#		of the address encoded as either a decimal or hex entity, in +#		the hopes of foiling most address harvesting spam bots. E.g.: +# +#	  <a href="mailto:foo@e +#       xample.com">foo +#       @example.com</a> +# +#	Based on a filter by Matthew Wickline, posted to the BBEdit-Talk +#	mailing list: <http://tinyurl.com/yu7ue> +# + +	my $addr = shift; + +	srand; +	my @encode = ( +		sub { '&#' .                 ord(shift)   . ';' }, +		sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' }, +		sub {                            shift          }, +	); + +	$addr = "mailto:" . $addr; + +	$addr =~ s{(.)}{ +		my $char = $1; +		if ( $char eq '@' ) { +			# this *must* be encoded. I insist. +			$char = $encode[int rand 1]->($char); +		} elsif ( $char ne ':' ) { +			# leave ':' alone (to spot mailto: later) +			my $r = rand; +			# roughly 10% raw, 45% hex, 45% dec +			$char = ( +				$r > .9   ?  $encode[2]->($char)  : +				$r < .45  ?  $encode[1]->($char)  : +							 $encode[0]->($char) +			); +		} +		$char; +	}gex; + +	$addr = qq{<a href="$addr">$addr</a>}; +	$addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part + +	return $addr; +} + + +sub _UnescapeSpecialChars { +# +# Swap back in all the special characters we've hidden. +# +	my $text = shift; + +	while( my($char, $hash) = each(%g_escape_table) ) { +		$text =~ s/$hash/$char/g; +	} +    return $text; +} + + +sub _TokenizeHTML { +# +#   Parameter:  String containing HTML markup. +#   Returns:    Reference to an array of the tokens comprising the input +#               string. Each token is either a tag (possibly with nested, +#               tags contained therein, such as <a href="<MTFoo>">, or a +#               run of text between tags. Each element of the array is a +#               two-element array; the first is either 'tag' or 'text'; +#               the second is the actual value. +# +# +#   Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin. +#       <http://www.bradchoate.com/past/mtregex.php> +# + +    my $str = shift; +    my $pos = 0; +    my $len = length $str; +    my @tokens; + +    my $depth = 6; +    my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x  $depth); +    my $match = qr/(?s: <! ( -- .*? -- \s* )+ > ) |  # comment +                   (?s: <\? .*? \?> ) |              # processing instruction +                   $nested_tags/ix;                   # nested tags + +    while ($str =~ m/($match)/g) { +        my $whole_tag = $1; +        my $sec_start = pos $str; +        my $tag_start = $sec_start - length $whole_tag; +        if ($pos < $tag_start) { +            push @tokens, ['text', substr($str, $pos, $tag_start - $pos)]; +        } +        push @tokens, ['tag', $whole_tag]; +        $pos = pos $str; +    } +    push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len; +    \@tokens; +} + + +sub _Outdent { +# +# Remove one level of line-leading tabs or spaces +# +	my $text = shift; + +	$text =~ s/^(\t|[ ]{1,$g_tab_width})//gm; +	return $text; +} + + +sub _Detab { +# +# Cribbed from a post by Bart Lateur: +# <http://www.nntp.perl.org/group/perl.macperl.anyperl/154> +# +	my $text = shift; + +	$text =~ s{(.*?)\t}{$1.(' ' x ($g_tab_width - length($1) % $g_tab_width))}ge; +	return $text; +} + + +1; + +__END__ + + +=pod + +=head1 NAME + +B<Markdown> + + +=head1 SYNOPSIS + +B<Markdown.pl> [ B<--html4tags> ] [ B<--version> ] [ B<-shortversion> ] +    [ I<file> ... ] + + +=head1 DESCRIPTION + +Markdown is a text-to-HTML filter; it translates an easy-to-read / +easy-to-write structured text format into HTML. Markdown's text format +is most similar to that of plain text email, and supports features such +as headers, *emphasis*, code blocks, blockquotes, and links. + +Markdown's syntax is designed not as a generic markup language, but +specifically to serve as a front-end to (X)HTML. You can  use span-level +HTML tags anywhere in a Markdown document, and you can use block level +HTML tags (like <div> and <table> as well). + +For more information about Markdown's syntax, see: + +    http://daringfireball.net/projects/markdown/ + + +=head1 OPTIONS + +Use "--" to end switch parsing. For example, to open a file named "-z", use: + +	Markdown.pl -- -z + +=over 4 + + +=item B<--html4tags> + +Use HTML 4 style for empty element tags, e.g.: + +    <br> + +instead of Markdown's default XHTML style tags, e.g.: + +    <br /> + + +=item B<-v>, B<--version> + +Display Markdown's version number and copyright information. + + +=item B<-s>, B<--shortversion> + +Display the short-form version number. + + +=back + + + +=head1 BUGS + +To file bug reports or feature requests (other than topics listed in the +Caveats section above) please send email to: + +    support@daringfireball.net + +Please include with your report: (1) the example input; (2) the output +you expected; (3) the output Markdown actually produced. + + +=head1 VERSION HISTORY + +See the readme file for detailed release notes for this version. + +1.0.1 - 14 Dec 2004 + +1.0 - 28 Aug 2004 + + +=head1 AUTHOR + +    John Gruber +    http://daringfireball.net + +    PHP port and other contributions by Michel Fortin +    http://michelf.com + + +=head1 COPYRIGHT AND LICENSE + +Copyright (c) 2003-2004 John Gruber +<http://daringfireball.net/> +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright notice, +  this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright +  notice, this list of conditions and the following disclaimer in the +  documentation and/or other materials provided with the distribution. + +* Neither the name "Markdown" nor the names of its contributors may +  be used to endorse or promote products derived from this software +  without specific prior written permission. + +This software is provided by the copyright holders and contributors "as +is" and any express or implied warranties, including, but not limited +to, the implied warranties of merchantability and fitness for a +particular purpose are disclaimed. In no event shall the copyright owner +or contributors be liable for any direct, indirect, incidental, special, +exemplary, or consequential damages (including, but not limited to, +procurement of substitute goods or services; loss of use, data, or +profits; or business interruption) however caused and on any theory of +liability, whether in contract, strict liability, or tort (including +negligence or otherwise) arising in any way out of the use of this +software, even if advised of the possibility of such damage. + +=cut | 
