From 3761e0e2a67703f4ee18274051c39c6cac226ae2 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Fri, 17 Dec 2010 13:34:03 +0100 Subject: [PATCH 01/45] Use __FILE__ rather than $0 to find self This allows MultiMarkdown.pl to be used as a module and still find the correct ASCIIMathML.pm --- bin/MultiMarkdown.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/MultiMarkdown.pl b/bin/MultiMarkdown.pl index 8f36adf..9120023 100755 --- a/bin/MultiMarkdown.pl +++ b/bin/MultiMarkdown.pl @@ -37,7 +37,7 @@ package MultiMarkdown; unless ($@) { require Text::ASCIIMathML; } else { # Otherwise look for ASCIIMathML.pm next to MultiMarkdown.pl - my $me = $0; # Where am I? + my $me = __FILE__; # Where am I? # Am I running in Windoze? my $os = $^O; From c391fcaf381811cc6dde02abd26a2071427fa4cd Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Fri, 17 Dec 2010 11:56:26 +0100 Subject: [PATCH 02/45] follow symlinks to find the support module --- bin/mmd2XHTML.pl | 5 ++++- bin/mmd2web.pl | 6 +++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/bin/mmd2XHTML.pl b/bin/mmd2XHTML.pl index 79af285..2ba6f26 100755 --- a/bin/mmd2XHTML.pl +++ b/bin/mmd2XHTML.pl @@ -82,6 +82,7 @@ sub LocateMMD { # Determine where MMD is installed. Use a "common installation" # if available. + my $sym = dirname(readlink($me)); $me = dirname($me); if ($os =~ /MSWin/) { @@ -111,6 +112,8 @@ sub LocateMMD { if ( -f "$me/MultiMarkdown/Support.pm") { $MMDPath = "$me/.."; + } elsif ( -f "$sym/MultiMarkdown/Support.pm") { + $MMDPath = "$sym/.."; } # Next, look in user's home directory, then in common directories @@ -215,4 +218,4 @@ =head1 COPYRIGHT AND LICENSE 59 Temple Place, Suite 330 Boston, MA 02111-1307 USA -=cut \ No newline at end of file +=cut diff --git a/bin/mmd2web.pl b/bin/mmd2web.pl index ec540c2..db59e77 100755 --- a/bin/mmd2web.pl +++ b/bin/mmd2web.pl @@ -87,6 +87,8 @@ sub LocateMMD { # Determine where MMD is installed. 
Use a "common installation" # if available. + my $sym = dirname(readlink($me)); + print STDERR "sym: $sym \n"; $me = dirname($me); if ($os =~ /MSWin/) { @@ -116,6 +118,8 @@ sub LocateMMD { if ( -f "$me/MultiMarkdown/Support.pm") { $MMDPath = "$me/.."; + } elsif ( -f "$sym/MultiMarkdown/Support.pm") { + $MMDPath = "$sym/.."; } # Next, look in user's home directory, then in common directories @@ -220,4 +224,4 @@ =head1 COPYRIGHT AND LICENSE 59 Temple Place, Suite 330 Boston, MA 02111-1307 USA -=cut \ No newline at end of file +=cut From 83f3ddd7fc951d5b903129e798b6337568152175 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Fri, 17 Dec 2010 13:40:18 +0100 Subject: [PATCH 03/45] Start lib-ification of MultiMarkdown As a first step, all modules are moved to lib and the MultiMarkdown.pl executable is renamed to lib/MultiMarkdown.pm, while bin/MultiMarkdown.pl is replaced by a simple wrapper that just invokes the MultiMarkdown module. --- bin/MultiMarkdown.pl | 2694 +----------------------- {bin => lib}/ASCIIMathML.pm | 0 lib/MultiMarkdown.pm | 2699 +++++++++++++++++++++++++ {bin => lib}/MultiMarkdown/Support.pm | 0 4 files changed, 2707 insertions(+), 2686 deletions(-) rename {bin => lib}/ASCIIMathML.pm (100%) create mode 100755 lib/MultiMarkdown.pm rename {bin => lib}/MultiMarkdown/Support.pm (100%) diff --git a/bin/MultiMarkdown.pl b/bin/MultiMarkdown.pl index 9120023..840c006 100755 --- a/bin/MultiMarkdown.pl +++ b/bin/MultiMarkdown.pl @@ -1,2699 +1,21 @@ #!/usr/bin/env perl -# MultiMarkdown -- A modification of John Gruber's original Markdown -# that adds new features and an output format that can more readily -# be converted into other document formats -# -# $Id: MultiMarkdown.pl 525 2009-06-15 18:45:44Z fletcher $ -# -# Original Code Copyright (c) 2004-2007 John Gruber -# -# -# MultiMarkdown changes Copyright (c) 2005-2009 Fletcher T. 
Penney -# -# -# MultiMarkdown Version 2.0.b6 -# -# Based on Markdown.pl 1.0.2b8 - Wed 09 May 2007 -# -# -# TODO: Change math mode delimiter? -# TODO: Still need to get the glossary working in non-memoir documents -# TODO: A mechanism to include arbitrary code (LaTeX, etc) without being "ugly" -# TODO: Look into discussion re: assigning classes to div's/span's on Markdown list. -# TODO: Improve support for tables with long items and overall width in LaTeX -# TODO: Need a syntax for long table cells in MMD, even if no rowspan feature yet -# TODO: Create utilities to convert MMD tables to/from tab-delimited - - -package MultiMarkdown; require 5.006_000; use strict; use warnings; use File::Basename; +use File::Spec; -eval {require MT}; # Test to see if we're running in MT. -unless ($@) { - require Text::ASCIIMathML; -} else { # Otherwise look for ASCIIMathML.pm next to MultiMarkdown.pl - my $me = __FILE__; # Where am I? - - # Am I running in Windoze? - my $os = $^O; - - # Get just the directory portion - if ($os =~ /MSWin/) { - $me = dirname($me) . "\\"; - } else { - $me = dirname($me) . "/"; - } - require $me ."ASCIIMathML.pm"; -} - - -use Digest::MD5 qw(md5_hex); -use vars qw($VERSION $g_use_metadata $g_base_url - $g_bibliography_title $g_allow_mathml $g_base_header_level $mathParser); -$VERSION = '2.0.b6'; - -$mathParser = new Text::ASCIIMathML(); - -## Disabled; causes problems under Perl 5.6.1: -# use utf8; -# binmode( STDOUT, ":utf8" ); # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html - -# -# Global default settings: -# -my $g_empty_element_suffix = " />"; # Change to ">" for HTML output -my $g_tab_width = 4; -my $g_allow_mathml = 1; -my $g_base_header_level = 1; - -# -# Globals: -# - -# Reusable patterns to match balanced [brackets] and (parens). See -# Friedl's "Mastering Regular Expressions", 2nd Ed., pp. 328-331. 
-my ($g_nested_brackets, $g_nested_parens); -$g_nested_brackets = qr{ - (?> # Atomic matching - [^\[\]]+ # Anything other than brackets - | - \[ - (??{ $g_nested_brackets }) # Recursive set of nested brackets - \] - )* -}x; - -# Doesn't allow for whitespace, because we're using it to match URLs: -$g_nested_parens = qr{ - (?> # Atomic matching - [^()\s]+ # Anything other than parens or whitespace - | - \( - (??{ $g_nested_parens }) # Recursive set of nested brackets - \) - )* -}x; - - -# Table of hash values for escaped characters: -my %g_escape_table; -foreach my $char (split //, '\\`*_{}[]()>#+-.!') { - $g_escape_table{$char} = md5_hex($char); -} - - -# Global hashes, used by various utility routines -my %g_urls = (); -my %g_titles= (); -my %g_html_blocks = (); -my %g_metadata = (); -my %g_metadata_newline = (); -my %g_crossrefs = (); -my %g_footnotes = (); -my %g_attributes = (); -my @g_used_footnotes = (); -my $g_footnote_counter = 0; - -my $g_citation_counter = 0; -my @g_used_references = (); -my %g_references = (); -$g_bibliography_title = "Bibliography"; - -$g_use_metadata = 1; -$g_metadata_newline{default} = "\n"; -$g_metadata_newline{keywords} = ", "; -my $g_document_format = ""; - -# Used to track when we're inside an ordered or unordered list -# (see _ProcessListItems() for details): -my $g_list_level = 0; - - -#### Blosxom plug-in interface ########################################## - -# Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine -# which posts Markdown should process, using a "meta-markup: markdown" -# header. If it's set to 0 (the default), Markdown will process all -# entries. -my $g_blosxom_use_meta = 0; - -sub start { 1; } -sub story { - my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_; - - if ( (! 
$g_blosxom_use_meta) or - (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i)) - ){ - $$body_ref = Markdown($$body_ref); - } - 1; -} - - -#### Movable Type plug-in interface ##################################### -eval {require MT}; # Test to see if we're running in MT. -unless ($@) { - require MT; - import MT; - require MT::Template::Context; - import MT::Template::Context; - - eval {require MT::Plugin}; # Test to see if we're running >= MT 3.0. - unless ($@) { - require MT::Plugin; - import MT::Plugin; - my $plugin = new MT::Plugin({ - name => "MultiMarkdown", - description => "Based on the original Markdown", - doc_link => 'http://fletcherpenney.net/multimarkdown/' - }); - MT->add_plugin( $plugin ); - } - - MT::Template::Context->add_container_tag(MultiMarkdownOptions => sub { - my $ctx = shift; - my $args = shift; - my $builder = $ctx->stash('builder'); - my $tokens = $ctx->stash('tokens'); - - if (defined ($args->{'output'}) ) { - $ctx->stash('multimarkdown_output', lc $args->{'output'}); - } - - defined (my $str = $builder->build($ctx, $tokens) ) - or return $ctx->error($builder->errstr); - $str; # return value - }); - - MT->add_text_filter('multimarkdown' => { - label => 'MultiMarkdown', - docs => 'http://fletcherpenney.net/MultiMarkdown/', - on_format => sub { - my $text = shift; - my $ctx = shift; - my $raw = 0; - if (defined $ctx) { - my $output = $ctx->stash('multimarkdown_output'); - if (defined $output && $output =~ m/^html/i) { - $g_empty_element_suffix = ">"; - $ctx->stash('multimarkdown_output', ''); - } - elsif (defined $output && $output eq 'raw') { - $raw = 1; - $ctx->stash('multimarkdown_output', ''); - } - else { - $raw = 0; - $g_empty_element_suffix = " />"; - } - } - $text = $raw ? 
$text : Markdown($text); - $text; - }, - }); - - # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter: - my $smartypants; - - { - no warnings "once"; - $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'}; - } - - if ($smartypants) { - MT->add_text_filter('multimarkdown_with_smartypants' => { - label => 'MultiMarkdown With SmartyPants', - docs => 'http://fletcherpenney.net/MultiMarkdown/', - on_format => sub { - my $text = shift; - my $ctx = shift; - if (defined $ctx) { - my $output = $ctx->stash('multimarkdown_output'); - if (defined $output && $output eq 'html') { - $g_empty_element_suffix = ">"; - } - else { - $g_empty_element_suffix = " />"; - } - } - $text = Markdown($text); - $text = $smartypants->($text, '1'); - }, - }); - } -} -else { -#### BBEdit/command-line text filter interface ########################## -# Needs to be hidden from MT (and Blosxom when running in static mode). - - # We're only using $blosxom::version once; tell Perl not to warn us: - no warnings 'once'; - unless ( defined($blosxom::version) ) { - use warnings; - - #### Check for command-line switches: ################# - my %cli_opts; - use Getopt::Long; - Getopt::Long::Configure('pass_through'); - GetOptions(\%cli_opts, - 'version', - 'shortversion', - 'html4tags', - ); - if ($cli_opts{'version'}) { # Version info - print "\nThis is MultiMarkdown, version $VERSION.\n"; - print "Original code Copyright 2004 John Gruber\n"; - print "MultiMarkdown changes Copyright 2005-2009 Fletcher Penney\n"; - print "http://fletcherpenney.net/multimarkdown/\n"; - print "http://daringfireball.net/projects/markdown/\n\n"; - exit 0; - } - if ($cli_opts{'shortversion'}) { # Just the version number string. 
- print $VERSION; - exit 0; - } - if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML - $g_empty_element_suffix = ">"; - } - - - #### Process incoming text: ########################### - my $text; - { - local $/; # Slurp the whole file - $text = <>; - } - print Markdown($text); - } -} - - - -sub Markdown { -# -# Main function. The order in which other subs are called here is -# essential. Link and image substitutions need to happen before -# _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the -# and tags get encoded. -# - my $text = shift; - - # Clear the global hashes. If we don't clear these, you get conflicts - # from other articles when generating a page which contains more than - # one article (e.g. an index page that shows the N most recent - # articles): - %g_urls = (); - %g_titles = (); - %g_html_blocks = (); - %g_metadata = (); - %g_crossrefs = (); - %g_footnotes = (); - @g_used_footnotes = (); - $g_footnote_counter = 0; - @g_used_references = (); - %g_references = (); - $g_citation_counter = 0; - %g_attributes = (); - - - # Standardize line endings: - $text =~ s{\r\n}{\n}g; # DOS to Unix - $text =~ s{\r}{\n}g; # Mac to Unix - - # Make sure $text ends with a couple of newlines: - $text .= "\n\n"; - - # Convert all tabs to spaces. - $text = _Detab($text); - - # Strip any lines consisting only of spaces and tabs. - # This makes subsequent regexen easier to write, because we can - # match consecutive blank lines with /\n+/ instead of something - # contorted like /[ \t]*\n+/ . - $text =~ s/^[ \t]+$//mg; - - # Strip out MetaData - $text = _ParseMetaData($text) if $g_use_metadata; - - # And recheck for leading blank lines - $text =~ s/^\n+//s; - - # Turn block-level HTML blocks into hash entries - $text = _HashHTMLBlocks($text); - - # Strip footnote and link definitions, store in hashes. 
- $text = _StripFootnoteDefinitions($text); - - $text = _StripLinkDefinitions($text); - - _GenerateImageCrossRefs($text); - - $text = _StripMarkdownReferences($text); - - $text = _RunBlockGamut($text); - - $text = _DoMarkdownCitations($text); - - $text = _DoFootnotes($text); - - $text = _UnescapeSpecialChars($text); - - # Clean encoding within HTML comments - $text = _UnescapeComments($text); - - $text = _FixFootnoteParagraphs($text); - $text .= _PrintFootnotes(); - - $text .= _PrintMarkdownBibliography(); - - $text = _ConvertCopyright($text); - - if (lc($g_document_format) =~ /^complete\s*$/i) { - return xhtmlMetaData() . "\n\n" . $text . "\n\n"; - } elsif (lc($g_document_format) =~ /^snippet\s*$/i) { - return $text . "\n"; - } else { - return $g_document_format . textMetaData() . $text . "\n"; - } - -} - - -sub _StripLinkDefinitions { -# -# Strips link definitions from text, stores the URLs and titles in -# hash references. -# - my $text = shift; - my $less_than_tab = $g_tab_width - 1; - - # Link defs are in the form: ^[id]: url "optional title" - while ($text =~ s{ - # Pattern altered for MultiMarkdown - # in order to not match citations or footnotes - ^[ ]{0,$less_than_tab}\[([^#^].*)\]: # id = $1 - [ \t]* - \n? # maybe *one* newline - [ \t]* - ? # url = $2 - [ \t]* - \n? # maybe one newline - [ \t]* - (?: - (?<=\s) # lookbehind for whitespace - ["(] - (.+?) # title = $3 - [")] - [ \t]* - )? # title is optional - - # MultiMarkdown addition for attribute support - \n? - ( # Attributes = $4 - (?<=\s) # lookbehind for whitespace - (([ \t]*\n)?[ \t]*((\S+=\S+)|(\S+=".*?")))* - )? 
- [ \t]* - # /addition - (?:\n+|\Z) - } - {}mx) { -# $g_urls{lc $1} = _EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive - $g_urls{lc $1} = $2; # Link IDs are case-insensitive - if ($3) { - $g_titles{lc $1} = $3; - $g_titles{lc $1} =~ s/"/"/g; - } - - # MultiMarkdown addition " - if ($4) { - $g_attributes{lc $1} = $4; - } - # /addition - } - - return $text; -} - -sub _StripHTML { - # Strip (X)HTML code from string - my $text = shift; - - $text =~ s/<.*?>//g; - - return $text; -} - -sub _HashHTMLBlocks { - my $text = shift; - my $less_than_tab = $g_tab_width - 1; - - # Hashify HTML blocks: - # We only want to do this for block-level HTML tags, such as headers, - # lists, and tables. That's because we still want to wrap

s around - # "paragraphs" that are wrapped in non-block-level tags, such as anchors, - # phrase emphasis, and spans. The list of tags we're looking for is - # hard-coded: - my $block_tags = qr{ - (?: - p | div | h[1-6] | blockquote | pre | table | - dl | ol | ul | script | noscript | form | - fieldset | iframe | ins | del - ) - }x; # MultiMarkdown does not include `math` in the above list so that - # Equations can optionally be included in separate paragraphs - - my $tag_attrs = qr{ - (?: # Match one attr name/value pair - \s+ # There needs to be at least some whitespace - # before each attribute name. - [\w.:_-]+ # Attribute name - \s*=\s* - (?: - ".+?" # "Attribute value" - | - '.+?' # 'Attribute value' - ) - )* # Zero or more - }x; - - my $empty_tag = qr{< \w+ $tag_attrs \s* />}xms; - my $open_tag = qr{< $block_tags $tag_attrs \s* >}xms; - my $close_tag = undef; # let Text::Balanced handle this - - use Text::Balanced qw(gen_extract_tagged); - my $extract_block = gen_extract_tagged($open_tag, $close_tag, undef, { ignore => [$empty_tag] }); - - my @chunks; - ## TO-DO: the 0,3 on the next line ought to respect the - ## tabwidth, or else, we should mandate 4-space tabwidth and - ## be done with it: - while ($text =~ s{^(([ ]{0,3}<)?.*\n)}{}m) { - my $cur_line = $1; - if (defined $2) { - # current line could be start of code block - - my ($tag, $remainder) = $extract_block->($cur_line . $text); - if ($tag) { - my $key = md5_hex($tag); - $g_html_blocks{$key} = $tag; - push @chunks, "\n\n" . $key . "\n\n"; - $text = $remainder; - } - else { - # No tag match, so toss $cur_line into @chunks - push @chunks, $cur_line; - } - } - else { - # current line could NOT be start of code block - push @chunks, $cur_line; - } - - } - push @chunks, $text; # Whatever is left. - - $text = join '', @chunks; - - - - # Special case just for


. It was easier to make a special case than - # to make the other regex more complicated. - $text =~ s{ - (?: - (?<=\n\n) # Starting after a blank line - | # or - \A\n? # the beginning of the doc - ) - ( # save in $1 - [ ]{0,$less_than_tab} - <(hr) # start tag = $2 - \b # word break - ([^<>])*? # - /?> # the matching end tag - [ \t]* - (?=\n{2,}|\Z) # followed by a blank line or end of document - ) - }{ - my $key = md5_hex($1); - $g_html_blocks{$key} = $1; - "\n\n" . $key . "\n\n"; - }egx; - - # Special case for standalone HTML comments: - $text =~ s{ - (?: - (?<=\n\n) # Starting after a blank line - | # or - \A\n? # the beginning of the doc - ) - ( # save in $1 - [ ]{0,$less_than_tab} - (?s: - - ) - [ \t]* - (?=\n{2,}|\Z) # followed by a blank line or end of document - ) - }{ - my $key = md5_hex($1); - $g_html_blocks{$key} = $1; - "\n\n" . $key . "\n\n"; - }egx; - - # PHP and ASP-style processor instructions ( and <%…%>) - $text =~ s{ - (?: - (?<=\n\n) # Starting after a blank line - | # or - \A\n? # the beginning of the doc - ) - ( # save in $1 - [ ]{0,$less_than_tab} - (?s: - <([?%]) # $2 - .*? - \2> - ) - [ \t]* - (?=\n{2,}|\Z) # followed by a blank line or end of document - ) - }{ - my $key = md5_hex($1); - $g_html_blocks{$key} = $1; - "\n\n" . $key . "\n\n"; - }egx; - - - return $text; -} - - -sub _RunBlockGamut { -# -# These are all the transformations that form block-level -# tags like paragraphs, headers, and list items. -# - my $text = shift; - - $text = _DoHeaders($text); - - # Do tables first to populate the table id's for cross-refs - # Escape
 so we don't get greedy with tables
-	$text = _DoTables($text);
-	
-	# And now, protect our tables
-	$text = _HashHTMLBlocks($text);
-
-	# Do Horizontal Rules:
-	$text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n tags around block-level tags.
-	$text = _HashHTMLBlocks($text);
-	$text = _FormParagraphs($text);
-
-	return $text;
-}
-
-
-sub _RunSpanGamut {
-#
-# These are all the transformations that occur *within* block-level
-# tags like paragraphs, headers, and list items.
-#
-	my $text = shift;
-
-	$text = _DoCodeSpans($text);
-	$text = _DoMathSpans($text);
-	$text = _EscapeSpecialCharsWithinTagAttributes($text);
-	$text = _EncodeBackslashEscapes($text);
-
-	# Process anchor and image tags. Images must come first,
-	# because ![foo][f] looks like an anchor.
-	$text = _DoImages($text);
-	$text = _DoAnchors($text);	
-
-	# Make links out of things like ``
-	# Must come after _DoAnchors(), because you can use < and >
-	# delimiters in inline links like [this]().
-	$text = _DoAutoLinks($text);
-	$text = _EncodeAmpsAndAngles($text);
-	$text = _DoItalicsAndBold($text);
-
-	# Do hard breaks:
-	$text =~ s/ {2,}\n/  -- encode [\ ` * _] so they
-# don't conflict with their use in Markdown for code, italics and strong.
-# We're replacing each such character with its corresponding MD5 checksum
-# value; this is likely overkill, but it should prevent us from colliding
-# with the escape values by accident.
-#
-	my $text = shift;
-	my $tokens ||= _TokenizeHTML($text);
-	$text = '';   # rebuild $text from the tokens
-
-	foreach my $cur_token (@$tokens) {
-		if ($cur_token->[0] eq "tag") {
-			$cur_token->[1] =~  s! \\ !$g_escape_table{'\\'}!gx;
-			$cur_token->[1] =~  s{ (?<=.)(?=.)  }{$g_escape_table{'`'}}gx;
-			$cur_token->[1] =~  s! \* !$g_escape_table{'*'}!gx;
-			$cur_token->[1] =~  s! _  !$g_escape_table{'_'}!gx;
-		}
-		$text .= $cur_token->[1];
-	}
-	return $text;
-}
-
-
-sub _DoAnchors {
-#
-# Turn Markdown link shortcuts into XHTML  tags.
-#
-	my $text = shift;
-
-	#
-	# First, handle reference-style links: [link text] [id]
-	#
-	$text =~ s{
-		(					# wrap whole match in $1
-		  \[
-		    ($g_nested_brackets)	# link text = $2
-		  \]
-
-		  [ ]?				# one optional space
-		  (?:\n[ ]*)?		# one optional newline followed by spaces
-
-		  \[
-		    (.*?)		# id = $3
-		  \]
-		)
-	}{
-		my $result;
-		my $whole_match = $1;
-		my $link_text   = $2;
-		my $link_id     = lc $3;
-
-		if ($link_id eq "") {
-			$link_id = lc $link_text;     # for shortcut links like [this][].
-		}
-
-		# Allow automatic cross-references to headers
-		my $label = Header2Label($link_id);
-		if (defined $g_urls{$link_id}) {
-			my $url = $g_urls{$link_id};
-			$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
-			$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold.
-			$result = "$}{$1};					# Remove <>'s surrounding URL, if present
-		$result = " tags.
-#
-	my $text = shift;
-
-	#
-	# First, handle reference-style labeled images: ![alt text][id]
-	#
-	$text =~ s{
-		(				# wrap whole match in $1
-		  !\[
-		    (.*?)		# alt text = $2
-		  \]
-
-		  [ ]?				# one optional space
-		  (?:\n[ ]*)?		# one optional newline followed by spaces
-
-		  \[
-		    (.*?)		# id = $3
-		  \]
-
-		)
-	}{
-		my $result;
-		my $whole_match = $1;
-		my $alt_text    = $2;
-		my $link_id     = lc $3;
-
-		if ($link_id eq "") {
-			$link_id = lc $alt_text;     # for shortcut links like ![this][].
-		}
-
-		$alt_text =~ s/"/"/g;
-		if (defined $g_urls{$link_id}) {
-			my $url = $g_urls{$link_id};
-			$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
-			$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold.
-			
-			my $label = Header2Label($alt_text);
-			$g_crossrefs{$label} = "#$label";
-			if (! defined $g_titles{$link_id}) {
-				$g_titles{$link_id} = $alt_text;
-			}
-			
-			$result = "\"$alt_text\"";$}{$1};					# Remove <>'s surrounding URL, if present
-
-		my $label = Header2Label($alt_text);
-		$g_crossrefs{$label} = "#$label";
-#		$g_titles{$label} = $alt_text;			# I think this line should not be here
-			
-		$result = "\"$alt_text\"";"  .  $header  .  "\n\n";
-	}egmx;
-
-	$text =~ s{ ^(.+?)(?:\s*(?"  .  $header  .  "\n\n";
-	}egmx;
-
-
-	# atx-style headers:
-	#	# Header 1
-	#	## Header 2
-	#	## Header 2 with closing hashes ##
-	#	...
-	#	###### Header 6
-	#
-	$text =~ s{
-			^(\#{1,6})	# $1 = string of #'s
-			[ \t]*
-			(.+?)		# $2 = Header text
-			[ \t]*
-			(?:(?"  .  $header  .  "\n\n";
-		}egmx;
-
-	return $text;
-}
-
-
-sub _DoLists {
-#
-# Form HTML ordered (numbered) and unordered (bulleted) lists.
-#
-	my $text = shift;
-	my $less_than_tab = $g_tab_width - 1;
-
-	# Re-usable patterns to match list item bullets and number markers:
-	my $marker_ul  = qr/[*+-]/;
-	my $marker_ol  = qr/\d+[.]/;
-	my $marker_any = qr/(?:$marker_ul|$marker_ol)/;
-
-	# Re-usable pattern to match any entirel ul or ol list:
-	my $whole_list = qr{
-		(								# $1 = whole list
-		  (								# $2
-			[ ]{0,$less_than_tab}
-			(${marker_any})				# $3 = first list item marker
-			[ \t]+
-		  )
-		  (?s:.+?)
-		  (								# $4
-			  \z
-			|
-			  \n{2,}
-			  (?=\S)
-			  (?!						# Negative lookahead for another list item marker
-				[ \t]*
-				${marker_any}[ \t]+
-			  )
-		  )
-		)
-	}mx;
-
-	# We use a different prefix before nested lists than top-level lists.
-	# See extended comment in _ProcessListItems().
-	#
-	# Note: There's a bit of duplication here. My original implementation
-	# created a scalar regex pattern as the conditional result of the test on
-	# $g_list_level, and then only ran the $text =~ s{...}{...}egmx
-	# substitution once, using the scalar as the pattern. This worked,
-	# everywhere except when running under MT on my hosting account at Pair
-	# Networks. There, this caused all rebuilds to be killed by the reaper (or
-	# perhaps they crashed, but that seems incredibly unlikely given that the
-	# same script on the same server ran fine *except* under MT. I've spent
-	# more time trying to figure out why this is happening than I'd like to
-	# admit. My only guess, backed up by the fact that this workaround works,
-	# is that Perl optimizes the substition when it can figure out that the
-	# pattern will never change, and when this optimization isn't on, we run
-	# afoul of the reaper. Thus, the slightly redundant code that uses two
-	# static s/// patterns rather than one conditional pattern.
-
-	if ($g_list_level) {
-		$text =~ s{
-				^
-				$whole_list
-			}{
-				my $list = $1;
-				my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
-
-				# Turn double returns into triple returns, so that we can make a
-				# paragraph for the last item in a list, if necessary:
-				$list =~ s/\n{2,}/\n\n\n/g;
-				my $result = _ProcessListItems($list, $marker_any);
-
-				# Trim any trailing whitespace, to put the closing ``
-				# up on the preceding line, to get it past the current stupid
-				# HTML block parser. This is a hack to work around the terrible
-				# hack that is the HTML block parser.
-				$result =~ s{\s+$}{};
-				$result = "<$list_type>" . $result . "\n";
-				$result;
-			}egmx;
-	}
-	else {
-		$text =~ s{
-				(?:(?<=\n\n)|\A\n?)
-				$whole_list
-			}{
-				my $list = $1;
-				my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
-				# Turn double returns into triple returns, so that we can make a
-				# paragraph for the last item in a list, if necessary:
-				$list =~ s/\n{2,}/\n\n\n/g;
-				my $result = _ProcessListItems($list, $marker_any);
-				$result = "<$list_type>\n" . $result . "\n";
-				$result;
-			}egmx;
-	}
-
-
-	return $text;
-}
-
-
-sub _ProcessListItems {
-#
-#	Process the contents of a single ordered or unordered list, splitting it
-#	into individual list items.
-#
-
-	my $list_str = shift;
-	my $marker_any = shift;
-
-
-	# The $g_list_level global keeps track of when we're inside a list.
-	# Each time we enter a list, we increment it; when we leave a list,
-	# we decrement. If it's zero, we're not in a list anymore.
-	#
-	# We do this because when we're not inside a list, we want to treat
-	# something like this:
-	#
-	#		I recommend upgrading to version
-	#		8. Oops, now this line is treated
-	#		as a sub-list.
-	#
-	# As a single paragraph, despite the fact that the second line starts
-	# with a digit-period-space sequence.
-	#
-	# Whereas when we're inside a list (or sub-list), that line will be
-	# treated as the start of a sub-list. What a kludge, huh? This is
-	# an aspect of Markdown's syntax that's hard to parse perfectly
-	# without resorting to mind-reading. Perhaps the solution is to
-	# change the syntax rules such that sub-lists must start with a
-	# starting cardinal number; e.g. "1." or "a.".
-
-	$g_list_level++;
-
-	# trim trailing blank lines:
-	$list_str =~ s/\n{2,}\z/\n/;
-
-
-	$list_str =~ s{
-		(\n)?							# leading line = $1
-		(^[ \t]*)						# leading whitespace = $2
-		($marker_any) [ \t]+			# list marker = $3
-		((?s:.+?)						# list item text   = $4
-		(\n{1,2}))
-		(?= \n* (\z | \2 ($marker_any) [ \t]+))
-	}{
-		my $item = $4;
-		my $leading_line = $1;
-		my $leading_space = $2;
-
-		if ($leading_line or ($item =~ m/\n{2,}/)) {
-			$item = _RunBlockGamut(_Outdent($item));
-		}
-		else {
-			# Recursion for sub-lists:
-			$item = _DoLists(_Outdent($item));
-			chomp $item;
-			$item = _RunSpanGamut($item);
-		}
-
-		"
  • " . $item . "
  • \n"; - }egmx; - - $g_list_level--; - return $list_str; -} - - - -sub _DoCodeBlocks { -# -# Process Markdown `
    ` blocks.
    -#	
    -
    -	my $text = shift;
    -
    -	$text =~ s{
    -			(?:\n\n|\A)
    -			(	            # $1 = the code block -- one or more lines, starting with a space/tab
    -			  (?:
    -			    (?:[ ]{$g_tab_width} | \t)  # Lines must start with a tab or a tab-width of spaces
    -			    .*\n+
    -			  )+
    -			)
    -			((?=^[ ]{0,$g_tab_width}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
    -		}{
    -			my $codeblock = $1;
    -			my $result; # return value
    -
    -			$codeblock = _EncodeCode(_Outdent($codeblock));
    -			$codeblock = _Detab($codeblock);
    -			$codeblock =~ s/\A\n+//; # trim leading newlines
    -			$codeblock =~ s/\n+\z//; # trim trailing newlines
    -
    -			$result = "\n\n
    " . $codeblock . "
    \n\n"; # CHANGED: Removed newline for MMD - - $result; - }egmx; - - return $text; -} - - -sub _DoCodeSpans { -# -# * Backtick quotes are used for spans. -# -# * You can use multiple backticks as the delimiters if you want to -# include literal backticks in the code span. So, this input: -# -# Just type ``foo `bar` baz`` at the prompt. -# -# Will translate to: -# -#

    Just type foo `bar` baz at the prompt.

    -# -# There's no arbitrary limit to the number of backticks you -# can use as delimters. If you need three consecutive backticks -# in your code, use four for delimiters, etc. -# -# * You can use spaces to get literal backticks at the edges: -# -# ... type `` `bar` `` ... -# -# Turns to: -# -# ... type `bar` ... -# - - my $text = shift; - - $text =~ s@ - (?$c
    "; - @egsx; - - return $text; -} - - -sub _EncodeCode { -# -# Encode/escape certain characters inside Markdown code runs. -# The point is that in code, these characters are literals, -# and lose their special Markdown meanings. -# - local $_ = shift; - - # Encode all ampersands; HTML entities are not - # entities within a Markdown code span. - s/&/&/g; - - # Encode $'s, but only if we're running under Blosxom. - # (Blosxom interpolates Perl variables in article bodies.) - { - no warnings 'once'; - if (defined($blosxom::version)) { - s/\$/$/g; - } - } - - - # Do the angle bracket song and dance: - s! < !<!gx; - s! > !>!gx; - - # Now, escape characters that are magic in Markdown: - s! \* !$g_escape_table{'*'}!gx; - s! _ !$g_escape_table{'_'}!gx; - s! { !$g_escape_table{'{'}!gx; - s! } !$g_escape_table{'}'}!gx; - s! \[ !$g_escape_table{'['}!gx; - s! \] !$g_escape_table{']'}!gx; - s! \\ !$g_escape_table{'\\'}!gx; - - return $_; -} - - -sub _DoItalicsAndBold { - my $text = shift; - - # Cave in - `*` and `_` behave differently... We'll see how it works out - - - # must go first: - $text =~ s{ (?$2}gsx; - - $text =~ s{ (?$2}gsx; - - # And now, a second pass to catch nested strong and emphasis special cases - $text =~ s{ (?$2}gsx; - - $text =~ s{ (?$2}gsx; - - # And now, allow `*` in the middle of words - - # must go first: - $text =~ s{ (\*\*) (?=\S) (.+?[*]*) (?<=\S) \1 } - {$2}gsx; - - $text =~ s{ (\*) (?=\S) (.+?) (?<=\S) \1 } - {$2}gsx; - - return $text; -} - - -sub _DoBlockQuotes { - my $text = shift; - - $text =~ s{ - ( # Wrap whole match in $1 - ( - ^[ \t]*>[ \t]? # '>' at the start of a line - .+\n # rest of the first line - (.+\n)* # subsequent consecutive lines - \n* # blanks - )+ - ) - }{ - my $bq = $1; - $bq =~ s/^[ \t]*>[ \t]?//gm; # trim one level of quoting - $bq =~ s/^[ \t]+$//mg; # trim whitespace-only lines - $bq = _RunBlockGamut($bq); # recurse - - $bq =~ s/^/ /g; - # These leading spaces screw with
     content, so we need to fix that:
    -			$bq =~ s{
    -					(\s*
    .+?
    ) - }{ - my $pre = $1; - $pre =~ s/^ //mg; - $pre; - }egsx; +eval {require MultiMarkdown}; +if ($@) { + my $me = __FILE__; - "
    \n$bq\n
    \n\n"; - }egmx; + my $path = dirname(dirname($me)); + $path = File::Spec->join($path, 'lib'); - return $text; -} - - -sub _FormParagraphs { -# -# Params: -# $text - string to process with html

    tags -# - my $text = shift; - - # Strip leading and trailing lines: - $text =~ s/\A\n+//; - $text =~ s/\n+\z//; - - my @grafs = split(/\n{2,}/, $text); - - # - # Wrap

    tags. - # - foreach (@grafs) { - unless (defined( $g_html_blocks{$_} )) { - $_ = _RunSpanGamut($_); - s/^([ \t]*)/

    /; - $_ .= "

    "; - } - } - - # - # Unhashify HTML blocks - # -# foreach my $graf (@grafs) { -# my $block = $g_html_blocks{$graf}; -# if (defined $block) { -# $graf = $block; -# } -# } - - foreach my $graf (@grafs) { - # Modify elements of @grafs in-place... - my $block = $g_html_blocks{$graf}; - if (defined $block) { - $graf = $block; - if ($block =~ m{ - \A - ( # $1 =
    tag -
    ]* - \b - markdown\s*=\s* (['"]) # $2 = attr quote char - 1 - \2 - [^>]* - > - ) - ( # $3 = contents - .* - ) - (
    ) # $4 = closing tag - \z - - }xms - ) { - my ($div_open, $div_content, $div_close) = ($1, $3, $4); - - # We can't call Markdown(), because that resets the hash; - # that initialization code should be pulled into its own sub, though. - $div_content = _HashHTMLBlocks($div_content); - $div_content = _StripLinkDefinitions($div_content); - $div_content = _RunBlockGamut($div_content); - $div_content = _UnescapeSpecialChars($div_content); - - $div_open =~ s{\smarkdown\s*=\s*(['"]).+?\1}{}ms; - - $graf = $div_open . "\n" . $div_content . "\n" . $div_close; - } - } - } - - - return join "\n\n", @grafs; -} - - -sub _EncodeAmpsAndAngles { -# Smart processing for ampersands and angle brackets that need to be encoded. - - my $text = shift; - - # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: - # http://bumppo.net/projects/amputator/ - $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&/g; - - # Encode naked <'s - $text =~ s{<(?![a-z/?\$!])}{<}gi; - - return $text; -} - - -sub _EncodeBackslashEscapes { -# -# Parameter: String. -# Returns: The string, with after processing the following backslash -# escape sequences. -# - local $_ = shift; - - s! \\\\ !$g_escape_table{'\\'}!gx; # Must process escaped backslashes first. - s! \\` !$g_escape_table{'`'}!gx; - s! \\\* !$g_escape_table{'*'}!gx; - s! \\_ !$g_escape_table{'_'}!gx; - s! \\\{ !$g_escape_table{'{'}!gx; - s! \\\} !$g_escape_table{'}'}!gx; - s! \\\[ !$g_escape_table{'['}!gx; - s! \\\] !$g_escape_table{']'}!gx; - s! \\\( !$g_escape_table{'('}!gx; - s! \\\) !$g_escape_table{')'}!gx; - s! \\> !$g_escape_table{'>'}!gx; - s! \\\# !$g_escape_table{'#'}!gx; - s! \\\+ !$g_escape_table{'+'}!gx; - s! \\\- !$g_escape_table{'-'}!gx; - s! \\\. !$g_escape_table{'.'}!gx; - s{ \\! }{$g_escape_table{'!'}}gx; - - return $_; -} - - -sub _DoAutoLinks { - my $text = shift; - - $text =~ s{<((https?|ftp|dict):[^'">\s]+)>}{
    $1}gi; - - # Email addresses: - $text =~ s{ - < - (?:mailto:)? - ( - [-.\w]+ - \@ - [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+ - ) - > - }{ - _EncodeEmailAddress( _UnescapeSpecialChars($1) ); - }egix; - - return $text; -} - - -sub _EncodeEmailAddress { -# -# Input: an email address, e.g. "foo@example.com" -# -# Output: the email address as a mailto link, with each character -# of the address encoded as either a decimal or hex entity, in -# the hopes of foiling most address harvesting spam bots. E.g.: -# -# foo -# @example.com -# -# Based on a filter by Matthew Wickline, posted to the BBEdit-Talk -# mailing list: -# - - my $addr = shift; - - srand; - my @encode = ( - sub { '&#' . ord(shift) . ';' }, - sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' }, - sub { shift }, - ); - - $addr = "mailto:" . $addr; - - $addr =~ s{(.)}{ - my $char = $1; - if ( $char eq '@' ) { - # this *must* be encoded. I insist. - $char = $encode[int rand 1]->($char); - } elsif ( $char ne ':' ) { - # leave ':' alone (to spot mailto: later) - my $r = rand; - # roughly 10% raw, 45% hex, 45% dec - $char = ( - $r > .9 ? $encode[2]->($char) : - $r < .45 ? $encode[1]->($char) : - $encode[0]->($char) - ); - } - $char; - }gex; + unshift (@INC, $path); - $addr = qq{$addr}; - $addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part - - return $addr; + require MultiMarkdown; } - - -sub _UnescapeSpecialChars { -# -# Swap back in all the special characters we've hidden. -# - my $text = shift; - - while( my($char, $hash) = each(%g_escape_table) ) { - $text =~ s/$hash/$char/g; - } - return $text; -} - - -sub _TokenizeHTML { -# -# Parameter: String containing HTML markup. -# Returns: Reference to an array of the tokens comprising the input -# string. Each token is either a tag (possibly with nested, -# tags contained therein, such as , or a -# run of text between tags. Each element of the array is a -# two-element array; the first is either 'tag' or 'text'; -# the second is the actual value. 
-# -# -# Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin. -# -# - - my $str = shift; - my $pos = 0; - my $len = length $str; - my @tokens; - - my $depth = 6; - my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth); - my $match = qr/(?s: ) | # comment - (?s: <\? .*? \?> ) | # processing instruction - $nested_tags/ix; # nested tags - - while ($str =~ m/($match)/g) { - my $whole_tag = $1; - my $sec_start = pos $str; - my $tag_start = $sec_start - length $whole_tag; - if ($pos < $tag_start) { - push @tokens, ['text', substr($str, $pos, $tag_start - $pos)]; - } - push @tokens, ['tag', $whole_tag]; - $pos = pos $str; - } - push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len; - - return \@tokens; -} - - -sub _Outdent { -# -# Remove one level of line-leading tabs or spaces -# - my $text = shift; - - $text =~ s/^(\t|[ ]{1,$g_tab_width})//gm; - return $text; -} - - -sub _Detab { -# -# Cribbed from a post by Bart Lateur: -# -# - my $text = shift; - - $text =~ s{(.*?)\t}{$1.(' ' x ($g_tab_width - length($1) % $g_tab_width))}ge; - return $text; -} - -# -# MultiMarkdown Routines -# - -sub _ParseMetaData { - my $text = shift; - my $clean_text = ""; - - my ($inMetaData, $currentKey) = (1,''); - - # If only metadata is "Format: complete" then skip - - if ($text =~ s/^(Format):\s*complete\n(.*?)\n/$2\n/is) { - # If "Format: complete" was added automatically, don't force first - # line of text to be metadata - $g_metadata{$1}= "complete"; - $g_document_format = "complete"; - } - - foreach my $line ( split /\n/, $text ) { - $line =~ /^$/ and $inMetaData = 0; - if ($inMetaData) { - if ($line =~ /^([a-zA-Z0-9][0-9a-zA-Z _-]*?):\s*(.*)$/ ) { - $currentKey = $1; - my $meta = $2; - $currentKey =~ s/\s+/ /g; - $currentKey =~ s/\s$//; - $g_metadata{$currentKey} = $meta; - if (lc($currentKey) eq "format") { - $g_document_format = lc($g_metadata{$currentKey}); - } - if (lc($currentKey) eq "base url") { - $g_base_url = 
$g_metadata{$currentKey}; - } - if (lc($currentKey) eq "bibliography title") { - $g_bibliography_title = $g_metadata{$currentKey}; - $g_bibliography_title =~ s/\s*$//; - } - if (lc($currentKey) eq "base header level") { - $g_base_header_level = $g_metadata{$currentKey}; - } - if (!$g_metadata_newline{$currentKey}) { - $g_metadata_newline{$currentKey} = $g_metadata_newline{default}; - } - } else { - if ($currentKey eq "") { - # No metadata present - $clean_text .= "$line\n"; - $inMetaData = 0; - next; - } - if ($line =~ /^\s*(.+)$/ ) { - $g_metadata{$currentKey} .= "$g_metadata_newline{$currentKey}$1"; - } - } - } else { - $clean_text .= "$line\n"; - } - } - - return $clean_text; -} - -sub _StripFootnoteDefinitions { - my $text = shift; - my $less_than_tab = $g_tab_width - 1; - - while ($text =~ s{ - \n[ ]{0,$less_than_tab}\[\^([^\n]+?)\]\:[ \t]*# id = $1 - \n? - (.*?)\n{1,2} # end at new paragraph - ((?=\n[ ]{0,$less_than_tab}\S)|\Z) # Lookahead for non-space at line-start, or end of doc - } - {\n}sx) - { - my $id = $1; - my $footnote = "$2\n"; - $footnote =~ s/^[ ]{0,$g_tab_width}//gm; - - $g_footnotes{id2footnote($id)} = $footnote; - } - - return $text; -} - -sub _DoFootnotes { - my $text = shift; - - # First, run routines that get skipped in footnotes - foreach my $label (sort keys %g_footnotes) { - my $footnote = _RunBlockGamut($g_footnotes{$label}); - - $footnote = _DoMarkdownCitations($footnote); - $g_footnotes{$label} = $footnote; - } - - $text =~ s{ - \[\^(.+?)\] # id = $1 - }{ - my $result = ""; - my $id = id2footnote($1); - if (defined $g_footnotes{$id} ) { - $g_footnote_counter++; - if ($g_footnotes{$id} =~ /^(

    )?glossary:/i) { - $result = "$g_footnote_counter"; - } else { - $result = "$g_footnote_counter"; - } - push (@g_used_footnotes,$id); - } - $result; - }xsge; - - return $text; -} - -sub _FixFootnoteParagraphs { - my $text = shift; - - $text =~ s/^\\<\/footnote\>/<\/footnote>/gm; - - return $text; -} - -sub _PrintFootnotes{ - my $footnote_counter = 0; - my $result = ""; - - foreach my $id (@g_used_footnotes) { - $footnote_counter++; - my $footnote = $g_footnotes{$id}; - my $footnote_closing_tag = ""; - - $footnote =~ s/(\<\/(p(re)?|ol|ul)\>)$//; - $footnote_closing_tag = $1; - - if ($footnote =~ s/^(

    )?glossary:\s*//i) { - # Add some formatting for glossary entries - - $footnote =~ s{ - ^(.*?) # $1 = term - \s* - (?:\(([^\(\)]*)\)[^\n]*)? # $2 = optional sort key - \n - }{ - my $glossary = "$1"; - - if ($2) { - $glossary.="$2"; - }; - - $glossary . ":

    "; - }egsx; - - $result.="

  • $footnote ↩$footnote_closing_tag
  • \n\n"; - } else { - $result.="
  • $footnote ↩$footnote_closing_tag
  • \n\n"; - } - } - $result .= "\n
    "; - - if ($footnote_counter > 0) { - $result = "\n\n
    \n\n\n".$result; - } else { - $result = ""; - } - - $result= _UnescapeSpecialChars($result); - return $result; -} - -sub Header2Label { - my $header = shift; - my $label = lc $header; - $label =~ s/[^A-Za-z0-9:_.-]//g; # Strip illegal characters - while ($label =~ s/^[^A-Za-z]//g) - {}; # Strip illegal leading characters - return $label; -} - -sub id2footnote { - # Since we prepend "fn:", we can allow leading digits in footnotes - my $id = shift; - my $footnote = lc $id; - $footnote =~ s/[^A-Za-z0-9:_.-]//g; # Strip illegal characters - return $footnote; -} - - -sub xhtmlMetaData { - my $result = qq{\n}; - - # This screws up xsltproc - make sure to use `-nonet -novalid` if you - # have difficulty - if ($g_allow_mathml) { - $result .= qq{ -\n}; - - $result.= qq{\n\t\n}; - } else { - $result .= qq{\n}; - - $result.= qq!\n\t\n!; - } - - $result.= "\t\t\n"; - - foreach my $key (sort keys %g_metadata ) { - # Strip trailing spaces - $g_metadata{$key} =~ s/(\s)*$//s; - - # Strip spaces from key - my $export_key = $key; - $export_key =~ s/\s//g; - - if (lc($key) eq "title") { - $result.= "\t\t" . _EncodeAmpsAndAngles($g_metadata{$key}) . 
"\n"; - } elsif (lc($key) eq "css") { - $result.= "\t\t\n/g; - - if ($result ne "") { - $result.= "\n"; - } - - return $result; -} - -sub _ConvertCopyright{ - my $text = shift; - # Convert to an XML compatible form of copyright symbol - - $text =~ s/©/©/gi; - - return $text; -} - - -sub _DoTables { - my $text = shift; - my $less_than_tab = $g_tab_width - 1; - - # Algorithm inspired by PHP Markdown Extra's table support - # - - # Reusable regexp's to match table - - my $line_start = qr{ - [ ]{0,$less_than_tab} - }mx; - - my $table_row = qr{ - [^\n]*?\|[^\n]*?\n - }mx; - - my $first_row = qr{ - $line_start - \S+.*?\|.*?\n - }mx; - - my $table_rows = qr{ - (\n?$table_row) - }mx; - - my $table_caption = qr{ - $line_start - \[.*?\][ \t]*\n - }mx; - - my $table_divider = qr{ - $line_start - [\|\-\+\:\.][ \-\+\|\:\.]* \| [ \-\+\|\:\.]* - }mx; - - my $whole_table = qr{ - ($table_caption)? # Optional caption - ($first_row # First line must start at beginning - ($table_row)*?)? # Header Rows - $table_divider # Divider/Alignment definitions - $table_rows+ # Body Rows - ($table_caption)? 
# Optional caption - }mx; - - - # Find whole tables, then break them up and process them - - $text =~ s{ - ^($whole_table) # Whole table in $1 - (\n|\Z) # End of file or 2 blank lines - }{ - my $table = $1; - - # Clean extra spaces at end of lines - - # they cause the processing to choke - $table =~ s/[\t ]*\n/\n/gs; - - my $result = "\n"; - my @alignments; - my $use_row_header = 1; - - # Add Caption, if present - - if ($table =~ s/^$line_start(?:\[\s*(.*)\s*\])?(?:\[\s*(.*?)\s*\])[ \t]*$//m) { - my $table_id = ""; - my $table_caption = ""; - - $table_id = Header2Label($2); - - if (defined $1) { - $table_caption = $1; - } else { - $table_caption = $2; - } - $result .= "\n"; - - $g_crossrefs{$table_id} = "#$table_id"; - $g_titles{$table_id} = "see table"; # captions with "stuff" in them break links - } - - # If a second "caption" is present, treat it as a summary - # However, this is not valid in XHTML 1.0 Strict - # But maybe in future - - # A summary might be longer than one line - if ($table =~ s/\n$line_start\[\s*(.*?)\s*\][ \t]*\n/\n/s) { - # $result .= "" . _RunSpanGamut($1) . "\n"; - } - - # Now, divide table into header, alignment, and body - - # First, add leading \n in case there is no header - - $table = "\n" . $table; - - # Need to be greedy - - $table =~ s/\n($table_divider)\n(($table_rows)+)//s; - - my $body = ""; - my $alignment_string = ""; - if (defined $1){ - $alignment_string = $1; - } - if (defined $2){ - $body = $2; - } - - # Process column alignment - while ($alignment_string =~ /\|?\s*(.+?)\s*(\||\Z)/gs) { - my $cell = _RunSpanGamut($1); - if ($cell =~ /\+/){ - $result .= "$cell\n"; - } else { - $result .= "\t<$cell_type$colspan>$cell\n"; - } - $count++; - } - $result .= "\n"; - } - - # Strip out empty sections - $result =~ s/\s*<\/thead>\s*//s; - - # Handle pull-quotes - - # This might be too specific for my needs. If others want it - # removed, I am open to discussion. - - $result =~ s/
    " . _RunSpanGamut($table_caption). "
    \s*\s*/
    \n\n/s; - - $result .= "\n
    \n"; - $result - }egmx; - - my $table_body = qr{ - ( # wrap whole match in $2 - - (.*?\|.*?)\n # wrap headers in $3 - - [ ]{0,$less_than_tab} - ($table_divider) # alignment in $4 - - ( # wrap cells in $5 - $table_rows - ) - ) - }mx; - - return $text; -} - - -sub _DoAttributes{ - my $id = shift; - my $result = ""; - - if (defined $g_attributes{$id}) { - my $attributes = $g_attributes{$id}; - while ($attributes =~ s/(\S+)="(.*?)"//) { - $result .= " $1=\"$2\""; - } - while ($attributes =~ /(\S+)=(\S+)/g) { - $result .= " $1=\"$2\""; - } - } - - return $result; -} - - -sub _StripMarkdownReferences { - my $text = shift; - my $less_than_tab = $g_tab_width - 1; - - while ($text =~ s{ - \n\[\#(.+?)\]:[ \t]* # id = $1 - \n? - (.*?)\n{1,2} # end at new paragraph - ((?=\n[ ]{0,$less_than_tab}\S)|\Z) # Lookahead for non-space at line-start, or end of doc - } - {\n}sx) - { - my $id = $1; - my $reference = "$2\n"; - - $reference =~ s/^[ ]{0,$g_tab_width}//gm; - - $reference = _RunBlockGamut($reference); - - # strip leading and trailing

    tags (they will be added later) - $reference =~ s/^\//s; - $reference =~ s/\<\/p\>\s*$//s; - - $g_references{$id} = $reference; - } - - return $text; -} - -sub _DoMarkdownCitations { - my $text = shift; - - $text =~ s{ # Allow for citations without locator to be written - \[\#([^\[]*?)\] # in usual manner, e.g. [#author][] rather than - [ ]? # [][#author] - (?:\n[ ]*)? - \[\s*\] - }{ - "[][#$1]"; - }xsge; - - $text =~ s{ - \[([^\[]*?)\] # citation text = $1 - [ ]? # one optional space - (?:\n[ ]*)? # one optional newline followed by spaces - \[\#(.*?)\] # id = $2 - }{ - my $result; - my $anchor_text = $1; - my $id = $2; - my $count; - - # implement equivalent to \citet - my $textual_string = ""; - if ($anchor_text =~ s/^(.*?);\s*//) { - $textual_string = "$1"; - } - - if (defined $g_references{$id} ) { - my $citation_counter=0; - - # See if citation has been used before - foreach my $old_id (@g_used_references) { - $citation_counter++; - $count = $citation_counter if ($old_id eq $id); - } - - if (! defined $count) { - $g_citation_counter++; - $count = $g_citation_counter; - push (@g_used_references,$id); - } - - $result = "$textual_string ($count"; - - if ($anchor_text ne "") { - $result .=", $anchor_text"; - } - - $result .= ")"; - } else { - # No reference exists - $result = "$textual_string ($id"; - - if ($anchor_text ne "") { - $result .=", $anchor_text"; - } - - $result .= ")"; - } - - if (Header2Label($anchor_text) eq "notcited"){ - $result = ""; - } - $result; - }xsge; - - return $text; - -} - -sub _PrintMarkdownBibliography{ - my $citation_counter = 0; - my $result; - - foreach my $id (@g_used_references) { - $citation_counter++; - $result.="

    [$citation_counter] $g_references{$id}

    \n\n"; - } - $result .= "
    "; - - if ($citation_counter > 0) { - $result = "\n\n
    \n$g_bibliography_title

    \n\n".$result; - } else { - $result = ""; - } - - return $result; -} - -sub _GenerateImageCrossRefs { - my $text = shift; - - # - # First, handle reference-style labeled images: ![alt text][id] - # - $text =~ s{ - ( # wrap whole match in $1 - !\[ - (.*?) # alt text = $2 - \] - - [ ]? # one optional space - (?:\n[ ]*)? # one optional newline followed by spaces - - \[ - (.*?) # id = $3 - \] - - ) - }{ - my $result; - my $whole_match = $1; - my $alt_text = $2; - my $link_id = lc $3; - - if ($link_id eq "") { - $link_id = lc $alt_text; # for shortcut links like ![this][]. - } - - $alt_text =~ s/"/"/g; - if (defined $g_urls{$link_id}) { - my $label = Header2Label($alt_text); - $g_crossrefs{$label} = "#$label"; - } - else { - # If there's no such link ID, leave intact: - $result = $whole_match; - } - - $whole_match; - }xsge; - - # - # Next, handle inline images: ![alt text](url "optional title") - # Don't forget: encode * and _ - - $text =~ s{ - ( # wrap whole match in $1 - !\[ - (.*?) # alt text = $2 - \] - \( # literal paren - [ \t]* - ? # src url = $3 - [ \t]* - ( # $4 - (['"]) # quote char = $5 ' - (.*?) # title = $6 - \5 # matching quote - [ \t]* - )? # title is optional - \) - ) - }{ - my $result; - my $whole_match = $1; - my $alt_text = $2; - - $alt_text =~ s/"/"/g; - my $label = Header2Label($alt_text); - $g_crossrefs{$label} = "#$label"; - $whole_match; - }xsge; - - return $text; -} - -sub _FindMathEquations{ - my $text = shift; - - $text =~ s{ - (\]*)id=\"(.*?)\"> # " - }{ - my $label = Header2Label($2); - my $header = _RunSpanGamut($2); - - $g_crossrefs{$label} = "#$label"; - $g_titles{$label} = $header; - - $1 . 
"id=\"$label\">"; - }xsge; - - return $text; -} - -sub _DoMathSpans { - # Based on Gruber's _DoCodeSpans - - my $text = shift; - my $display_as_block = 0; - $display_as_block = 1 if ($text =~ /^<<[^\>\>]*>>$/); - - $text =~ s{ - (?>) - }{ - my $m = "$2"; - my $label = ""; - my @attr = (xmlns=>"http://www.w3.org/1998/Math/MathML"); - - if (defined $3) { - $label = Header2Label($3); - my $header = _RunSpanGamut($3); - - $g_crossrefs{$label} = "#$label"; - $g_titles{$label} = $header; - } - $m =~ s/^[ \t]*//g; # leading whitespace - $m =~ s/[ \t]*$//g; # trailing whitespace - push(@attr,(id=>"$label")) if ($label ne ""); - push(@attr,(display=>"block")) if ($display_as_block == 1); - - $m = $mathParser->TextToMathML($m,\@attr); - "$m"; - }egsx; - - return $text; -} - -sub _DoDefinitionLists { - # Uses the syntax proposed by Michel Fortin in PHP Markdown Extra - - my $text = shift; - my $less_than_tab = $g_tab_width -1; - - my $line_start = qr{ - [ ]{0,$less_than_tab} - }mx; - - my $term = qr{ - $line_start - [^:\s][^\n]*\n - }sx; - - my $definition = qr{ - \n?[ ]{0,$less_than_tab} - \:[ \t]+(.*?)\n - ((?=\n?\:)|\n|\Z) # Lookahead for next definition, two returns, - # or the end of the document - }sx; - - my $definition_block = qr{ - ((?:$term)+) # $1 = one or more terms - ((?:$definition)+) # $2 = by one or more definitions - }sx; - - my $definition_list = qr{ - (?:$definition_block\n*)+ # One ore more definition blocks - }sx; - - $text =~ s{ - ($definition_list) # $1 = the whole list - }{ - my $list = $1; - my $result = $1; - - $list =~ s{ - (?:$definition_block)\n* - }{ - my $terms = $1; - my $defs = $2; - - $terms =~ s{ - [ ]{0,$less_than_tab} - (.*) - \s* - }{ - my $term = $1; - my $result = ""; - $term =~ s/^\s*(.*?)\s*$/$1/; - if ($term !~ /^\s*$/){ - $result = "
    " . _RunSpanGamut($1) . "
    \n"; - } - $result; - }xmge; - - $defs =~ s{ - $definition - }{ - my $def = $1 . "\n"; - $def =~ s/^[ ]{0,$g_tab_width}//gm; - "
    \n" . _RunBlockGamut($def) . "\n
    \n"; - }xsge; - - $terms . $defs . "\n"; - }xsge; - - "
    \n" . $list . "
    \n\n"; - }xsge; - - return $text -} - -sub _UnescapeComments{ - # Remove encoding inside comments - # Based on proposal by Toras Doran (author of Text::MultiMarkdown) - - my $text = shift; - $text =~ s{ - (?<=) # End comments - }{ - my $t = $1; - $t =~ s/&/&/g; - $t =~ s/</ - - -=head1 SYNOPSIS - -B [ B<--html4tags> ] [ B<--version> ] [ B<-shortversion> ] - [ I ... ] - - -=head1 DESCRIPTION - -MultiMarkdown is an extended version of Markdown. See the website for more -information. - - http://fletcherpenney.net/multimarkdown/ - -Markdown is a text-to-HTML filter; it translates an easy-to-read / -easy-to-write structured text format into HTML. Markdown's text format -is most similar to that of plain text email, and supports features such -as headers, *emphasis*, code blocks, blockquotes, and links. - -Markdown's syntax is designed not as a generic markup language, but -specifically to serve as a front-end to (X)HTML. You can use span-level -HTML tags anywhere in a Markdown document, and you can use block level -HTML tags (like
    and as well). - -For more information about Markdown's syntax, see: - - http://daringfireball.net/projects/markdown/ - - -=head1 OPTIONS - -Use "--" to end switch parsing. For example, to open a file named "-z", use: - - Markdown.pl -- -z - -=over 4 - - -=item B<--html4tags> - -Use HTML 4 style for empty element tags, e.g.: - -
    - -instead of Markdown's default XHTML style tags, e.g.: - -
    - - -=item B<-v>, B<--version> - -Display Markdown's version number and copyright information. - - -=item B<-s>, B<--shortversion> - -Display the short-form version number. - - -=back - - - -=head1 BUGS - -To file bug reports or feature requests (other than topics listed in the -Caveats section above) please send email to: - - support@daringfireball.net (for Markdown issues) - - owner@fletcherpenney.net (for MultiMarkdown issues) - -Please include with your report: (1) the example input; (2) the output -you expected; (3) the output (Multi)Markdown actually produced. - - -=head1 AUTHOR - - John Gruber - http://daringfireball.net/ - - PHP port and other contributions by Michel Fortin - http://michelf.com/ - - MultiMarkdown changes by Fletcher Penney - http://fletcherpenney.net/ - -=head1 COPYRIGHT AND LICENSE - -Original Markdown Code Copyright (c) 2003-2007 John Gruber - -All rights reserved. - -MultiMarkdown changes Copyright (c) 2005-2009 Fletcher T. Penney - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name "Markdown" nor the names of its contributors may - be used to endorse or promote products derived from this software - without specific prior written permission. - -This software is provided by the copyright holders and contributors "as -is" and any express or implied warranties, including, but not limited -to, the implied warranties of merchantability and fitness for a -particular purpose are disclaimed. 
In no event shall the copyright owner -or contributors be liable for any direct, indirect, incidental, special, -exemplary, or consequential damages (including, but not limited to, -procurement of substitute goods or services; loss of use, data, or -profits; or business interruption) however caused and on any theory of -liability, whether in contract, strict liability, or tort (including -negligence or otherwise) arising in any way out of the use of this -software, even if advised of the possibility of such damage. - -=cut - diff --git a/bin/ASCIIMathML.pm b/lib/ASCIIMathML.pm similarity index 100% rename from bin/ASCIIMathML.pm rename to lib/ASCIIMathML.pm diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm new file mode 100755 index 0000000..9120023 --- /dev/null +++ b/lib/MultiMarkdown.pm @@ -0,0 +1,2699 @@ +#!/usr/bin/env perl + +# MultiMarkdown -- A modification of John Gruber's original Markdown +# that adds new features and an output format that can more readily +# be converted into other document formats +# +# $Id: MultiMarkdown.pl 525 2009-06-15 18:45:44Z fletcher $ +# +# Original Code Copyright (c) 2004-2007 John Gruber +# +# +# MultiMarkdown changes Copyright (c) 2005-2009 Fletcher T. Penney +# +# +# MultiMarkdown Version 2.0.b6 +# +# Based on Markdown.pl 1.0.2b8 - Wed 09 May 2007 +# +# +# TODO: Change math mode delimiter? +# TODO: Still need to get the glossary working in non-memoir documents +# TODO: A mechanism to include arbitrary code (LaTeX, etc) without being "ugly" +# TODO: Look into discussion re: assigning classes to div's/span's on Markdown list. +# TODO: Improve support for tables with long items and overall width in LaTeX +# TODO: Need a syntax for long table cells in MMD, even if no rowspan feature yet +# TODO: Create utilities to convert MMD tables to/from tab-delimited + + +package MultiMarkdown; +require 5.006_000; +use strict; +use warnings; + +use File::Basename; + +eval {require MT}; # Test to see if we're running in MT. 
+unless ($@) { + require Text::ASCIIMathML; +} else { # Otherwise look for ASCIIMathML.pm next to MultiMarkdown.pl + my $me = __FILE__; # Where am I? + + # Am I running in Windoze? + my $os = $^O; + + # Get just the directory portion + if ($os =~ /MSWin/) { + $me = dirname($me) . "\\"; + } else { + $me = dirname($me) . "/"; + } + require $me ."ASCIIMathML.pm"; +} + + +use Digest::MD5 qw(md5_hex); +use vars qw($VERSION $g_use_metadata $g_base_url + $g_bibliography_title $g_allow_mathml $g_base_header_level $mathParser); +$VERSION = '2.0.b6'; + +$mathParser = new Text::ASCIIMathML(); + +## Disabled; causes problems under Perl 5.6.1: +# use utf8; +# binmode( STDOUT, ":utf8" ); # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html + +# +# Global default settings: +# +my $g_empty_element_suffix = " />"; # Change to ">" for HTML output +my $g_tab_width = 4; +my $g_allow_mathml = 1; +my $g_base_header_level = 1; + +# +# Globals: +# + +# Reusable patterns to match balanced [brackets] and (parens). See +# Friedl's "Mastering Regular Expressions", 2nd Ed., pp. 328-331. 
+my ($g_nested_brackets, $g_nested_parens); +$g_nested_brackets = qr{ + (?> # Atomic matching + [^\[\]]+ # Anything other than brackets + | + \[ + (??{ $g_nested_brackets }) # Recursive set of nested brackets + \] + )* +}x; + +# Doesn't allow for whitespace, because we're using it to match URLs: +$g_nested_parens = qr{ + (?> # Atomic matching + [^()\s]+ # Anything other than parens or whitespace + | + \( + (??{ $g_nested_parens }) # Recursive set of nested brackets + \) + )* +}x; + + +# Table of hash values for escaped characters: +my %g_escape_table; +foreach my $char (split //, '\\`*_{}[]()>#+-.!') { + $g_escape_table{$char} = md5_hex($char); +} + + +# Global hashes, used by various utility routines +my %g_urls = (); +my %g_titles= (); +my %g_html_blocks = (); +my %g_metadata = (); +my %g_metadata_newline = (); +my %g_crossrefs = (); +my %g_footnotes = (); +my %g_attributes = (); +my @g_used_footnotes = (); +my $g_footnote_counter = 0; + +my $g_citation_counter = 0; +my @g_used_references = (); +my %g_references = (); +$g_bibliography_title = "Bibliography"; + +$g_use_metadata = 1; +$g_metadata_newline{default} = "\n"; +$g_metadata_newline{keywords} = ", "; +my $g_document_format = ""; + +# Used to track when we're inside an ordered or unordered list +# (see _ProcessListItems() for details): +my $g_list_level = 0; + + +#### Blosxom plug-in interface ########################################## + +# Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine +# which posts Markdown should process, using a "meta-markup: markdown" +# header. If it's set to 0 (the default), Markdown will process all +# entries. +my $g_blosxom_use_meta = 0; + +sub start { 1; } +sub story { + my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_; + + if ( (! 
$g_blosxom_use_meta) or + (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i)) + ){ + $$body_ref = Markdown($$body_ref); + } + 1; +} + + +#### Movable Type plug-in interface ##################################### +eval {require MT}; # Test to see if we're running in MT. +unless ($@) { + require MT; + import MT; + require MT::Template::Context; + import MT::Template::Context; + + eval {require MT::Plugin}; # Test to see if we're running >= MT 3.0. + unless ($@) { + require MT::Plugin; + import MT::Plugin; + my $plugin = new MT::Plugin({ + name => "MultiMarkdown", + description => "Based on the original Markdown", + doc_link => 'http://fletcherpenney.net/multimarkdown/' + }); + MT->add_plugin( $plugin ); + } + + MT::Template::Context->add_container_tag(MultiMarkdownOptions => sub { + my $ctx = shift; + my $args = shift; + my $builder = $ctx->stash('builder'); + my $tokens = $ctx->stash('tokens'); + + if (defined ($args->{'output'}) ) { + $ctx->stash('multimarkdown_output', lc $args->{'output'}); + } + + defined (my $str = $builder->build($ctx, $tokens) ) + or return $ctx->error($builder->errstr); + $str; # return value + }); + + MT->add_text_filter('multimarkdown' => { + label => 'MultiMarkdown', + docs => 'http://fletcherpenney.net/MultiMarkdown/', + on_format => sub { + my $text = shift; + my $ctx = shift; + my $raw = 0; + if (defined $ctx) { + my $output = $ctx->stash('multimarkdown_output'); + if (defined $output && $output =~ m/^html/i) { + $g_empty_element_suffix = ">"; + $ctx->stash('multimarkdown_output', ''); + } + elsif (defined $output && $output eq 'raw') { + $raw = 1; + $ctx->stash('multimarkdown_output', ''); + } + else { + $raw = 0; + $g_empty_element_suffix = " />"; + } + } + $text = $raw ? 
$text : Markdown($text); + $text; + }, + }); + + # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter: + my $smartypants; + + { + no warnings "once"; + $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'}; + } + + if ($smartypants) { + MT->add_text_filter('multimarkdown_with_smartypants' => { + label => 'MultiMarkdown With SmartyPants', + docs => 'http://fletcherpenney.net/MultiMarkdown/', + on_format => sub { + my $text = shift; + my $ctx = shift; + if (defined $ctx) { + my $output = $ctx->stash('multimarkdown_output'); + if (defined $output && $output eq 'html') { + $g_empty_element_suffix = ">"; + } + else { + $g_empty_element_suffix = " />"; + } + } + $text = Markdown($text); + $text = $smartypants->($text, '1'); + }, + }); + } +} +else { +#### BBEdit/command-line text filter interface ########################## +# Needs to be hidden from MT (and Blosxom when running in static mode). + + # We're only using $blosxom::version once; tell Perl not to warn us: + no warnings 'once'; + unless ( defined($blosxom::version) ) { + use warnings; + + #### Check for command-line switches: ################# + my %cli_opts; + use Getopt::Long; + Getopt::Long::Configure('pass_through'); + GetOptions(\%cli_opts, + 'version', + 'shortversion', + 'html4tags', + ); + if ($cli_opts{'version'}) { # Version info + print "\nThis is MultiMarkdown, version $VERSION.\n"; + print "Original code Copyright 2004 John Gruber\n"; + print "MultiMarkdown changes Copyright 2005-2009 Fletcher Penney\n"; + print "http://fletcherpenney.net/multimarkdown/\n"; + print "http://daringfireball.net/projects/markdown/\n\n"; + exit 0; + } + if ($cli_opts{'shortversion'}) { # Just the version number string. 
+ print $VERSION; + exit 0; + } + if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML + $g_empty_element_suffix = ">"; + } + + + #### Process incoming text: ########################### + my $text; + { + local $/; # Slurp the whole file + $text = <>; + } + print Markdown($text); + } +} + + + +sub Markdown { +# +# Main function. The order in which other subs are called here is +# essential. Link and image substitutions need to happen before +# _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the +# and tags get encoded. +# + my $text = shift; + + # Clear the global hashes. If we don't clear these, you get conflicts + # from other articles when generating a page which contains more than + # one article (e.g. an index page that shows the N most recent + # articles): + %g_urls = (); + %g_titles = (); + %g_html_blocks = (); + %g_metadata = (); + %g_crossrefs = (); + %g_footnotes = (); + @g_used_footnotes = (); + $g_footnote_counter = 0; + @g_used_references = (); + %g_references = (); + $g_citation_counter = 0; + %g_attributes = (); + + + # Standardize line endings: + $text =~ s{\r\n}{\n}g; # DOS to Unix + $text =~ s{\r}{\n}g; # Mac to Unix + + # Make sure $text ends with a couple of newlines: + $text .= "\n\n"; + + # Convert all tabs to spaces. + $text = _Detab($text); + + # Strip any lines consisting only of spaces and tabs. + # This makes subsequent regexen easier to write, because we can + # match consecutive blank lines with /\n+/ instead of something + # contorted like /[ \t]*\n+/ . + $text =~ s/^[ \t]+$//mg; + + # Strip out MetaData + $text = _ParseMetaData($text) if $g_use_metadata; + + # And recheck for leading blank lines + $text =~ s/^\n+//s; + + # Turn block-level HTML blocks into hash entries + $text = _HashHTMLBlocks($text); + + # Strip footnote and link definitions, store in hashes. 
+ $text = _StripFootnoteDefinitions($text); + + $text = _StripLinkDefinitions($text); + + _GenerateImageCrossRefs($text); + + $text = _StripMarkdownReferences($text); + + $text = _RunBlockGamut($text); + + $text = _DoMarkdownCitations($text); + + $text = _DoFootnotes($text); + + $text = _UnescapeSpecialChars($text); + + # Clean encoding within HTML comments + $text = _UnescapeComments($text); + + $text = _FixFootnoteParagraphs($text); + $text .= _PrintFootnotes(); + + $text .= _PrintMarkdownBibliography(); + + $text = _ConvertCopyright($text); + + if (lc($g_document_format) =~ /^complete\s*$/i) { + return xhtmlMetaData() . "\n\n" . $text . "\n\n"; + } elsif (lc($g_document_format) =~ /^snippet\s*$/i) { + return $text . "\n"; + } else { + return $g_document_format . textMetaData() . $text . "\n"; + } + +} + + +sub _StripLinkDefinitions { +# +# Strips link definitions from text, stores the URLs and titles in +# hash references. +# + my $text = shift; + my $less_than_tab = $g_tab_width - 1; + + # Link defs are in the form: ^[id]: url "optional title" + while ($text =~ s{ + # Pattern altered for MultiMarkdown + # in order to not match citations or footnotes + ^[ ]{0,$less_than_tab}\[([^#^].*)\]: # id = $1 + [ \t]* + \n? # maybe *one* newline + [ \t]* + ? # url = $2 + [ \t]* + \n? # maybe one newline + [ \t]* + (?: + (?<=\s) # lookbehind for whitespace + ["(] + (.+?) # title = $3 + [")] + [ \t]* + )? # title is optional + + # MultiMarkdown addition for attribute support + \n? + ( # Attributes = $4 + (?<=\s) # lookbehind for whitespace + (([ \t]*\n)?[ \t]*((\S+=\S+)|(\S+=".*?")))* + )? 
+ [ \t]* + # /addition + (?:\n+|\Z) + } + {}mx) { +# $g_urls{lc $1} = _EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive + $g_urls{lc $1} = $2; # Link IDs are case-insensitive + if ($3) { + $g_titles{lc $1} = $3; + $g_titles{lc $1} =~ s/"/"/g; + } + + # MultiMarkdown addition " + if ($4) { + $g_attributes{lc $1} = $4; + } + # /addition + } + + return $text; +} + +sub _StripHTML { + # Strip (X)HTML code from string + my $text = shift; + + $text =~ s/<.*?>//g; + + return $text; +} + +sub _HashHTMLBlocks { + my $text = shift; + my $less_than_tab = $g_tab_width - 1; + + # Hashify HTML blocks: + # We only want to do this for block-level HTML tags, such as headers, + # lists, and tables. That's because we still want to wrap

    s around + # "paragraphs" that are wrapped in non-block-level tags, such as anchors, + # phrase emphasis, and spans. The list of tags we're looking for is + # hard-coded: + my $block_tags = qr{ + (?: + p | div | h[1-6] | blockquote | pre | table | + dl | ol | ul | script | noscript | form | + fieldset | iframe | ins | del + ) + }x; # MultiMarkdown does not include `math` in the above list so that + # Equations can optionally be included in separate paragraphs + + my $tag_attrs = qr{ + (?: # Match one attr name/value pair + \s+ # There needs to be at least some whitespace + # before each attribute name. + [\w.:_-]+ # Attribute name + \s*=\s* + (?: + ".+?" # "Attribute value" + | + '.+?' # 'Attribute value' + ) + )* # Zero or more + }x; + + my $empty_tag = qr{< \w+ $tag_attrs \s* />}xms; + my $open_tag = qr{< $block_tags $tag_attrs \s* >}xms; + my $close_tag = undef; # let Text::Balanced handle this + + use Text::Balanced qw(gen_extract_tagged); + my $extract_block = gen_extract_tagged($open_tag, $close_tag, undef, { ignore => [$empty_tag] }); + + my @chunks; + ## TO-DO: the 0,3 on the next line ought to respect the + ## tabwidth, or else, we should mandate 4-space tabwidth and + ## be done with it: + while ($text =~ s{^(([ ]{0,3}<)?.*\n)}{}m) { + my $cur_line = $1; + if (defined $2) { + # current line could be start of code block + + my ($tag, $remainder) = $extract_block->($cur_line . $text); + if ($tag) { + my $key = md5_hex($tag); + $g_html_blocks{$key} = $tag; + push @chunks, "\n\n" . $key . "\n\n"; + $text = $remainder; + } + else { + # No tag match, so toss $cur_line into @chunks + push @chunks, $cur_line; + } + } + else { + # current line could NOT be start of code block + push @chunks, $cur_line; + } + + } + push @chunks, $text; # Whatever is left. + + $text = join '', @chunks; + + + + # Special case just for


    . It was easier to make a special case than + # to make the other regex more complicated. + $text =~ s{ + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{0,$less_than_tab} + <(hr) # start tag = $2 + \b # word break + ([^<>])*? # + /?> # the matching end tag + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egx; + + # Special case for standalone HTML comments: + $text =~ s{ + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{0,$less_than_tab} + (?s: + + ) + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egx; + + # PHP and ASP-style processor instructions ( and <%…%>) + $text =~ s{ + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{0,$less_than_tab} + (?s: + <([?%]) # $2 + .*? + \2> + ) + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egx; + + + return $text; +} + + +sub _RunBlockGamut { +# +# These are all the transformations that form block-level +# tags like paragraphs, headers, and list items. +# + my $text = shift; + + $text = _DoHeaders($text); + + # Do tables first to populate the table id's for cross-refs + # Escape
     so we don't get greedy with tables
    +	$text = _DoTables($text);
    +	
    +	# And now, protect our tables
    +	$text = _HashHTMLBlocks($text);
    +
    +	# Do Horizontal Rules:
    +	$text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n tags around block-level tags.
    +	$text = _HashHTMLBlocks($text);
    +	$text = _FormParagraphs($text);
    +
    +	return $text;
    +}
    +
    +
    +sub _RunSpanGamut {
    +#
    +# These are all the transformations that occur *within* block-level
    +# tags like paragraphs, headers, and list items.
    +#
    +# Parameter:  String holding the contents of one block.
    +# Returns:    The string after every span-level pass below has run.
    +#
    +# The passes are order-dependent; the comments on each group say why.
    +#
    +	my $text = shift;
    +
    +	$text = _DoCodeSpans($text);
    +	$text = _DoMathSpans($text);
    +	$text = _EscapeSpecialCharsWithinTagAttributes($text);
    +	$text = _EncodeBackslashEscapes($text);
    +
    +	# Process anchor and image tags. Images must come first,
    +	# because ![foo][f] looks like an anchor.
    +	$text = _DoImages($text);
    +	$text = _DoAnchors($text);
    +
    +	# Make links out of things like `<http://example.com/>`
    +	# Must come after _DoAnchors(), because you can use < and >
    +	# delimiters in inline links like [this](<url>).
    +	$text = _DoAutoLinks($text);
    +	$text = _EncodeAmpsAndAngles($text);
    +	$text = _DoItalicsAndBold($text);
    +
    +	# Do hard breaks: two or more spaces before a newline become a
    +	# line-break tag, closed with whatever suffix is currently configured
    +	# in $g_empty_element_suffix.
    +	$text =~ s/ {2,}\n/ <br$g_empty_element_suffix\n/g;
    +
    +	return $text;
    +}
    +
    +
    +sub _EscapeSpecialCharsWithinTagAttributes {
    +#
    +# Within tags -- meaning between < and > -- encode [\ ` * _] so they
    +# don't conflict with their use in Markdown for code, italics and strong.
    +# We're replacing each such character with its corresponding MD5 checksum
    +# value; this is likely overkill, but it should prevent us from colliding
    +# with the escape values by accident.
    +#
    +# Parameter:  String possibly containing HTML tags.
    +# Returns:    The string rebuilt from its tokens, with the characters
    +#             above replaced by their %g_escape_table entries inside
    +#             'tag' tokens only; 'text' tokens pass through untouched.
    +#
    +	my $text = shift;
    +	my $tokens = _TokenizeHTML($text);
    +	$text = '';   # rebuild $text from the tokens
    +
    +	foreach my $cur_token (@$tokens) {
    +		if ($cur_token->[0] eq "tag") {
    +			$cur_token->[1] =~  s! \\ !$g_escape_table{'\\'}!gx;
    +			# A <code> or </code> found *inside* a tag token (i.e. with at
    +			# least one character on each side) is swapped for the backtick
    +			# escape. Presumably these come from _DoCodeSpans() having
    +			# already converted backticks within an attribute -- verify.
    +			$cur_token->[1] =~  s{ (?<=.)</?code>(?=.)  }{$g_escape_table{'`'}}gx;
    +			$cur_token->[1] =~  s! \* !$g_escape_table{'*'}!gx;
    +			$cur_token->[1] =~  s! _  !$g_escape_table{'_'}!gx;
    +		}
    +		$text .= $cur_token->[1];
    +	}
    +	return $text;
    +}
    +
    +
    +sub _DoAnchors {
    +#
    +# Turn Markdown link shortcuts into XHTML  tags.
    +#
    +	my $text = shift;
    +
    +	#
    +	# First, handle reference-style links: [link text] [id]
    +	#
    +	$text =~ s{
    +		(					# wrap whole match in $1
    +		  \[
    +		    ($g_nested_brackets)	# link text = $2
    +		  \]
    +
    +		  [ ]?				# one optional space
    +		  (?:\n[ ]*)?		# one optional newline followed by spaces
    +
    +		  \[
    +		    (.*?)		# id = $3
    +		  \]
    +		)
    +	}{
    +		my $result;
    +		my $whole_match = $1;
    +		my $link_text   = $2;
    +		my $link_id     = lc $3;
    +
    +		if ($link_id eq "") {
    +			$link_id = lc $link_text;     # for shortcut links like [this][].
    +		}
    +
    +		# Allow automatic cross-references to headers
    +		my $label = Header2Label($link_id);
    +		if (defined $g_urls{$link_id}) {
    +			my $url = $g_urls{$link_id};
    +			$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
    +			$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold.
    +			$result = "$}{$1};					# Remove <>'s surrounding URL, if present
    +		$result = " tags.
    +#
    +	my $text = shift;
    +
    +	#
    +	# First, handle reference-style labeled images: ![alt text][id]
    +	#
    +	$text =~ s{
    +		(				# wrap whole match in $1
    +		  !\[
    +		    (.*?)		# alt text = $2
    +		  \]
    +
    +		  [ ]?				# one optional space
    +		  (?:\n[ ]*)?		# one optional newline followed by spaces
    +
    +		  \[
    +		    (.*?)		# id = $3
    +		  \]
    +
    +		)
    +	}{
    +		my $result;
    +		my $whole_match = $1;
    +		my $alt_text    = $2;
    +		my $link_id     = lc $3;
    +
    +		if ($link_id eq "") {
    +			$link_id = lc $alt_text;     # for shortcut links like ![this][].
    +		}
    +
    +		$alt_text =~ s/"/"/g;
    +		if (defined $g_urls{$link_id}) {
    +			my $url = $g_urls{$link_id};
    +			$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
    +			$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold.
    +			
    +			my $label = Header2Label($alt_text);
    +			$g_crossrefs{$label} = "#$label";
    +			if (! defined $g_titles{$link_id}) {
    +				$g_titles{$link_id} = $alt_text;
    +			}
    +			
    +			$result = "\"$alt_text\"";$}{$1};					# Remove <>'s surrounding URL, if present
    +
    +		my $label = Header2Label($alt_text);
    +		$g_crossrefs{$label} = "#$label";
    +#		$g_titles{$label} = $alt_text;			# I think this line should not be here
    +			
    +		$result = "\"$alt_text\"";"  .  $header  .  "\n\n";
    +	}egmx;
    +
    +	$text =~ s{ ^(.+?)(?:\s*(?"  .  $header  .  "\n\n";
    +	}egmx;
    +
    +
    +	# atx-style headers:
    +	#	# Header 1
    +	#	## Header 2
    +	#	## Header 2 with closing hashes ##
    +	#	...
    +	#	###### Header 6
    +	#
    +	$text =~ s{
    +			^(\#{1,6})	# $1 = string of #'s
    +			[ \t]*
    +			(.+?)		# $2 = Header text
    +			[ \t]*
    +			(?:(?"  .  $header  .  "\n\n";
    +		}egmx;
    +
    +	return $text;
    +}
    +
    +
    +sub _DoLists {
    +#
    +# Form HTML ordered (numbered) and unordered (bulleted) lists.
    +#
    +# Parameter:  String of block-level text.
    +# Returns:    The string with each recognised list replaced by a
    +#             <ul>...</ul> or <ol>...</ol> element whose items were
    +#             built by _ProcessListItems().
    +#
    +# Which pattern is applied depends on $g_list_level: non-zero means we
    +# are already inside a list, so a list may start at any line; zero means
    +# a list must follow a blank line or start the document.
    +#
    +	my $text = shift;
    +	my $less_than_tab = $g_tab_width - 1;
    +
    +	# Re-usable patterns to match list item bullets and number markers:
    +	my $marker_ul  = qr/[*+-]/;
    +	my $marker_ol  = qr/\d+[.]/;
    +	my $marker_any = qr/(?:$marker_ul|$marker_ol)/;
    +
    +	# Re-usable pattern to match any entire ul or ol list:
    +	my $whole_list = qr{
    +		(								# $1 = whole list
    +		  (								# $2
    +			[ ]{0,$less_than_tab}
    +			(${marker_any})				# $3 = first list item marker
    +			[ \t]+
    +		  )
    +		  (?s:.+?)
    +		  (								# $4
    +			  \z
    +			|
    +			  \n{2,}
    +			  (?=\S)
    +			  (?!						# Negative lookahead for another list item marker
    +				[ \t]*
    +				${marker_any}[ \t]+
    +			  )
    +		  )
    +		)
    +	}mx;
    +
    +	# We use a different prefix before nested lists than top-level lists.
    +	# See extended comment in _ProcessListItems().
    +	#
    +	# Note: There's a bit of duplication here. My original implementation
    +	# created a scalar regex pattern as the conditional result of the test on
    +	# $g_list_level, and then only ran the $text =~ s{...}{...}egmx
    +	# substitution once, using the scalar as the pattern. This worked,
    +	# everywhere except when running under MT on my hosting account at Pair
    +	# Networks. There, this caused all rebuilds to be killed by the reaper (or
    +	# perhaps they crashed, but that seems incredibly unlikely given that the
    +	# same script on the same server ran fine *except* under MT. I've spent
    +	# more time trying to figure out why this is happening than I'd like to
    +	# admit. My only guess, backed up by the fact that this workaround works,
    +	# is that Perl optimizes the substitution when it can figure out that the
    +	# pattern will never change, and when this optimization isn't on, we run
    +	# afoul of the reaper. Thus, the slightly redundant code that uses two
    +	# static s/// patterns rather than one conditional pattern.
    +
    +	if ($g_list_level) {
    +		$text =~ s{
    +				^
    +				$whole_list
    +			}{
    +				my $list = $1;
    +				my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
    +
    +				# Turn double returns into triple returns, so that we can make a
    +				# paragraph for the last item in a list, if necessary:
    +				$list =~ s/\n{2,}/\n\n\n/g;
    +				my $result = _ProcessListItems($list, $marker_any);
    +
    +				# Trim any trailing whitespace, to put the closing `</$list_type>`
    +				# up on the preceding line, to get it past the current stupid
    +				# HTML block parser. This is a hack to work around the terrible
    +				# hack that is the HTML block parser.
    +				$result =~ s{\s+$}{};
    +				$result = "<$list_type>" . $result . "</$list_type>\n";
    +				$result;
    +			}egmx;
    +	}
    +	else {
    +		$text =~ s{
    +				(?:(?<=\n\n)|\A\n?)
    +				$whole_list
    +			}{
    +				my $list = $1;
    +				my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
    +				# Turn double returns into triple returns, so that we can make a
    +				# paragraph for the last item in a list, if necessary:
    +				$list =~ s/\n{2,}/\n\n\n/g;
    +				my $result = _ProcessListItems($list, $marker_any);
    +				# Every opened list element must be closed with its matching tag.
    +				$result = "<$list_type>\n" . $result . "</$list_type>\n";
    +				$result;
    +			}egmx;
    +	}
    +
    +
    +	return $text;
    +}
    +
    +
    +sub _ProcessListItems {
    +#
    +#	Process the contents of a single ordered or unordered list, splitting it
    +#	into individual list items.
    +#
    +
    +	my $list_str = shift;
    +	my $marker_any = shift;
    +
    +
    +	# The $g_list_level global keeps track of when we're inside a list.
    +	# Each time we enter a list, we increment it; when we leave a list,
    +	# we decrement. If it's zero, we're not in a list anymore.
    +	#
    +	# We do this because when we're not inside a list, we want to treat
    +	# something like this:
    +	#
    +	#		I recommend upgrading to version
    +	#		8. Oops, now this line is treated
    +	#		as a sub-list.
    +	#
    +	# As a single paragraph, despite the fact that the second line starts
    +	# with a digit-period-space sequence.
    +	#
    +	# Whereas when we're inside a list (or sub-list), that line will be
    +	# treated as the start of a sub-list. What a kludge, huh? This is
    +	# an aspect of Markdown's syntax that's hard to parse perfectly
    +	# without resorting to mind-reading. Perhaps the solution is to
    +	# change the syntax rules such that sub-lists must start with a
    +	# starting cardinal number; e.g. "1." or "a.".
    +
    +	$g_list_level++;
    +
    +	# trim trailing blank lines:
    +	$list_str =~ s/\n{2,}\z/\n/;
    +
    +
    +	$list_str =~ s{
    +		(\n)?							# leading line = $1
    +		(^[ \t]*)						# leading whitespace = $2
    +		($marker_any) [ \t]+			# list marker = $3
    +		((?s:.+?)						# list item text   = $4
    +		(\n{1,2}))
    +		(?= \n* (\z | \2 ($marker_any) [ \t]+))
    +	}{
    +		my $item = $4;
    +		my $leading_line = $1;
    +		my $leading_space = $2;
    +
    +		if ($leading_line or ($item =~ m/\n{2,}/)) {
    +			$item = _RunBlockGamut(_Outdent($item));
    +		}
    +		else {
    +			# Recursion for sub-lists:
    +			$item = _DoLists(_Outdent($item));
    +			chomp $item;
    +			$item = _RunSpanGamut($item);
    +		}
    +
    +		"
  • " . $item . "
  • \n"; + }egmx; + + $g_list_level--; + return $list_str; +} + + + +sub _DoCodeBlocks { +# +# Process Markdown `
    ` blocks.
    +#	
    +
    +	my $text = shift;
    +
    +	$text =~ s{
    +			(?:\n\n|\A)
    +			(	            # $1 = the code block -- one or more lines, starting with a space/tab
    +			  (?:
    +			    (?:[ ]{$g_tab_width} | \t)  # Lines must start with a tab or a tab-width of spaces
    +			    .*\n+
    +			  )+
    +			)
    +			((?=^[ ]{0,$g_tab_width}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
    +		}{
    +			my $codeblock = $1;
    +			my $result; # return value
    +
    +			$codeblock = _EncodeCode(_Outdent($codeblock));
    +			$codeblock = _Detab($codeblock);
    +			$codeblock =~ s/\A\n+//; # trim leading newlines
    +			$codeblock =~ s/\n+\z//; # trim trailing newlines
    +
    +			$result = "\n\n
    " . $codeblock . "
    \n\n"; # CHANGED: Removed newline for MMD + + $result; + }egmx; + + return $text; +} + + +sub _DoCodeSpans { +# +# * Backtick quotes are used for spans. +# +# * You can use multiple backticks as the delimiters if you want to +# include literal backticks in the code span. So, this input: +# +# Just type ``foo `bar` baz`` at the prompt. +# +# Will translate to: +# +#

    Just type foo `bar` baz at the prompt.

    +# +# There's no arbitrary limit to the number of backticks you +# can use as delimters. If you need three consecutive backticks +# in your code, use four for delimiters, etc. +# +# * You can use spaces to get literal backticks at the edges: +# +# ... type `` `bar` `` ... +# +# Turns to: +# +# ... type `bar` ... +# + + my $text = shift; + + $text =~ s@ + (?$c
    "; + @egsx; + + return $text; +} + + +sub _EncodeCode { +# +# Encode/escape certain characters inside Markdown code runs. +# The point is that in code, these characters are literals, +# and lose their special Markdown meanings. +# + local $_ = shift; + + # Encode all ampersands; HTML entities are not + # entities within a Markdown code span. + s/&/&/g; + + # Encode $'s, but only if we're running under Blosxom. + # (Blosxom interpolates Perl variables in article bodies.) + { + no warnings 'once'; + if (defined($blosxom::version)) { + s/\$/$/g; + } + } + + + # Do the angle bracket song and dance: + s! < !<!gx; + s! > !>!gx; + + # Now, escape characters that are magic in Markdown: + s! \* !$g_escape_table{'*'}!gx; + s! _ !$g_escape_table{'_'}!gx; + s! { !$g_escape_table{'{'}!gx; + s! } !$g_escape_table{'}'}!gx; + s! \[ !$g_escape_table{'['}!gx; + s! \] !$g_escape_table{']'}!gx; + s! \\ !$g_escape_table{'\\'}!gx; + + return $_; +} + + +sub _DoItalicsAndBold { + my $text = shift; + + # Cave in - `*` and `_` behave differently... We'll see how it works out + + + # must go first: + $text =~ s{ (?$2}gsx; + + $text =~ s{ (?$2}gsx; + + # And now, a second pass to catch nested strong and emphasis special cases + $text =~ s{ (?$2}gsx; + + $text =~ s{ (?$2}gsx; + + # And now, allow `*` in the middle of words + + # must go first: + $text =~ s{ (\*\*) (?=\S) (.+?[*]*) (?<=\S) \1 } + {$2}gsx; + + $text =~ s{ (\*) (?=\S) (.+?) (?<=\S) \1 } + {$2}gsx; + + return $text; +} + + +sub _DoBlockQuotes { + my $text = shift; + + $text =~ s{ + ( # Wrap whole match in $1 + ( + ^[ \t]*>[ \t]? # '>' at the start of a line + .+\n # rest of the first line + (.+\n)* # subsequent consecutive lines + \n* # blanks + )+ + ) + }{ + my $bq = $1; + $bq =~ s/^[ \t]*>[ \t]?//gm; # trim one level of quoting + $bq =~ s/^[ \t]+$//mg; # trim whitespace-only lines + $bq = _RunBlockGamut($bq); # recurse + + $bq =~ s/^/ /g; + # These leading spaces screw with
     content, so we need to fix that:
    +			$bq =~ s{
    +					(\s*
    .+?
    ) + }{ + my $pre = $1; + $pre =~ s/^ //mg; + $pre; + }egsx; + + "
    \n$bq\n
    \n\n"; + }egmx; + + + return $text; +} + + +sub _FormParagraphs { +# +# Params: +# $text - string to process with html

    tags +# + my $text = shift; + + # Strip leading and trailing lines: + $text =~ s/\A\n+//; + $text =~ s/\n+\z//; + + my @grafs = split(/\n{2,}/, $text); + + # + # Wrap

    tags. + # + foreach (@grafs) { + unless (defined( $g_html_blocks{$_} )) { + $_ = _RunSpanGamut($_); + s/^([ \t]*)/

    /; + $_ .= "

    "; + } + } + + # + # Unhashify HTML blocks + # +# foreach my $graf (@grafs) { +# my $block = $g_html_blocks{$graf}; +# if (defined $block) { +# $graf = $block; +# } +# } + + foreach my $graf (@grafs) { + # Modify elements of @grafs in-place... + my $block = $g_html_blocks{$graf}; + if (defined $block) { + $graf = $block; + if ($block =~ m{ + \A + ( # $1 =
    tag +
    ]* + \b + markdown\s*=\s* (['"]) # $2 = attr quote char + 1 + \2 + [^>]* + > + ) + ( # $3 = contents + .* + ) + (
    ) # $4 = closing tag + \z + + }xms + ) { + my ($div_open, $div_content, $div_close) = ($1, $3, $4); + + # We can't call Markdown(), because that resets the hash; + # that initialization code should be pulled into its own sub, though. + $div_content = _HashHTMLBlocks($div_content); + $div_content = _StripLinkDefinitions($div_content); + $div_content = _RunBlockGamut($div_content); + $div_content = _UnescapeSpecialChars($div_content); + + $div_open =~ s{\smarkdown\s*=\s*(['"]).+?\1}{}ms; + + $graf = $div_open . "\n" . $div_content . "\n" . $div_close; + } + } + } + + + return join "\n\n", @grafs; +} + + +sub _EncodeAmpsAndAngles { +# Smart processing for ampersands and angle brackets that need to be encoded. + + my $text = shift; + + # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: + # http://bumppo.net/projects/amputator/ + $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&/g; + + # Encode naked <'s + $text =~ s{<(?![a-z/?\$!])}{<}gi; + + return $text; +} + + +sub _EncodeBackslashEscapes { +# +# Parameter: String. +# Returns: The string, with after processing the following backslash +# escape sequences. +# + local $_ = shift; + + s! \\\\ !$g_escape_table{'\\'}!gx; # Must process escaped backslashes first. + s! \\` !$g_escape_table{'`'}!gx; + s! \\\* !$g_escape_table{'*'}!gx; + s! \\_ !$g_escape_table{'_'}!gx; + s! \\\{ !$g_escape_table{'{'}!gx; + s! \\\} !$g_escape_table{'}'}!gx; + s! \\\[ !$g_escape_table{'['}!gx; + s! \\\] !$g_escape_table{']'}!gx; + s! \\\( !$g_escape_table{'('}!gx; + s! \\\) !$g_escape_table{')'}!gx; + s! \\> !$g_escape_table{'>'}!gx; + s! \\\# !$g_escape_table{'#'}!gx; + s! \\\+ !$g_escape_table{'+'}!gx; + s! \\\- !$g_escape_table{'-'}!gx; + s! \\\. !$g_escape_table{'.'}!gx; + s{ \\! }{$g_escape_table{'!'}}gx; + + return $_; +} + + +sub _DoAutoLinks { + my $text = shift; + + $text =~ s{<((https?|ftp|dict):[^'">\s]+)>}{
    $1}gi; + + # Email addresses: + $text =~ s{ + < + (?:mailto:)? + ( + [-.\w]+ + \@ + [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+ + ) + > + }{ + _EncodeEmailAddress( _UnescapeSpecialChars($1) ); + }egix; + + return $text; +} + + +sub _EncodeEmailAddress { +# +# Input: an email address, e.g. "foo@example.com" +# +# Output: the email address as a mailto link, with each character +# of the address encoded as either a decimal or hex entity, in +# the hopes of foiling most address harvesting spam bots. E.g.: +# +# foo +# @example.com +# +# Based on a filter by Matthew Wickline, posted to the BBEdit-Talk +# mailing list: +# + + my $addr = shift; + + srand; + my @encode = ( + sub { '&#' . ord(shift) . ';' }, + sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' }, + sub { shift }, + ); + + $addr = "mailto:" . $addr; + + $addr =~ s{(.)}{ + my $char = $1; + if ( $char eq '@' ) { + # this *must* be encoded. I insist. + $char = $encode[int rand 1]->($char); + } elsif ( $char ne ':' ) { + # leave ':' alone (to spot mailto: later) + my $r = rand; + # roughly 10% raw, 45% hex, 45% dec + $char = ( + $r > .9 ? $encode[2]->($char) : + $r < .45 ? $encode[1]->($char) : + $encode[0]->($char) + ); + } + $char; + }gex; + + $addr = qq{$addr}; + $addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part + + return $addr; +} + + +sub _UnescapeSpecialChars { +# +# Swap back in all the special characters we've hidden. +# + my $text = shift; + + while( my($char, $hash) = each(%g_escape_table) ) { + $text =~ s/$hash/$char/g; + } + return $text; +} + + +sub _TokenizeHTML { +# +# Parameter: String containing HTML markup. +# Returns: Reference to an array of the tokens comprising the input +# string. Each token is either a tag (possibly with nested, +# tags contained therein, such as , or a +# run of text between tags. Each element of the array is a +# two-element array; the first is either 'tag' or 'text'; +# the second is the actual value. 
+# +# +# Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin. +# +# + + my $str = shift; + my $pos = 0; + my $len = length $str; + my @tokens; + + my $depth = 6; + my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth); + my $match = qr/(?s: ) | # comment + (?s: <\? .*? \?> ) | # processing instruction + $nested_tags/ix; # nested tags + + while ($str =~ m/($match)/g) { + my $whole_tag = $1; + my $sec_start = pos $str; + my $tag_start = $sec_start - length $whole_tag; + if ($pos < $tag_start) { + push @tokens, ['text', substr($str, $pos, $tag_start - $pos)]; + } + push @tokens, ['tag', $whole_tag]; + $pos = pos $str; + } + push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len; + + return \@tokens; +} + + +sub _Outdent { +# +# Remove one level of line-leading tabs or spaces +# + my $text = shift; + + $text =~ s/^(\t|[ ]{1,$g_tab_width})//gm; + return $text; +} + + +sub _Detab { +# +# Cribbed from a post by Bart Lateur: +# +# + my $text = shift; + + $text =~ s{(.*?)\t}{$1.(' ' x ($g_tab_width - length($1) % $g_tab_width))}ge; + return $text; +} + +# +# MultiMarkdown Routines +# + +sub _ParseMetaData { + my $text = shift; + my $clean_text = ""; + + my ($inMetaData, $currentKey) = (1,''); + + # If only metadata is "Format: complete" then skip + + if ($text =~ s/^(Format):\s*complete\n(.*?)\n/$2\n/is) { + # If "Format: complete" was added automatically, don't force first + # line of text to be metadata + $g_metadata{$1}= "complete"; + $g_document_format = "complete"; + } + + foreach my $line ( split /\n/, $text ) { + $line =~ /^$/ and $inMetaData = 0; + if ($inMetaData) { + if ($line =~ /^([a-zA-Z0-9][0-9a-zA-Z _-]*?):\s*(.*)$/ ) { + $currentKey = $1; + my $meta = $2; + $currentKey =~ s/\s+/ /g; + $currentKey =~ s/\s$//; + $g_metadata{$currentKey} = $meta; + if (lc($currentKey) eq "format") { + $g_document_format = lc($g_metadata{$currentKey}); + } + if (lc($currentKey) eq "base url") { + $g_base_url = 
$g_metadata{$currentKey}; + } + if (lc($currentKey) eq "bibliography title") { + $g_bibliography_title = $g_metadata{$currentKey}; + $g_bibliography_title =~ s/\s*$//; + } + if (lc($currentKey) eq "base header level") { + $g_base_header_level = $g_metadata{$currentKey}; + } + if (!$g_metadata_newline{$currentKey}) { + $g_metadata_newline{$currentKey} = $g_metadata_newline{default}; + } + } else { + if ($currentKey eq "") { + # No metadata present + $clean_text .= "$line\n"; + $inMetaData = 0; + next; + } + if ($line =~ /^\s*(.+)$/ ) { + $g_metadata{$currentKey} .= "$g_metadata_newline{$currentKey}$1"; + } + } + } else { + $clean_text .= "$line\n"; + } + } + + return $clean_text; +} + +sub _StripFootnoteDefinitions { + my $text = shift; + my $less_than_tab = $g_tab_width - 1; + + while ($text =~ s{ + \n[ ]{0,$less_than_tab}\[\^([^\n]+?)\]\:[ \t]*# id = $1 + \n? + (.*?)\n{1,2} # end at new paragraph + ((?=\n[ ]{0,$less_than_tab}\S)|\Z) # Lookahead for non-space at line-start, or end of doc + } + {\n}sx) + { + my $id = $1; + my $footnote = "$2\n"; + $footnote =~ s/^[ ]{0,$g_tab_width}//gm; + + $g_footnotes{id2footnote($id)} = $footnote; + } + + return $text; +} + +sub _DoFootnotes { + my $text = shift; + + # First, run routines that get skipped in footnotes + foreach my $label (sort keys %g_footnotes) { + my $footnote = _RunBlockGamut($g_footnotes{$label}); + + $footnote = _DoMarkdownCitations($footnote); + $g_footnotes{$label} = $footnote; + } + + $text =~ s{ + \[\^(.+?)\] # id = $1 + }{ + my $result = ""; + my $id = id2footnote($1); + if (defined $g_footnotes{$id} ) { + $g_footnote_counter++; + if ($g_footnotes{$id} =~ /^(

    )?glossary:/i) { + $result = "$g_footnote_counter"; + } else { + $result = "$g_footnote_counter"; + } + push (@g_used_footnotes,$id); + } + $result; + }xsge; + + return $text; +} + +sub _FixFootnoteParagraphs { + my $text = shift; + + $text =~ s/^\\<\/footnote\>/<\/footnote>/gm; + + return $text; +} + +sub _PrintFootnotes{ + my $footnote_counter = 0; + my $result = ""; + + foreach my $id (@g_used_footnotes) { + $footnote_counter++; + my $footnote = $g_footnotes{$id}; + my $footnote_closing_tag = ""; + + $footnote =~ s/(\<\/(p(re)?|ol|ul)\>)$//; + $footnote_closing_tag = $1; + + if ($footnote =~ s/^(

    )?glossary:\s*//i) { + # Add some formatting for glossary entries + + $footnote =~ s{ + ^(.*?) # $1 = term + \s* + (?:\(([^\(\)]*)\)[^\n]*)? # $2 = optional sort key + \n + }{ + my $glossary = "$1"; + + if ($2) { + $glossary.="$2"; + }; + + $glossary . ":

    "; + }egsx; + + $result.="

  • $footnote ↩$footnote_closing_tag
  • \n\n"; + } else { + $result.="
  • $footnote ↩$footnote_closing_tag
  • \n\n"; + } + } + $result .= "\n
    "; + + if ($footnote_counter > 0) { + $result = "\n\n
    \n\n\n".$result; + } else { + $result = ""; + } + + $result= _UnescapeSpecialChars($result); + return $result; +} + +sub Header2Label { + my $header = shift; + my $label = lc $header; + $label =~ s/[^A-Za-z0-9:_.-]//g; # Strip illegal characters + while ($label =~ s/^[^A-Za-z]//g) + {}; # Strip illegal leading characters + return $label; +} + +sub id2footnote { + # Since we prepend "fn:", we can allow leading digits in footnotes + my $id = shift; + my $footnote = lc $id; + $footnote =~ s/[^A-Za-z0-9:_.-]//g; # Strip illegal characters + return $footnote; +} + + +sub xhtmlMetaData { + my $result = qq{\n}; + + # This screws up xsltproc - make sure to use `-nonet -novalid` if you + # have difficulty + if ($g_allow_mathml) { + $result .= qq{ +\n}; + + $result.= qq{\n\t\n}; + } else { + $result .= qq{\n}; + + $result.= qq!\n\t\n!; + } + + $result.= "\t\t\n"; + + foreach my $key (sort keys %g_metadata ) { + # Strip trailing spaces + $g_metadata{$key} =~ s/(\s)*$//s; + + # Strip spaces from key + my $export_key = $key; + $export_key =~ s/\s//g; + + if (lc($key) eq "title") { + $result.= "\t\t" . _EncodeAmpsAndAngles($g_metadata{$key}) . 
"\n"; + } elsif (lc($key) eq "css") { + $result.= "\t\t\n/g; + + if ($result ne "") { + $result.= "\n"; + } + + return $result; +} + +sub _ConvertCopyright{ + my $text = shift; + # Convert to an XML compatible form of copyright symbol + + $text =~ s/©/©/gi; + + return $text; +} + + +sub _DoTables { + my $text = shift; + my $less_than_tab = $g_tab_width - 1; + + # Algorithm inspired by PHP Markdown Extra's table support + # + + # Reusable regexp's to match table + + my $line_start = qr{ + [ ]{0,$less_than_tab} + }mx; + + my $table_row = qr{ + [^\n]*?\|[^\n]*?\n + }mx; + + my $first_row = qr{ + $line_start + \S+.*?\|.*?\n + }mx; + + my $table_rows = qr{ + (\n?$table_row) + }mx; + + my $table_caption = qr{ + $line_start + \[.*?\][ \t]*\n + }mx; + + my $table_divider = qr{ + $line_start + [\|\-\+\:\.][ \-\+\|\:\.]* \| [ \-\+\|\:\.]* + }mx; + + my $whole_table = qr{ + ($table_caption)? # Optional caption + ($first_row # First line must start at beginning + ($table_row)*?)? # Header Rows + $table_divider # Divider/Alignment definitions + $table_rows+ # Body Rows + ($table_caption)? # Optional caption + }mx; + + + # Find whole tables, then break them up and process them + + $text =~ s{ + ^($whole_table) # Whole table in $1 + (\n|\Z) # End of file or 2 blank lines + }{ + my $table = $1; + + # Clean extra spaces at end of lines - + # they cause the processing to choke + $table =~ s/[\t ]*\n/\n/gs; + + my $result = "
    \n"; + my @alignments; + my $use_row_header = 1; + + # Add Caption, if present + + if ($table =~ s/^$line_start(?:\[\s*(.*)\s*\])?(?:\[\s*(.*?)\s*\])[ \t]*$//m) { + my $table_id = ""; + my $table_caption = ""; + + $table_id = Header2Label($2); + + if (defined $1) { + $table_caption = $1; + } else { + $table_caption = $2; + } + $result .= "\n"; + + $g_crossrefs{$table_id} = "#$table_id"; + $g_titles{$table_id} = "see table"; # captions with "stuff" in them break links + } + + # If a second "caption" is present, treat it as a summary + # However, this is not valid in XHTML 1.0 Strict + # But maybe in future + + # A summary might be longer than one line + if ($table =~ s/\n$line_start\[\s*(.*?)\s*\][ \t]*\n/\n/s) { + # $result .= "" . _RunSpanGamut($1) . "\n"; + } + + # Now, divide table into header, alignment, and body + + # First, add leading \n in case there is no header + + $table = "\n" . $table; + + # Need to be greedy + + $table =~ s/\n($table_divider)\n(($table_rows)+)//s; + + my $body = ""; + my $alignment_string = ""; + if (defined $1){ + $alignment_string = $1; + } + if (defined $2){ + $body = $2; + } + + # Process column alignment + while ($alignment_string =~ /\|?\s*(.+?)\s*(\||\Z)/gs) { + my $cell = _RunSpanGamut($1); + if ($cell =~ /\+/){ + $result .= "$cell\n"; + } else { + $result .= "\t<$cell_type$colspan>$cell\n"; + } + $count++; + } + $result .= "\n"; + } + + # Strip out empty sections + $result =~ s/\s*<\/thead>\s*//s; + + # Handle pull-quotes + + # This might be too specific for my needs. If others want it + # removed, I am open to discussion. + + $result =~ s/
    " . _RunSpanGamut($table_caption). "
    \s*\s*/
    \n\n/s; + + $result .= "\n
    \n"; + $result + }egmx; + + my $table_body = qr{ + ( # wrap whole match in $2 + + (.*?\|.*?)\n # wrap headers in $3 + + [ ]{0,$less_than_tab} + ($table_divider) # alignment in $4 + + ( # wrap cells in $5 + $table_rows + ) + ) + }mx; + + return $text; +} + + +sub _DoAttributes{ + my $id = shift; + my $result = ""; + + if (defined $g_attributes{$id}) { + my $attributes = $g_attributes{$id}; + while ($attributes =~ s/(\S+)="(.*?)"//) { + $result .= " $1=\"$2\""; + } + while ($attributes =~ /(\S+)=(\S+)/g) { + $result .= " $1=\"$2\""; + } + } + + return $result; +} + + +sub _StripMarkdownReferences { + my $text = shift; + my $less_than_tab = $g_tab_width - 1; + + while ($text =~ s{ + \n\[\#(.+?)\]:[ \t]* # id = $1 + \n? + (.*?)\n{1,2} # end at new paragraph + ((?=\n[ ]{0,$less_than_tab}\S)|\Z) # Lookahead for non-space at line-start, or end of doc + } + {\n}sx) + { + my $id = $1; + my $reference = "$2\n"; + + $reference =~ s/^[ ]{0,$g_tab_width}//gm; + + $reference = _RunBlockGamut($reference); + + # strip leading and trailing

    tags (they will be added later) + $reference =~ s/^\//s; + $reference =~ s/\<\/p\>\s*$//s; + + $g_references{$id} = $reference; + } + + return $text; +} + +sub _DoMarkdownCitations { + my $text = shift; + + $text =~ s{ # Allow for citations without locator to be written + \[\#([^\[]*?)\] # in usual manner, e.g. [#author][] rather than + [ ]? # [][#author] + (?:\n[ ]*)? + \[\s*\] + }{ + "[][#$1]"; + }xsge; + + $text =~ s{ + \[([^\[]*?)\] # citation text = $1 + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + \[\#(.*?)\] # id = $2 + }{ + my $result; + my $anchor_text = $1; + my $id = $2; + my $count; + + # implement equivalent to \citet + my $textual_string = ""; + if ($anchor_text =~ s/^(.*?);\s*//) { + $textual_string = "$1"; + } + + if (defined $g_references{$id} ) { + my $citation_counter=0; + + # See if citation has been used before + foreach my $old_id (@g_used_references) { + $citation_counter++; + $count = $citation_counter if ($old_id eq $id); + } + + if (! defined $count) { + $g_citation_counter++; + $count = $g_citation_counter; + push (@g_used_references,$id); + } + + $result = "$textual_string ($count"; + + if ($anchor_text ne "") { + $result .=", $anchor_text"; + } + + $result .= ")"; + } else { + # No reference exists + $result = "$textual_string ($id"; + + if ($anchor_text ne "") { + $result .=", $anchor_text"; + } + + $result .= ")"; + } + + if (Header2Label($anchor_text) eq "notcited"){ + $result = ""; + } + $result; + }xsge; + + return $text; + +} + +sub _PrintMarkdownBibliography{ + my $citation_counter = 0; + my $result; + + foreach my $id (@g_used_references) { + $citation_counter++; + $result.="

    [$citation_counter] $g_references{$id}

    \n\n"; + } + $result .= "
    "; + + if ($citation_counter > 0) { + $result = "\n\n
    \n$g_bibliography_title

    \n\n".$result; + } else { + $result = ""; + } + + return $result; +} + +sub _GenerateImageCrossRefs { + my $text = shift; + + # + # First, handle reference-style labeled images: ![alt text][id] + # + $text =~ s{ + ( # wrap whole match in $1 + !\[ + (.*?) # alt text = $2 + \] + + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + + \[ + (.*?) # id = $3 + \] + + ) + }{ + my $result; + my $whole_match = $1; + my $alt_text = $2; + my $link_id = lc $3; + + if ($link_id eq "") { + $link_id = lc $alt_text; # for shortcut links like ![this][]. + } + + $alt_text =~ s/"/"/g; + if (defined $g_urls{$link_id}) { + my $label = Header2Label($alt_text); + $g_crossrefs{$label} = "#$label"; + } + else { + # If there's no such link ID, leave intact: + $result = $whole_match; + } + + $whole_match; + }xsge; + + # + # Next, handle inline images: ![alt text](url "optional title") + # Don't forget: encode * and _ + + $text =~ s{ + ( # wrap whole match in $1 + !\[ + (.*?) # alt text = $2 + \] + \( # literal paren + [ \t]* + ? # src url = $3 + [ \t]* + ( # $4 + (['"]) # quote char = $5 ' + (.*?) # title = $6 + \5 # matching quote + [ \t]* + )? # title is optional + \) + ) + }{ + my $result; + my $whole_match = $1; + my $alt_text = $2; + + $alt_text =~ s/"/"/g; + my $label = Header2Label($alt_text); + $g_crossrefs{$label} = "#$label"; + $whole_match; + }xsge; + + return $text; +} + +sub _FindMathEquations{ + my $text = shift; + + $text =~ s{ + (\]*)id=\"(.*?)\"> # " + }{ + my $label = Header2Label($2); + my $header = _RunSpanGamut($2); + + $g_crossrefs{$label} = "#$label"; + $g_titles{$label} = $header; + + $1 . 
"id=\"$label\">"; + }xsge; + + return $text; +} + +sub _DoMathSpans { + # Based on Gruber's _DoCodeSpans + + my $text = shift; + my $display_as_block = 0; + $display_as_block = 1 if ($text =~ /^<<[^\>\>]*>>$/); + + $text =~ s{ + (?>) + }{ + my $m = "$2"; + my $label = ""; + my @attr = (xmlns=>"http://www.w3.org/1998/Math/MathML"); + + if (defined $3) { + $label = Header2Label($3); + my $header = _RunSpanGamut($3); + + $g_crossrefs{$label} = "#$label"; + $g_titles{$label} = $header; + } + $m =~ s/^[ \t]*//g; # leading whitespace + $m =~ s/[ \t]*$//g; # trailing whitespace + push(@attr,(id=>"$label")) if ($label ne ""); + push(@attr,(display=>"block")) if ($display_as_block == 1); + + $m = $mathParser->TextToMathML($m,\@attr); + "$m"; + }egsx; + + return $text; +} + +sub _DoDefinitionLists { + # Uses the syntax proposed by Michel Fortin in PHP Markdown Extra + + my $text = shift; + my $less_than_tab = $g_tab_width -1; + + my $line_start = qr{ + [ ]{0,$less_than_tab} + }mx; + + my $term = qr{ + $line_start + [^:\s][^\n]*\n + }sx; + + my $definition = qr{ + \n?[ ]{0,$less_than_tab} + \:[ \t]+(.*?)\n + ((?=\n?\:)|\n|\Z) # Lookahead for next definition, two returns, + # or the end of the document + }sx; + + my $definition_block = qr{ + ((?:$term)+) # $1 = one or more terms + ((?:$definition)+) # $2 = by one or more definitions + }sx; + + my $definition_list = qr{ + (?:$definition_block\n*)+ # One ore more definition blocks + }sx; + + $text =~ s{ + ($definition_list) # $1 = the whole list + }{ + my $list = $1; + my $result = $1; + + $list =~ s{ + (?:$definition_block)\n* + }{ + my $terms = $1; + my $defs = $2; + + $terms =~ s{ + [ ]{0,$less_than_tab} + (.*) + \s* + }{ + my $term = $1; + my $result = ""; + $term =~ s/^\s*(.*?)\s*$/$1/; + if ($term !~ /^\s*$/){ + $result = "
    " . _RunSpanGamut($1) . "
    \n"; + } + $result; + }xmge; + + $defs =~ s{ + $definition + }{ + my $def = $1 . "\n"; + $def =~ s/^[ ]{0,$g_tab_width}//gm; + "
    \n" . _RunBlockGamut($def) . "\n
    \n"; + }xsge; + + $terms . $defs . "\n"; + }xsge; + + "
    \n" . $list . "
    \n\n"; + }xsge; + + return $text +} + +sub _UnescapeComments{ + # Remove encoding inside comments + # Based on proposal by Toras Doran (author of Text::MultiMarkdown) + + my $text = shift; + $text =~ s{ + (?<=) # End comments + }{ + my $t = $1; + $t =~ s/&/&/g; + $t =~ s/</ + + +=head1 SYNOPSIS + +B [ B<--html4tags> ] [ B<--version> ] [ B<-shortversion> ] + [ I ... ] + + +=head1 DESCRIPTION + +MultiMarkdown is an extended version of Markdown. See the website for more +information. + + http://fletcherpenney.net/multimarkdown/ + +Markdown is a text-to-HTML filter; it translates an easy-to-read / +easy-to-write structured text format into HTML. Markdown's text format +is most similar to that of plain text email, and supports features such +as headers, *emphasis*, code blocks, blockquotes, and links. + +Markdown's syntax is designed not as a generic markup language, but +specifically to serve as a front-end to (X)HTML. You can use span-level +HTML tags anywhere in a Markdown document, and you can use block level +HTML tags (like
    and as well). + +For more information about Markdown's syntax, see: + + http://daringfireball.net/projects/markdown/ + + +=head1 OPTIONS + +Use "--" to end switch parsing. For example, to open a file named "-z", use: + + Markdown.pl -- -z + +=over 4 + + +=item B<--html4tags> + +Use HTML 4 style for empty element tags, e.g.: + +
    + +instead of Markdown's default XHTML style tags, e.g.: + +
    + + +=item B<-v>, B<--version> + +Display Markdown's version number and copyright information. + + +=item B<-s>, B<--shortversion> + +Display the short-form version number. + + +=back + + + +=head1 BUGS + +To file bug reports or feature requests (other than topics listed in the +Caveats section above) please send email to: + + support@daringfireball.net (for Markdown issues) + + owner@fletcherpenney.net (for MultiMarkdown issues) + +Please include with your report: (1) the example input; (2) the output +you expected; (3) the output (Multi)Markdown actually produced. + + +=head1 AUTHOR + + John Gruber + http://daringfireball.net/ + + PHP port and other contributions by Michel Fortin + http://michelf.com/ + + MultiMarkdown changes by Fletcher Penney + http://fletcherpenney.net/ + +=head1 COPYRIGHT AND LICENSE + +Original Markdown Code Copyright (c) 2003-2007 John Gruber + +All rights reserved. + +MultiMarkdown changes Copyright (c) 2005-2009 Fletcher T. Penney + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +* Neither the name "Markdown" nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +This software is provided by the copyright holders and contributors "as +is" and any express or implied warranties, including, but not limited +to, the implied warranties of merchantability and fitness for a +particular purpose are disclaimed. 
In no event shall the copyright owner +or contributors be liable for any direct, indirect, incidental, special, +exemplary, or consequential damages (including, but not limited to, +procurement of substitute goods or services; loss of use, data, or +profits; or business interruption) however caused and on any theory of +liability, whether in contract, strict liability, or tort (including +negligence or otherwise) arising in any way out of the use of this +software, even if advised of the possibility of such damage. + +=cut + diff --git a/bin/MultiMarkdown/Support.pm b/lib/MultiMarkdown/Support.pm similarity index 100% rename from bin/MultiMarkdown/Support.pm rename to lib/MultiMarkdown/Support.pm From f54bc59866b9a3eaa2159dcea0f4791823b4fbf6 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Fri, 17 Dec 2010 23:05:27 +0100 Subject: [PATCH 04/45] Rework mmd2* scripts to use the new layout In the process, simplify the MMDPath detection logic. --- bin/mmd2LaTeX.pl | 86 ++++++++++++---------------------------- bin/mmd2PDF.pl | 89 +++++++++++++----------------------------- bin/mmd2PDFXeLaTeX.pl | 88 +++++++++++++---------------------------- bin/mmd2RTF.pl | 89 +++++++++++++----------------------------- bin/mmd2XHTML.pl | 90 ++++++++++++------------------------------ bin/mmd2letter.pl | 88 +++++++++++++---------------------------- bin/mmd2web.pl | 91 ++++++++++++------------------------------- 7 files changed, 179 insertions(+), 442 deletions(-) diff --git a/bin/mmd2LaTeX.pl b/bin/mmd2LaTeX.pl index c26cc74..e530d0f 100755 --- a/bin/mmd2LaTeX.pl +++ b/bin/mmd2LaTeX.pl @@ -24,15 +24,15 @@ use warnings; use File::Basename; +use File::Spec; use Cwd; use Cwd 'abs_path'; # Determine where MMD is installed. Use a "common installation" if available. -my $me = $0; # Where is this script located? +my $me = readlink(__FILE__); # Where is this script located? 
my $MMDPath = LocateMMD($me); - # Determine whether we are in "file mode" or "stdin mode" my $count = @ARGV; @@ -83,83 +83,47 @@ sub LocateMMD { $me = dirname($me); - if ($os =~ /MSWin/) { - # We're running Windows - - # First check our directory to see if we're running inside MMD - - if ( -f "$me\\MultiMarkdown\\Support.pm") { - $MMDPath = "$me\\.."; - } - - # Next, look in user's home directory, then in common directories - if ($MMDPath eq "") { - if ( -d "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown") { - $MMDPath = "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown"; - } elsif ( -d "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown") { - $MMDPath = "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown"; - } - } + my @candidates = (); - # Load the MultiMarkdown::Support.pm module - do "$MMDPath\\bin\\MultiMarkdown\\Support.pm" if ($MMDPath ne ""); + if ( -f File::Spec->join($me, 'MultiMarkdown.pl') ) { + $MMDPath = dirname($me); } else { - # We're running Mac OS X or some *nix - - # First check our directory to see if we're running inside MMD - - if ( -f "$me/MultiMarkdown/Support.pm") { - $MMDPath = "$me/.."; - } - # Next, look in user's home directory, then in common directories - if ($MMDPath eq "") { + if ($os =~ /MSWin/) { + # We're running Windows + push @candidates, "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown"; + push @candidates, "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown"; + + } else { + # We're running Mac OS X or some *nix if (defined($ENV{HOME})) { - if ( -d "$ENV{HOME}/Library/Application Support/MultiMarkdown") { - $MMDPath = "$ENV{HOME}/Library/Application Support/MultiMarkdown"; - } elsif ( -d "$ENV{HOME}/.multimarkdown") { - $MMDPath = "$ENV{HOME}/.multimarkdown"; - } - } - if ($MMDPath eq "") { - if ( -d "/Library/Application Support/MultiMarkdown") { - $MMDPath = "/Library/Application Support/MultiMarkdown"; - } elsif ( -d "/usr/share/multimarkdown") { - $MMDPath = "/usr/share/multimarkdown"; - 
} + push @candidates, "$ENV{HOME}/Library/Application Support/MultiMarkdown"; + push @candidates, "$ENV{HOME}/.multimarkdown"; + push @candidates, "/Library/Application Support/MultiMarkdown"; + push @candidates, "/usr/share/multimarkdown"; } } } + foreach (@candidates) { + if (-d $_) { + $MMDPath = $_; + last; + } + } + if ($MMDPath eq "") { die "You do not appear to have MultiMarkdown installed.\n"; } else { # Load the MultiMarkdown::Support.pm module $MMDPath = abs_path($MMDPath); - LoadModule("$MMDPath/bin/MultiMarkdown/Support.pm"); + unshift (@INC, File::Spec->join($MMDPath, 'lib')); + require MultiMarkdown::Support; } - # Clean up the path - $MMDPath = abs_path($MMDPath); - return $MMDPath; } -sub LoadModule { - my $file = shift; - my $os = $^O; # Mac = darwin; Linux = linux; Windows contains MSWin - - if ($os =~ /MSWin/) { - # Not sure what I can do here - } else { - unless (my $return = eval `cat "$file"`) { - warn "couldn't parse $file: $@" if $@; - warn "couldn't do $file: $!" unless defined $return; - warn "couldn't run $file" unless $return; - } - } -} - =head1 NAME mmd2LaTeX.pl - utility script for MultiMarkdown to convert MultiMarkdown text diff --git a/bin/mmd2PDF.pl b/bin/mmd2PDF.pl index 8a8badd..c2b0c38 100755 --- a/bin/mmd2PDF.pl +++ b/bin/mmd2PDF.pl @@ -24,15 +24,15 @@ use warnings; use File::Basename; +use File::Spec; use Cwd; use Cwd 'abs_path'; # Determine where MMD is installed. Use a "common installation" if available. -my $me = $0; # Where is this script located? +my $me = readlink(__FILE__); # Where is this script located? 
my $MMDPath = LocateMMD($me); - # Determine whether we are in "file mode" or "stdin mode" my $count = @ARGV; @@ -79,84 +79,47 @@ sub LocateMMD { $me = dirname($me); - if ($os =~ /MSWin/) { - # We're running Windows - - # First check our directory to see if we're running inside MMD - - if ( -f "$me\\MultiMarkdown\\Support.pm") { - $MMDPath = "$me\\.."; - } - - # Next, look in user's home directory, then in common directories - if ($MMDPath eq "") { - if ( -d "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown") { - $MMDPath = "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown"; - } elsif ( -d "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown") { - $MMDPath = "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown"; - } - } + my @candidates = (); - # Load the MultiMarkdown::Support.pm module - do "$MMDPath\\bin\\MultiMarkdown\\Support.pm" if ($MMDPath ne ""); + if ( -f File::Spec->join($me, 'MultiMarkdown.pl') ) { + $MMDPath = dirname($me); } else { - # We're running Mac OS X or some *nix - - # First check our directory to see if we're running inside MMD - - if ( -f "$me/MultiMarkdown/Support.pm") { - $MMDPath = "$me/.."; - } - # Next, look in user's home directory, then in common directories - if ($MMDPath eq "") { + if ($os =~ /MSWin/) { + # We're running Windows + push @candidates, "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown"; + push @candidates, "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown"; + + } else { + # We're running Mac OS X or some *nix if (defined($ENV{HOME})) { - if ( -d "$ENV{HOME}/Library/Application Support/MultiMarkdown") { - $MMDPath = "$ENV{HOME}/Library/Application Support/MultiMarkdown"; - } elsif ( -d "$ENV{HOME}/.multimarkdown") { - $MMDPath = "$ENV{HOME}/.multimarkdown"; - } - } - if ($MMDPath eq "") { - if ( -d "/Library/Application Support/MultiMarkdown") { - $MMDPath = "/Library/Application Support/MultiMarkdown"; - } elsif ( -d "/usr/share/multimarkdown") { - $MMDPath = "/usr/share/multimarkdown"; - 
} + push @candidates, "$ENV{HOME}/Library/Application Support/MultiMarkdown"; + push @candidates, "$ENV{HOME}/.multimarkdown"; + push @candidates, "/Library/Application Support/MultiMarkdown"; + push @candidates, "/usr/share/multimarkdown"; } } } + foreach (@candidates) { + if (-d $_) { + $MMDPath = $_; + last; + } + } + if ($MMDPath eq "") { die "You do not appear to have MultiMarkdown installed.\n"; } else { # Load the MultiMarkdown::Support.pm module $MMDPath = abs_path($MMDPath); - LoadModule("$MMDPath/bin/MultiMarkdown/Support.pm"); + unshift (@INC, File::Spec->join($MMDPath, 'lib')); + require MultiMarkdown::Support; } - # Clean up the path - $MMDPath = abs_path($MMDPath); - return $MMDPath; } -sub LoadModule { - my $file = shift; - my $os = $^O; # Mac = darwin; Linux = linux; Windows contains MSWin - - if ($os =~ /MSWin/) { - # Not sure what I can do here - } else { - unless (my $return = eval `cat "$file"`) { - warn "couldn't parse $file: $@" if $@; - warn "couldn't do $file: $!" unless defined $return; - warn "couldn't run $file" unless $return; - } - } -} - - =head1 NAME mmd2PDF - utility script for MultiMarkdown to convert MultiMarkdown text @@ -211,4 +174,4 @@ =head1 COPYRIGHT AND LICENSE 59 Temple Place, Suite 330 Boston, MA 02111-1307 USA -=cut \ No newline at end of file +=cut diff --git a/bin/mmd2PDFXeLaTeX.pl b/bin/mmd2PDFXeLaTeX.pl index 14877a4..9404aa8 100755 --- a/bin/mmd2PDFXeLaTeX.pl +++ b/bin/mmd2PDFXeLaTeX.pl @@ -24,15 +24,15 @@ use warnings; use File::Basename; +use File::Spec; use Cwd; use Cwd 'abs_path'; # Determine where MMD is installed. Use a "common installation" if available. -my $me = $0; # Where is this script located? +my $me = readlink(__FILE__); # Where is this script located? 
my $MMDPath = LocateMMD($me); - # Determine whether we are in "file mode" or "stdin mode" my $count = @ARGV; @@ -79,83 +79,47 @@ sub LocateMMD { $me = dirname($me); - if ($os =~ /MSWin/) { - # We're running Windows - - # First check our directory to see if we're running inside MMD - - if ( -f "$me\\MultiMarkdown\\Support.pm") { - $MMDPath = "$me\\.."; - } - - # Next, look in user's home directory, then in common directories - if ($MMDPath eq "") { - if ( -d "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown") { - $MMDPath = "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown"; - } elsif ( -d "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown") { - $MMDPath = "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown"; - } - } + my @candidates = (); - # Load the MultiMarkdown::Support.pm module - do "$MMDPath\\bin\\MultiMarkdown\\Support.pm" if ($MMDPath ne ""); + if ( -f File::Spec->join($me, 'MultiMarkdown.pl') ) { + $MMDPath = dirname($me); } else { - # We're running Mac OS X or some *nix - - # First check our directory to see if we're running inside MMD - - if ( -f "$me/MultiMarkdown/Support.pm") { - $MMDPath = "$me/.."; - } - # Next, look in user's home directory, then in common directories - if ($MMDPath eq "") { + if ($os =~ /MSWin/) { + # We're running Windows + push @candidates, "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown"; + push @candidates, "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown"; + + } else { + # We're running Mac OS X or some *nix if (defined($ENV{HOME})) { - if ( -d "$ENV{HOME}/Library/Application Support/MultiMarkdown") { - $MMDPath = "$ENV{HOME}/Library/Application Support/MultiMarkdown"; - } elsif ( -d "$ENV{HOME}/.multimarkdown") { - $MMDPath = "$ENV{HOME}/.multimarkdown"; - } - } - if ($MMDPath eq "") { - if ( -d "/Library/Application Support/MultiMarkdown") { - $MMDPath = "/Library/Application Support/MultiMarkdown"; - } elsif ( -d "/usr/share/multimarkdown") { - $MMDPath = "/usr/share/multimarkdown"; - 
} + push @candidates, "$ENV{HOME}/Library/Application Support/MultiMarkdown"; + push @candidates, "$ENV{HOME}/.multimarkdown"; + push @candidates, "/Library/Application Support/MultiMarkdown"; + push @candidates, "/usr/share/multimarkdown"; } } } + foreach (@candidates) { + if (-d $_) { + $MMDPath = $_; + last; + } + } + if ($MMDPath eq "") { die "You do not appear to have MultiMarkdown installed.\n"; } else { # Load the MultiMarkdown::Support.pm module $MMDPath = abs_path($MMDPath); - LoadModule("$MMDPath/bin/MultiMarkdown/Support.pm"); + unshift (@INC, File::Spec->join($MMDPath, 'lib')); + require MultiMarkdown::Support; } - # Clean up the path - $MMDPath = abs_path($MMDPath); - return $MMDPath; } -sub LoadModule { - my $file = shift; - my $os = $^O; # Mac = darwin; Linux = linux; Windows contains MSWin - - if ($os =~ /MSWin/) { - # Not sure what I can do here - } else { - unless (my $return = eval `cat "$file"`) { - warn "couldn't parse $file: $@" if $@; - warn "couldn't do $file: $!" unless defined $return; - warn "couldn't run $file" unless $return; - } - } -} - =head1 NAME mmd2PDFXeLaTeX - utility script for MultiMarkdown to convert MultiMarkdown @@ -210,4 +174,4 @@ =head1 COPYRIGHT AND LICENSE 59 Temple Place, Suite 330 Boston, MA 02111-1307 USA -=cut \ No newline at end of file +=cut diff --git a/bin/mmd2RTF.pl b/bin/mmd2RTF.pl index e64ad78..f23ce55 100755 --- a/bin/mmd2RTF.pl +++ b/bin/mmd2RTF.pl @@ -24,16 +24,15 @@ use warnings; use File::Basename; +use File::Spec; use Cwd; use Cwd 'abs_path'; - # Determine where MMD is installed. Use a "common installation" if available. -my $me = $0; # Where is this script located? +my $me = readlink(__FILE__); # Where is this script located? 
my $MMDPath = LocateMMD($me); - # Determine whether we are in "file mode" or "stdin mode" my $count = @ARGV; @@ -84,83 +83,47 @@ sub LocateMMD { $me = dirname($me); - if ($os =~ /MSWin/) { - # We're running Windows - - # First check our directory to see if we're running inside MMD - - if ( -f "$me\\MultiMarkdown\\Support.pm") { - $MMDPath = "$me\\.."; - } - - # Next, look in user's home directory, then in common directories - if ($MMDPath eq "") { - if ( -d "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown") { - $MMDPath = "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown"; - } elsif ( -d "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown") { - $MMDPath = "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown"; - } - } + my @candidates = (); - # Load the MultiMarkdown::Support.pm module - do "$MMDPath\\bin\\MultiMarkdown\\Support.pm" if ($MMDPath ne ""); + if ( -f File::Spec->join($me, 'MultiMarkdown.pl') ) { + $MMDPath = dirname($me); } else { - # We're running Mac OS X or some *nix - - # First check our directory to see if we're running inside MMD - - if ( -f "$me/MultiMarkdown/Support.pm") { - $MMDPath = "$me/.."; - } - # Next, look in user's home directory, then in common directories - if ($MMDPath eq "") { + if ($os =~ /MSWin/) { + # We're running Windows + push @candidates, "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown"; + push @candidates, "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown"; + + } else { + # We're running Mac OS X or some *nix if (defined($ENV{HOME})) { - if ( -d "$ENV{HOME}/Library/Application Support/MultiMarkdown") { - $MMDPath = "$ENV{HOME}/Library/Application Support/MultiMarkdown"; - } elsif ( -d "$ENV{HOME}/.multimarkdown") { - $MMDPath = "$ENV{HOME}/.multimarkdown"; - } - } - if ($MMDPath eq "") { - if ( -d "/Library/Application Support/MultiMarkdown") { - $MMDPath = "/Library/Application Support/MultiMarkdown"; - } elsif ( -d "/usr/share/multimarkdown") { - $MMDPath = "/usr/share/multimarkdown"; - 
} + push @candidates, "$ENV{HOME}/Library/Application Support/MultiMarkdown"; + push @candidates, "$ENV{HOME}/.multimarkdown"; + push @candidates, "/Library/Application Support/MultiMarkdown"; + push @candidates, "/usr/share/multimarkdown"; } } } + foreach (@candidates) { + if (-d $_) { + $MMDPath = $_; + last; + } + } + if ($MMDPath eq "") { die "You do not appear to have MultiMarkdown installed.\n"; } else { # Load the MultiMarkdown::Support.pm module $MMDPath = abs_path($MMDPath); - LoadModule("$MMDPath/bin/MultiMarkdown/Support.pm"); + unshift (@INC, File::Spec->join($MMDPath, 'lib')); + require MultiMarkdown::Support; } - # Clean up the path - $MMDPath = abs_path($MMDPath); - return $MMDPath; } -sub LoadModule { - my $file = shift; - my $os = $^O; # Mac = darwin; Linux = linux; Windows contains MSWin - - if ($os =~ /MSWin/) { - # Not sure what I can do here - } else { - unless (my $return = eval `cat "$file"`) { - warn "couldn't parse $file: $@" if $@; - warn "couldn't do $file: $!" unless defined $return; - warn "couldn't run $file" unless $return; - } - } -} - =head1 NAME mmd2XHTML - utility script for MultiMarkdown to convert MultiMarkdown text @@ -215,4 +178,4 @@ =head1 COPYRIGHT AND LICENSE 59 Temple Place, Suite 330 Boston, MA 02111-1307 USA -=cut \ No newline at end of file +=cut diff --git a/bin/mmd2XHTML.pl b/bin/mmd2XHTML.pl index 2ba6f26..537b3c7 100755 --- a/bin/mmd2XHTML.pl +++ b/bin/mmd2XHTML.pl @@ -24,16 +24,15 @@ use warnings; use File::Basename; +use File::Spec; use Cwd; use Cwd 'abs_path'; - # Determine where MMD is installed. Use a "common installation" if available. -my $me = $0; # Where is this script located? +my $me = readlink(__FILE__); # Where is this script located? my $MMDPath = LocateMMD($me); - # Determine whether we are in "file mode" or "stdin mode" my $count = @ARGV; @@ -82,88 +81,49 @@ sub LocateMMD { # Determine where MMD is installed. Use a "common installation" # if available. 
- my $sym = dirname(readlink($me)); $me = dirname($me); - if ($os =~ /MSWin/) { - # We're running Windows - - # First check our directory to see if we're running inside MMD - - if ( -f "$me\\MultiMarkdown\\Support.pm") { - $MMDPath = "$me\\.."; - } - - # Next, look in user's home directory, then in common directories - if ($MMDPath eq "") { - if ( -d "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown") { - $MMDPath = "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown"; - } elsif ( -d "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown") { - $MMDPath = "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown"; - } - } + my @candidates = (); - # Load the MultiMarkdown::Support.pm module - do "$MMDPath\\bin\\MultiMarkdown\\Support.pm" if ($MMDPath ne ""); + if ( -f File::Spec->join($me, 'MultiMarkdown.pl') ) { + $MMDPath = dirname($me); } else { - # We're running Mac OS X or some *nix - - # First check our directory to see if we're running inside MMD - - if ( -f "$me/MultiMarkdown/Support.pm") { - $MMDPath = "$me/.."; - } elsif ( -f "$sym/MultiMarkdown/Support.pm") { - $MMDPath = "$sym/.."; - } - # Next, look in user's home directory, then in common directories - if ($MMDPath eq "") { + if ($os =~ /MSWin/) { + # We're running Windows + push @candidates, "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown"; + push @candidates, "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown"; + + } else { + # We're running Mac OS X or some *nix if (defined($ENV{HOME})) { - if ( -d "$ENV{HOME}/Library/Application Support/MultiMarkdown") { - $MMDPath = "$ENV{HOME}/Library/Application Support/MultiMarkdown"; - } elsif ( -d "$ENV{HOME}/.multimarkdown") { - $MMDPath = "$ENV{HOME}/.multimarkdown"; - } - } - if ($MMDPath eq "") { - if ( -d "/Library/Application Support/MultiMarkdown") { - $MMDPath = "/Library/Application Support/MultiMarkdown"; - } elsif ( -d "/usr/share/multimarkdown") { - $MMDPath = "/usr/share/multimarkdown"; - } + push @candidates, 
"$ENV{HOME}/Library/Application Support/MultiMarkdown"; + push @candidates, "$ENV{HOME}/.multimarkdown"; + push @candidates, "/Library/Application Support/MultiMarkdown"; + push @candidates, "/usr/share/multimarkdown"; } } } + foreach (@candidates) { + if (-d $_) { + $MMDPath = $_; + last; + } + } + if ($MMDPath eq "") { die "You do not appear to have MultiMarkdown installed.\n"; } else { # Load the MultiMarkdown::Support.pm module $MMDPath = abs_path($MMDPath); - LoadModule("$MMDPath/bin/MultiMarkdown/Support.pm"); + unshift (@INC, File::Spec->join($MMDPath, 'lib')); + require MultiMarkdown::Support; } - # Clean up the path - $MMDPath = abs_path($MMDPath); - return $MMDPath; } -sub LoadModule { - my $file = shift; - my $os = $^O; # Mac = darwin; Linux = linux; Windows contains MSWin - - if ($os =~ /MSWin/) { - # Not sure what I can do here - } else { - unless (my $return = eval `cat "$file"`) { - warn "couldn't parse $file: $@" if $@; - warn "couldn't do $file: $!" unless defined $return; - warn "couldn't run $file" unless $return; - } - } -} - =head1 NAME mmd2XHTML - utility script for MultiMarkdown to convert MultiMarkdown text diff --git a/bin/mmd2letter.pl b/bin/mmd2letter.pl index 3e2e606..8c4c3f5 100755 --- a/bin/mmd2letter.pl +++ b/bin/mmd2letter.pl @@ -25,15 +25,15 @@ use warnings; use File::Basename; +use File::Spec; use Cwd; use Cwd 'abs_path'; # Determine where MMD is installed. Use a "common installation" if available. -my $me = $0; # Where is this script located? +my $me = readlink(__FILE__); # Where is this script located? 
my $MMDPath = LocateMMD($me); - # Determine whether we are in "file mode" or "stdin mode" my $count = @ARGV; @@ -90,83 +90,47 @@ sub LocateMMD { $me = dirname($me); - if ($os =~ /MSWin/) { - # We're running Windows - - # First check our directory to see if we're running inside MMD - - if ( -f "$me\\MultiMarkdown\\Support.pm") { - $MMDPath = "$me\\.."; - } - - # Next, look in user's home directory, then in common directories - if ($MMDPath eq "") { - if ( -d "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown") { - $MMDPath = "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown"; - } elsif ( -d "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown") { - $MMDPath = "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown"; - } - } + my @candidates = (); - # Load the MultiMarkdown::Support.pm module - do "$MMDPath\\bin\\MultiMarkdown\\Support.pm" if ($MMDPath ne ""); + if ( -f File::Spec->join($me, 'MultiMarkdown.pl') ) { + $MMDPath = dirname($me); } else { - # We're running Mac OS X or some *nix - - # First check our directory to see if we're running inside MMD - - if ( -f "$me/MultiMarkdown/Support.pm") { - $MMDPath = "$me/.."; - } - # Next, look in user's home directory, then in common directories - if ($MMDPath eq "") { + if ($os =~ /MSWin/) { + # We're running Windows + push @candidates, "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown"; + push @candidates, "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown"; + + } else { + # We're running Mac OS X or some *nix if (defined($ENV{HOME})) { - if ( -d "$ENV{HOME}/Library/Application Support/MultiMarkdown") { - $MMDPath = "$ENV{HOME}/Library/Application Support/MultiMarkdown"; - } elsif ( -d "$ENV{HOME}/.multimarkdown") { - $MMDPath = "$ENV{HOME}/.multimarkdown"; - } - } - if ($MMDPath eq "") { - if ( -d "/Library/Application Support/MultiMarkdown") { - $MMDPath = "/Library/Application Support/MultiMarkdown"; - } elsif ( -d "/usr/share/multimarkdown") { - $MMDPath = "/usr/share/multimarkdown"; - 
} + push @candidates, "$ENV{HOME}/Library/Application Support/MultiMarkdown"; + push @candidates, "$ENV{HOME}/.multimarkdown"; + push @candidates, "/Library/Application Support/MultiMarkdown"; + push @candidates, "/usr/share/multimarkdown"; } } } + foreach (@candidates) { + if (-d $_) { + $MMDPath = $_; + last; + } + } + if ($MMDPath eq "") { die "You do not appear to have MultiMarkdown installed.\n"; } else { # Load the MultiMarkdown::Support.pm module $MMDPath = abs_path($MMDPath); - LoadModule("$MMDPath/bin/MultiMarkdown/Support.pm"); + unshift (@INC, File::Spec->join($MMDPath, 'lib')); + require MultiMarkdown::Support; } - # Clean up the path - $MMDPath = abs_path($MMDPath); - return $MMDPath; } -sub LoadModule { - my $file = shift; - my $os = $^O; # Mac = darwin; Linux = linux; Windows contains MSWin - - if ($os =~ /MSWin/) { - # Not sure what I can do here - } else { - unless (my $return = eval `cat "$file"`) { - warn "couldn't parse $file: $@" if $@; - warn "couldn't do $file: $!" unless defined $return; - warn "couldn't run $file" unless $return; - } - } -} - =head1 NAME mmd2letter - utility script for MultiMarkdown to convert MultiMarkdown text @@ -226,4 +190,4 @@ =head1 COPYRIGHT AND LICENSE 59 Temple Place, Suite 330 Boston, MA 02111-1307 USA -=cut \ No newline at end of file +=cut diff --git a/bin/mmd2web.pl b/bin/mmd2web.pl index db59e77..5ed0513 100755 --- a/bin/mmd2web.pl +++ b/bin/mmd2web.pl @@ -27,16 +27,15 @@ use warnings; use File::Basename; +use File::Spec; use Cwd; use Cwd 'abs_path'; - # Determine where MMD is installed. Use a "common installation" if available. -my $me = $0; # Where is this script located? +my $me = readlink(__FILE__); # Where is this script located? my $MMDPath = LocateMMD($me); - # Determine whether we are in "file mode" or "stdin mode" my $count = @ARGV; @@ -87,89 +86,49 @@ sub LocateMMD { # Determine where MMD is installed. Use a "common installation" # if available. 
- my $sym = dirname(readlink($me)); - print STDERR "sym: $sym \n"; $me = dirname($me); - if ($os =~ /MSWin/) { - # We're running Windows - - # First check our directory to see if we're running inside MMD - - if ( -f "$me\\MultiMarkdown\\Support.pm") { - $MMDPath = "$me\\.."; - } - - # Next, look in user's home directory, then in common directories - if ($MMDPath eq "") { - if ( -d "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown") { - $MMDPath = "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown"; - } elsif ( -d "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown") { - $MMDPath = "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown"; - } - } + my @candidates = (); - # Load the MultiMarkdown::Support.pm module - do "$MMDPath\\bin\\MultiMarkdown\\Support.pm" if ($MMDPath ne ""); + if ( -f File::Spec->join($me, 'MultiMarkdown.pl') ) { + $MMDPath = dirname($me); } else { - # We're running Mac OS X or some *nix - - # First check our directory to see if we're running inside MMD - - if ( -f "$me/MultiMarkdown/Support.pm") { - $MMDPath = "$me/.."; - } elsif ( -f "$sym/MultiMarkdown/Support.pm") { - $MMDPath = "$sym/.."; - } - # Next, look in user's home directory, then in common directories - if ($MMDPath eq "") { + if ($os =~ /MSWin/) { + # We're running Windows + push @candidates, "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown"; + push @candidates, "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown"; + + } else { + # We're running Mac OS X or some *nix if (defined($ENV{HOME})) { - if ( -d "$ENV{HOME}/Library/Application Support/MultiMarkdown") { - $MMDPath = "$ENV{HOME}/Library/Application Support/MultiMarkdown"; - } elsif ( -d "$ENV{HOME}/.multimarkdown") { - $MMDPath = "$ENV{HOME}/.multimarkdown"; - } - } - if ($MMDPath eq "") { - if ( -d "/Library/Application Support/MultiMarkdown") { - $MMDPath = "/Library/Application Support/MultiMarkdown"; - } elsif ( -d "/usr/share/multimarkdown") { - $MMDPath = "/usr/share/multimarkdown"; - } + 
push @candidates, "$ENV{HOME}/Library/Application Support/MultiMarkdown"; + push @candidates, "$ENV{HOME}/.multimarkdown"; + push @candidates, "/Library/Application Support/MultiMarkdown"; + push @candidates, "/usr/share/multimarkdown"; } } } + foreach (@candidates) { + if (-d $_) { + $MMDPath = $_; + last; + } + } + if ($MMDPath eq "") { die "You do not appear to have MultiMarkdown installed.\n"; } else { # Load the MultiMarkdown::Support.pm module $MMDPath = abs_path($MMDPath); - LoadModule("$MMDPath/bin/MultiMarkdown/Support.pm"); + unshift (@INC, File::Spec->join($MMDPath, 'lib')); + require MultiMarkdown::Support; } - # Clean up the path - $MMDPath = abs_path($MMDPath); - return $MMDPath; } -sub LoadModule { - my $file = shift; - my $os = $^O; # Mac = darwin; Linux = linux; Windows contains MSWin - - if ($os =~ /MSWin/) { - # Not sure what I can do here - } else { - unless (my $return = eval `cat "$file"`) { - warn "couldn't parse $file: $@" if $@; - warn "couldn't do $file: $!" unless defined $return; - warn "couldn't run $file" unless $return; - } - } -} - =head1 NAME mmd2XHTML - utility script for MultiMarkdown to convert MultiMarkdown text From 8bb14a595202836eea8a24a0d437ed8f306ef2c6 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sat, 1 Jan 2011 22:08:35 +0100 Subject: [PATCH 05/45] More modularization progress --- lib/MultiMarkdown.pm | 60 ++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm index 9120023..f41d726 100755 --- a/lib/MultiMarkdown.pm +++ b/lib/MultiMarkdown.pm @@ -53,11 +53,10 @@ unless ($@) { use Digest::MD5 qw(md5_hex); -use vars qw($VERSION $g_use_metadata $g_base_url - $g_bibliography_title $g_allow_mathml $g_base_header_level $mathParser); -$VERSION = '2.0.b6'; -$mathParser = new Text::ASCIIMathML(); +our $VERSION = '2.0.b6'; + +our $mathParser = new Text::ASCIIMathML(); ## Disabled; causes problems under Perl 5.6.1: # use utf8; @@ 
-66,10 +65,10 @@ $mathParser = new Text::ASCIIMathML(); # # Global default settings: # -my $g_empty_element_suffix = " />"; # Change to ">" for HTML output -my $g_tab_width = 4; -my $g_allow_mathml = 1; -my $g_base_header_level = 1; +our $g_empty_element_suffix = " />"; # Change to ">" for HTML output +our $g_tab_width = 4; +our $g_allow_mathml = 1; +our $g_base_header_level = 1; # # Globals: @@ -77,7 +76,7 @@ my $g_base_header_level = 1; # Reusable patterns to match balanced [brackets] and (parens). See # Friedl's "Mastering Regular Expressions", 2nd Ed., pp. 328-331. -my ($g_nested_brackets, $g_nested_parens); +our ($g_nested_brackets, $g_nested_parens); $g_nested_brackets = qr{ (?> # Atomic matching [^\[\]]+ # Anything other than brackets @@ -101,37 +100,38 @@ $g_nested_parens = qr{ # Table of hash values for escaped characters: -my %g_escape_table; +our %g_escape_table; foreach my $char (split //, '\\`*_{}[]()>#+-.!') { $g_escape_table{$char} = md5_hex($char); } # Global hashes, used by various utility routines -my %g_urls = (); -my %g_titles= (); -my %g_html_blocks = (); -my %g_metadata = (); -my %g_metadata_newline = (); -my %g_crossrefs = (); -my %g_footnotes = (); -my %g_attributes = (); -my @g_used_footnotes = (); -my $g_footnote_counter = 0; - -my $g_citation_counter = 0; -my @g_used_references = (); -my %g_references = (); -$g_bibliography_title = "Bibliography"; - -$g_use_metadata = 1; +our %g_urls = (); +our %g_titles= (); +our %g_html_blocks = (); +our %g_metadata = (); +our %g_metadata_newline = (); +our %g_crossrefs = (); +our %g_footnotes = (); +our %g_attributes = (); +our @g_used_footnotes = (); +our $g_footnote_counter = 0; + +our $g_citation_counter = 0; +our @g_used_references = (); +our %g_references = (); +our $g_bibliography_title = "Bibliography"; + +our $g_use_metadata = 1; $g_metadata_newline{default} = "\n"; $g_metadata_newline{keywords} = ", "; -my $g_document_format = ""; +our $g_document_format = ""; +our $g_base_url = ""; # Used to 
track when we're inside an ordered or unordered list # (see _ProcessListItems() for details): -my $g_list_level = 0; +our $g_list_level = 0; #### Blosxom plug-in interface ########################################## @@ -140,7 +140,7 @@ my $g_list_level = 0; # which posts Markdown should process, using a "meta-markup: markdown" # header. If it's set to 0 (the default), Markdown will process all # entries. -my $g_blosxom_use_meta = 0; +our $g_blosxom_use_meta = 0; sub start { 1; } sub story { From 9a4f433fe4a300a1efdba2d2a84f853ae1f11174 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sun, 2 Jan 2011 10:31:03 +0100 Subject: [PATCH 06/45] Use File::Spec for path joins --- lib/MultiMarkdown.pm | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm index f41d726..22724cb 100755 --- a/lib/MultiMarkdown.pm +++ b/lib/MultiMarkdown.pm @@ -32,6 +32,7 @@ use strict; use warnings; use File::Basename; +use File::Spec; eval {require MT}; # Test to see if we're running in MT. unless ($@) { @@ -39,16 +40,8 @@ unless ($@) { } else { # Otherwise look for ASCIIMathML.pm next to MultiMarkdown.pl my $me = __FILE__; # Where am I? - # Am I running in Windoze? - my $os = $^O; - - # Get just the directory portion - if ($os =~ /MSWin/) { - $me = dirname($me) . "\\"; - } else { - $me = dirname($me) . "/"; - } - require $me ."ASCIIMathML.pm"; + my $path = dirname($me); + require File::Spec->join($path, "ASCIIMathML.pm"); } From 583bf9519c54f38f17354b599a5bf3cc8bc7016d Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sun, 2 Jan 2011 11:07:23 +0100 Subject: [PATCH 07/45] Improve binary vs module split Bring the command-line parsing stuff back into the MultiMarkdown.pl executable, and export the Markdown method from the module, making it accept options to be used to override global settings. 
--- bin/MultiMarkdown.pl | 298 +++++++++++++++++++++++++++++++++++++++++ lib/MultiMarkdown.pm | 306 ++----------------------------------------- 2 files changed, 308 insertions(+), 296 deletions(-) diff --git a/bin/MultiMarkdown.pl b/bin/MultiMarkdown.pl index 840c006..e5c2d4a 100755 --- a/bin/MultiMarkdown.pl +++ b/bin/MultiMarkdown.pl @@ -19,3 +19,301 @@ require MultiMarkdown; } + +import MultiMarkdown qw{Markdown}; + +#### Blosxom plug-in interface ########################################## + +# Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine +# which posts Markdown should process, using a "meta-markup: markdown" +# header. If it's set to 0 (the default), Markdown will process all +# entries. +our $g_blosxom_use_meta = 0; + +sub start { 1; } +sub story { + my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_; + + if ( (! $g_blosxom_use_meta) or + (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i)) + ){ + $$body_ref = Markdown($$body_ref); + } + 1; +} + + +#### Movable Type plug-in interface ##################################### +eval {require MT}; # Test to see if we're running in MT. +unless ($@) { + require MT; + import MT; + require MT::Template::Context; + import MT::Template::Context; + + eval {require MT::Plugin}; # Test to see if we're running >= MT 3.0. 
+ unless ($@) { + require MT::Plugin; + import MT::Plugin; + my $plugin = new MT::Plugin({ + name => "MultiMarkdown", + description => "Based on the original Markdown", + doc_link => 'http://fletcherpenney.net/multimarkdown/' + }); + MT->add_plugin( $plugin ); + } + + MT::Template::Context->add_container_tag(MultiMarkdownOptions => sub { + my $ctx = shift; + my $args = shift; + my $builder = $ctx->stash('builder'); + my $tokens = $ctx->stash('tokens'); + + if (defined ($args->{'output'}) ) { + $ctx->stash('multimarkdown_output', lc $args->{'output'}); + } + + defined (my $str = $builder->build($ctx, $tokens) ) + or return $ctx->error($builder->errstr); + $str; # return value + }); + + MT->add_text_filter('multimarkdown' => { + label => 'MultiMarkdown', + docs => 'http://fletcherpenney.net/MultiMarkdown/', + on_format => sub { + my $text = shift; + my $ctx = shift; + my $raw = 0; + my %opts = (); + if (defined $ctx) { + my $output = $ctx->stash('multimarkdown_output'); + if (defined $output && $output =~ m/^html/i) { + $opts{empty_element_suffix} = ">"; + $ctx->stash('multimarkdown_output', ''); + } + elsif (defined $output && $output eq 'raw') { + $raw = 1; + $ctx->stash('multimarkdown_output', ''); + } + else { + $raw = 0; + $opts{empty_element_suffix} = " />"; + } + } + $text = $raw ? 
$text : Markdown($text, %opts); + $text; + }, + }); + + # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter: + my $smartypants; + + { + no warnings "once"; + $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'}; + } + + if ($smartypants) { + MT->add_text_filter('multimarkdown_with_smartypants' => { + label => 'MultiMarkdown With SmartyPants', + docs => 'http://fletcherpenney.net/MultiMarkdown/', + on_format => sub { + my $text = shift; + my $ctx = shift; + my %opts = (); + if (defined $ctx) { + my $output = $ctx->stash('multimarkdown_output'); + if (defined $output && $output eq 'html') { + $opts{empty_element_suffix} = ">"; + } + else { + $opts{empty_element_suffix} = " />"; + } + } + $text = Markdown($text, %opts); + $text = $smartypants->($text, '1'); + }, + }); + } +} +else { +#### BBEdit/command-line text filter interface ########################## +# Needs to be hidden from MT (and Blosxom when running in static mode). + + # We're only using $blosxom::version once; tell Perl not to warn us: + no warnings 'once'; + unless ( defined($blosxom::version) ) { + use warnings; + + my %opts = (); + + #### Check for command-line switches: ################# + my %cli_opts; + use Getopt::Long; + Getopt::Long::Configure('pass_through'); + GetOptions(\%cli_opts, + 'version', + 'shortversion', + 'html4tags', + ); + if ($cli_opts{'version'}) { # Version info + print "\nThis is MultiMarkdown, version $MultiMarkdown::VERSION.\n"; + print "Original code Copyright 2004 John Gruber\n"; + print "MultiMarkdown changes Copyright 2005-2009 Fletcher Penney\n"; + print "http://fletcherpenney.net/multimarkdown/\n"; + print "http://daringfireball.net/projects/markdown/\n\n"; + exit 0; + } + if ($cli_opts{'shortversion'}) { # Just the version number string. 
+ print $MultiMarkdown::VERSION; + exit 0; + } + if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML + $opts{empty_element_suffix} = ">"; + } + + + #### Process incoming text: ########################### + my $text; + { + local $/; # Slurp the whole file + $text = <>; + } + print Markdown($text, %opts); + } +} + +__END__ + +=pod + +=head1 NAME + +B + + +=head1 SYNOPSIS + +B [ B<--html4tags> ] [ B<--version> ] [ B<-shortversion> ] + [ I ... ] + + +=head1 DESCRIPTION + +MultiMarkdown is an extended version of Markdown. See the website for more +information. + + http://fletcherpenney.net/multimarkdown/ + +Markdown is a text-to-HTML filter; it translates an easy-to-read / +easy-to-write structured text format into HTML. Markdown's text format +is most similar to that of plain text email, and supports features such +as headers, *emphasis*, code blocks, blockquotes, and links. + +Markdown's syntax is designed not as a generic markup language, but +specifically to serve as a front-end to (X)HTML. You can use span-level +HTML tags anywhere in a Markdown document, and you can use block level +HTML tags (like
    <div> and <table>
    as well). + +For more information about Markdown's syntax, see: + + http://daringfireball.net/projects/markdown/ + + +=head1 OPTIONS + +Use "--" to end switch parsing. For example, to open a file named "-z", use: + + Markdown.pl -- -z + +=over 4 + + +=item B<--html4tags> + +Use HTML 4 style for empty element tags, e.g.: + +
    <br> + +instead of Markdown's default XHTML style tags, e.g.: + +
    + + +=item B<-v>, B<--version> + +Display Markdown's version number and copyright information. + + +=item B<-s>, B<--shortversion> + +Display the short-form version number. + + +=back + + + +=head1 BUGS + +To file bug reports or feature requests (other than topics listed in the +Caveats section above) please send email to: + + support@daringfireball.net (for Markdown issues) + + owner@fletcherpenney.net (for MultiMarkdown issues) + +Please include with your report: (1) the example input; (2) the output +you expected; (3) the output (Multi)Markdown actually produced. + + +=head1 AUTHOR + + John Gruber + http://daringfireball.net/ + + PHP port and other contributions by Michel Fortin + http://michelf.com/ + + MultiMarkdown changes by Fletcher Penney + http://fletcherpenney.net/ + +=head1 COPYRIGHT AND LICENSE + +Original Markdown Code Copyright (c) 2003-2007 John Gruber + +All rights reserved. + +MultiMarkdown changes Copyright (c) 2005-2009 Fletcher T. Penney + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +* Neither the name "Markdown" nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +This software is provided by the copyright holders and contributors "as +is" and any express or implied warranties, including, but not limited +to, the implied warranties of merchantability and fitness for a +particular purpose are disclaimed. 
In no event shall the copyright owner +or contributors be liable for any direct, indirect, incidental, special, +exemplary, or consequential damages (including, but not limited to, +procurement of substitute goods or services; loss of use, data, or +profits; or business interruption) however caused and on any theory of +liability, whether in contract, strict liability, or tort (including +negligence or otherwise) arising in any way out of the use of this +software, even if advised of the possibility of such damage. + +=cut diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm index 22724cb..c189107 100755 --- a/lib/MultiMarkdown.pm +++ b/lib/MultiMarkdown.pm @@ -44,9 +44,11 @@ unless ($@) { require File::Spec->join($path, "ASCIIMathML.pm"); } - use Digest::MD5 qw(md5_hex); +require Exporter; +our @ISA = qw{Exporter}; +our @EXPORT = qw{Markdown}; our $VERSION = '2.0.b6'; our $mathParser = new Text::ASCIIMathML(); @@ -126,167 +128,6 @@ our $g_base_url = ""; # (see _ProcessListItems() for details): our $g_list_level = 0; - -#### Blosxom plug-in interface ########################################## - -# Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine -# which posts Markdown should process, using a "meta-markup: markdown" -# header. If it's set to 0 (the default), Markdown will process all -# entries. -our $g_blosxom_use_meta = 0; - -sub start { 1; } -sub story { - my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_; - - if ( (! $g_blosxom_use_meta) or - (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i)) - ){ - $$body_ref = Markdown($$body_ref); - } - 1; -} - - -#### Movable Type plug-in interface ##################################### -eval {require MT}; # Test to see if we're running in MT. -unless ($@) { - require MT; - import MT; - require MT::Template::Context; - import MT::Template::Context; - - eval {require MT::Plugin}; # Test to see if we're running >= MT 3.0. 
- unless ($@) { - require MT::Plugin; - import MT::Plugin; - my $plugin = new MT::Plugin({ - name => "MultiMarkdown", - description => "Based on the original Markdown", - doc_link => 'http://fletcherpenney.net/multimarkdown/' - }); - MT->add_plugin( $plugin ); - } - - MT::Template::Context->add_container_tag(MultiMarkdownOptions => sub { - my $ctx = shift; - my $args = shift; - my $builder = $ctx->stash('builder'); - my $tokens = $ctx->stash('tokens'); - - if (defined ($args->{'output'}) ) { - $ctx->stash('multimarkdown_output', lc $args->{'output'}); - } - - defined (my $str = $builder->build($ctx, $tokens) ) - or return $ctx->error($builder->errstr); - $str; # return value - }); - - MT->add_text_filter('multimarkdown' => { - label => 'MultiMarkdown', - docs => 'http://fletcherpenney.net/MultiMarkdown/', - on_format => sub { - my $text = shift; - my $ctx = shift; - my $raw = 0; - if (defined $ctx) { - my $output = $ctx->stash('multimarkdown_output'); - if (defined $output && $output =~ m/^html/i) { - $g_empty_element_suffix = ">"; - $ctx->stash('multimarkdown_output', ''); - } - elsif (defined $output && $output eq 'raw') { - $raw = 1; - $ctx->stash('multimarkdown_output', ''); - } - else { - $raw = 0; - $g_empty_element_suffix = " />"; - } - } - $text = $raw ? 
$text : Markdown($text); - $text; - }, - }); - - # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter: - my $smartypants; - - { - no warnings "once"; - $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'}; - } - - if ($smartypants) { - MT->add_text_filter('multimarkdown_with_smartypants' => { - label => 'MultiMarkdown With SmartyPants', - docs => 'http://fletcherpenney.net/MultiMarkdown/', - on_format => sub { - my $text = shift; - my $ctx = shift; - if (defined $ctx) { - my $output = $ctx->stash('multimarkdown_output'); - if (defined $output && $output eq 'html') { - $g_empty_element_suffix = ">"; - } - else { - $g_empty_element_suffix = " />"; - } - } - $text = Markdown($text); - $text = $smartypants->($text, '1'); - }, - }); - } -} -else { -#### BBEdit/command-line text filter interface ########################## -# Needs to be hidden from MT (and Blosxom when running in static mode). - - # We're only using $blosxom::version once; tell Perl not to warn us: - no warnings 'once'; - unless ( defined($blosxom::version) ) { - use warnings; - - #### Check for command-line switches: ################# - my %cli_opts; - use Getopt::Long; - Getopt::Long::Configure('pass_through'); - GetOptions(\%cli_opts, - 'version', - 'shortversion', - 'html4tags', - ); - if ($cli_opts{'version'}) { # Version info - print "\nThis is MultiMarkdown, version $VERSION.\n"; - print "Original code Copyright 2004 John Gruber\n"; - print "MultiMarkdown changes Copyright 2005-2009 Fletcher Penney\n"; - print "http://fletcherpenney.net/multimarkdown/\n"; - print "http://daringfireball.net/projects/markdown/\n\n"; - exit 0; - } - if ($cli_opts{'shortversion'}) { # Just the version number string. 
- print $VERSION; - exit 0; - } - if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML - $g_empty_element_suffix = ">"; - } - - - #### Process incoming text: ########################### - my $text; - { - local $/; # Slurp the whole file - $text = <>; - } - print Markdown($text); - } -} - - - sub Markdown { # # Main function. The order in which other subs are called here is @@ -296,6 +137,13 @@ sub Markdown { # my $text = shift; + my %opts = @_; + + if (defined $opts{empty_element_suffix}) { + $g_empty_element_suffix = $opts{empty_element_suffix}; + } + + # Clear the global hashes. If we don't clear these, you get conflicts # from other articles when generating a page which contains more than # one article (e.g. an index page that shows the N most recent @@ -2555,138 +2403,4 @@ sub _UnescapeComments{ 1; -__END__ - - -=pod - -=head1 NAME - -B - - -=head1 SYNOPSIS - -B [ B<--html4tags> ] [ B<--version> ] [ B<-shortversion> ] - [ I ... ] - - -=head1 DESCRIPTION - -MultiMarkdown is an extended version of Markdown. See the website for more -information. - - http://fletcherpenney.net/multimarkdown/ - -Markdown is a text-to-HTML filter; it translates an easy-to-read / -easy-to-write structured text format into HTML. Markdown's text format -is most similar to that of plain text email, and supports features such -as headers, *emphasis*, code blocks, blockquotes, and links. - -Markdown's syntax is designed not as a generic markup language, but -specifically to serve as a front-end to (X)HTML. You can use span-level -HTML tags anywhere in a Markdown document, and you can use block level -HTML tags (like
    <div> and <table>
    as well). - -For more information about Markdown's syntax, see: - - http://daringfireball.net/projects/markdown/ - - -=head1 OPTIONS - -Use "--" to end switch parsing. For example, to open a file named "-z", use: - - Markdown.pl -- -z - -=over 4 - - -=item B<--html4tags> - -Use HTML 4 style for empty element tags, e.g.: - -
    <br> - -instead of Markdown's default XHTML style tags, e.g.: - -
    - - -=item B<-v>, B<--version> - -Display Markdown's version number and copyright information. - - -=item B<-s>, B<--shortversion> - -Display the short-form version number. - - -=back - - - -=head1 BUGS - -To file bug reports or feature requests (other than topics listed in the -Caveats section above) please send email to: - - support@daringfireball.net (for Markdown issues) - - owner@fletcherpenney.net (for MultiMarkdown issues) - -Please include with your report: (1) the example input; (2) the output -you expected; (3) the output (Multi)Markdown actually produced. - - -=head1 AUTHOR - - John Gruber - http://daringfireball.net/ - - PHP port and other contributions by Michel Fortin - http://michelf.com/ - - MultiMarkdown changes by Fletcher Penney - http://fletcherpenney.net/ - -=head1 COPYRIGHT AND LICENSE - -Original Markdown Code Copyright (c) 2003-2007 John Gruber - -All rights reserved. - -MultiMarkdown changes Copyright (c) 2005-2009 Fletcher T. Penney - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name "Markdown" nor the names of its contributors may - be used to endorse or promote products derived from this software - without specific prior written permission. - -This software is provided by the copyright holders and contributors "as -is" and any express or implied warranties, including, but not limited -to, the implied warranties of merchantability and fitness for a -particular purpose are disclaimed. 
In no event shall the copyright owner -or contributors be liable for any direct, indirect, incidental, special, -exemplary, or consequential damages (including, but not limited to, -procurement of substitute goods or services; loss of use, data, or -profits; or business interruption) however caused and on any theory of -liability, whether in contract, strict liability, or tort (including -negligence or otherwise) arising in any way out of the use of this -software, even if advised of the possibility of such damage. - -=cut From 890e89c1ad622f167f2e76632884f154b966ec24 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sun, 2 Jan 2011 11:25:20 +0100 Subject: [PATCH 08/45] Collect settings into a hash This allows us to make the settings overridable via the options passed to Markdown() in a much simpler way than the clumsy if (defined $opts{somesetting}) { $g_somesetting = $opts{somesetting} ; } that would need to be repeated for every setting. --- lib/MultiMarkdown.pm | 114 ++++++++++++++++++++++--------------------- 1 file changed, 59 insertions(+), 55 deletions(-) diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm index c189107..a2934c7 100755 --- a/lib/MultiMarkdown.pm +++ b/lib/MultiMarkdown.pm @@ -60,10 +60,18 @@ our $mathParser = new Text::ASCIIMathML(); # # Global default settings: # -our $g_empty_element_suffix = " />"; # Change to ">" for HTML output -our $g_tab_width = 4; -our $g_allow_mathml = 1; -our $g_base_header_level = 1; +our %g_settings = () ; + +sub reset_defaults { + $g_settings{empty_element_suffix} = " />"; # Change to ">" for HTML output + $g_settings{tab_width} = 4; + $g_settings{allow_mathml} = 1; + $g_settings{base_header_level} = 1; + $g_settings{use_metadata} = 1; + $g_settings{bibliography_title} = "Bibliography"; + $g_settings{document_format} = ""; + $g_settings{base_url} = ""; +} # # Globals: @@ -116,13 +124,9 @@ our $g_footnote_counter = 0; our $g_citation_counter = 0; our @g_used_references = (); our %g_references = (); -our 
$g_bibliography_title = "Bibliography"; -our $g_use_metadata = 1; $g_metadata_newline{default} = "\n"; $g_metadata_newline{keywords} = ", "; -our $g_document_format = ""; -our $g_base_url = ""; # Used to track when we're inside an ordered or unordered list # (see _ProcessListItems() for details): @@ -139,10 +143,11 @@ sub Markdown { my %opts = @_; - if (defined $opts{empty_element_suffix}) { - $g_empty_element_suffix = $opts{empty_element_suffix}; - } + reset_defaults(); + foreach (keys %opts) { + $g_settings{$_} = $opts{$_}; + } # Clear the global hashes. If we don't clear these, you get conflicts # from other articles when generating a page which contains more than @@ -161,7 +166,6 @@ sub Markdown { $g_citation_counter = 0; %g_attributes = (); - # Standardize line endings: $text =~ s{\r\n}{\n}g; # DOS to Unix $text =~ s{\r}{\n}g; # Mac to Unix @@ -179,7 +183,7 @@ sub Markdown { $text =~ s/^[ \t]+$//mg; # Strip out MetaData - $text = _ParseMetaData($text) if $g_use_metadata; + $text = _ParseMetaData($text) if $g_settings{use_metadata}; # And recheck for leading blank lines $text =~ s/^\n+//s; @@ -214,12 +218,12 @@ sub Markdown { $text = _ConvertCopyright($text); - if (lc($g_document_format) =~ /^complete\s*$/i) { + if (lc($g_settings{document_format}) =~ /^complete\s*$/i) { return xhtmlMetaData() . "\n\n" . $text . "\n\n"; - } elsif (lc($g_document_format) =~ /^snippet\s*$/i) { + } elsif (lc($g_settings{document_format}) =~ /^snippet\s*$/i) { return $text . "\n"; } else { - return $g_document_format . textMetaData() . $text . "\n"; + return $g_settings{document_format} . textMetaData() . $text . "\n"; } } @@ -231,7 +235,7 @@ sub _StripLinkDefinitions { # hash references. 
# my $text = shift; - my $less_than_tab = $g_tab_width - 1; + my $less_than_tab = $g_settings{tab_width} - 1; # Link defs are in the form: ^[id]: url "optional title" while ($text =~ s{ @@ -292,7 +296,7 @@ sub _StripHTML { sub _HashHTMLBlocks { my $text = shift; - my $less_than_tab = $g_tab_width - 1; + my $less_than_tab = $g_settings{tab_width} - 1; # Hashify HTML blocks: # We only want to do this for block-level HTML tags, such as headers, @@ -454,9 +458,9 @@ sub _RunBlockGamut { $text = _HashHTMLBlocks($text); # Do Horizontal Rules: - $text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n" . $header . "\n\n"; }egmx; @@ -862,7 +866,7 @@ sub _DoHeaders { $idString = ""; } - my $h_level = $g_base_header_level +1; + my $h_level = $g_settings{base_header_level} +1; "" . $header . "\n\n"; }egmx; @@ -885,7 +889,7 @@ sub _DoHeaders { \#* # optional closing #'s (not counted) \n+ }{ - my $h_level = length($1) + $g_base_header_level - 1; + my $h_level = length($1) + $g_settings{base_header_level} - 1; if (defined $3) { $label = Header2Label($3); } else { @@ -914,7 +918,7 @@ sub _DoLists { # Form HTML ordered (numbered) and unordered (bulleted) lists. 
# my $text = shift; - my $less_than_tab = $g_tab_width - 1; + my $less_than_tab = $g_settings{tab_width} - 1; # Re-usable patterns to match list item bullets and number markers: my $marker_ul = qr/[*+-]/; @@ -1083,11 +1087,11 @@ sub _DoCodeBlocks { (?:\n\n|\A) ( # $1 = the code block -- one or more lines, starting with a space/tab (?: - (?:[ ]{$g_tab_width} | \t) # Lines must start with a tab or a tab-width of spaces + (?:[ ]{$g_settings{tab_width}} | \t) # Lines must start with a tab or a tab-width of spaces .*\n+ )+ ) - ((?=^[ ]{0,$g_tab_width}\S)|\Z) # Lookahead for non-space at line-start, or end of doc + ((?=^[ ]{0,$g_settings{tab_width}}\S)|\Z) # Lookahead for non-space at line-start, or end of doc }{ my $codeblock = $1; my $result; # return value @@ -1522,7 +1526,7 @@ sub _Outdent { # my $text = shift; - $text =~ s/^(\t|[ ]{1,$g_tab_width})//gm; + $text =~ s/^(\t|[ ]{1,$g_settings{tab_width}})//gm; return $text; } @@ -1534,7 +1538,7 @@ sub _Detab { # my $text = shift; - $text =~ s{(.*?)\t}{$1.(' ' x ($g_tab_width - length($1) % $g_tab_width))}ge; + $text =~ s{(.*?)\t}{$1.(' ' x ($g_settings{tab_width} - length($1) % $g_settings{tab_width}))}ge; return $text; } @@ -1554,7 +1558,7 @@ sub _ParseMetaData { # If "Format: complete" was added automatically, don't force first # line of text to be metadata $g_metadata{$1}= "complete"; - $g_document_format = "complete"; + $g_settings{document_format} = "complete"; } foreach my $line ( split /\n/, $text ) { @@ -1567,17 +1571,17 @@ sub _ParseMetaData { $currentKey =~ s/\s$//; $g_metadata{$currentKey} = $meta; if (lc($currentKey) eq "format") { - $g_document_format = lc($g_metadata{$currentKey}); + $g_settings{document_format} = lc($g_metadata{$currentKey}); } if (lc($currentKey) eq "base url") { - $g_base_url = $g_metadata{$currentKey}; + $g_settings{base_url} = $g_metadata{$currentKey}; } if (lc($currentKey) eq "bibliography title") { - $g_bibliography_title = $g_metadata{$currentKey}; - $g_bibliography_title =~ 
s/\s*$//; + $g_settings{bibliography_title} = $g_metadata{$currentKey}; + $g_settings{bibliography_title} =~ s/\s*$//; } if (lc($currentKey) eq "base header level") { - $g_base_header_level = $g_metadata{$currentKey}; + $g_settings{base_header_level} = $g_metadata{$currentKey}; } if (!$g_metadata_newline{$currentKey}) { $g_metadata_newline{$currentKey} = $g_metadata_newline{default}; @@ -1603,7 +1607,7 @@ sub _ParseMetaData { sub _StripFootnoteDefinitions { my $text = shift; - my $less_than_tab = $g_tab_width - 1; + my $less_than_tab = $g_settings{tab_width} - 1; while ($text =~ s{ \n[ ]{0,$less_than_tab}\[\^([^\n]+?)\]\:[ \t]*# id = $1 @@ -1615,7 +1619,7 @@ sub _StripFootnoteDefinitions { { my $id = $1; my $footnote = "$2\n"; - $footnote =~ s/^[ ]{0,$g_tab_width}//gm; + $footnote =~ s/^[ ]{0,$g_settings{tab_width}}//gm; $g_footnotes{id2footnote($id)} = $footnote; } @@ -1700,7 +1704,7 @@ sub _PrintFootnotes{ $result .= "\n"; if ($footnote_counter > 0) { - $result = "\n\n
    \n\n\n".$result; + $result = "\n\n
    \n\n\n".$result; } else { $result = ""; } @@ -1732,7 +1736,7 @@ sub xhtmlMetaData { # This screws up xsltproc - make sure to use `-nonet -novalid` if you # have difficulty - if ($g_allow_mathml) { + if ($g_settings{allow_mathml}) { $result .= qq{ \n}; @@ -1756,12 +1760,12 @@ sub xhtmlMetaData { if (lc($key) eq "title") { $result.= "\t\t" . _EncodeAmpsAndAngles($g_metadata{$key}) . "\n"; } elsif (lc($key) eq "css") { - $result.= "\t\t @@ -1913,22 +1917,22 @@ sub _DoTables { } if ($cell =~ /\:$/) { if ($cell =~ /^\:/) { - $result .= " align=\"center\"$g_empty_element_suffix\n"; + $result .= " align=\"center\"$g_settings{empty_element_suffix}\n"; push(@alignments,"center"); } else { - $result .= " align=\"right\"$g_empty_element_suffix\n"; + $result .= " align=\"right\"$g_settings{empty_element_suffix}\n"; push(@alignments,"right"); } } else { if ($cell =~ /^\:/) { - $result .= " align=\"left\"$g_empty_element_suffix\n"; + $result .= " align=\"left\"$g_settings{empty_element_suffix}\n"; push(@alignments,"left"); } else { if (($cell =~ /^\./) || ($cell =~ /\.$/)) { - $result .= " align=\"char\"$g_empty_element_suffix\n"; + $result .= " align=\"char\"$g_settings{empty_element_suffix}\n"; push(@alignments,"char"); } else { - $result .= "$g_empty_element_suffix\n"; + $result .= "$g_settings{empty_element_suffix}\n"; push(@alignments,""); } } @@ -2057,7 +2061,7 @@ sub _DoAttributes{ sub _StripMarkdownReferences { my $text = shift; - my $less_than_tab = $g_tab_width - 1; + my $less_than_tab = $g_settings{tab_width} - 1; while ($text =~ s{ \n\[\#(.+?)\]:[ \t]* # id = $1 @@ -2070,7 +2074,7 @@ sub _StripMarkdownReferences { my $id = $1; my $reference = "$2\n"; - $reference =~ s/^[ ]{0,$g_tab_width}//gm; + $reference =~ s/^[ ]{0,$g_settings{tab_width}}//gm; $reference = _RunBlockGamut($reference); @@ -2167,7 +2171,7 @@ sub _PrintMarkdownBibliography{ $result .= "
    "; if ($citation_counter > 0) { - $result = "\n\n
    \n$g_bibliography_title

    \n\n".$result; + $result = "\n\n
    \n$g_settings{bibliography_title}

    \n\n".$result; } else { $result = ""; } @@ -2312,7 +2316,7 @@ sub _DoDefinitionLists { # Uses the syntax proposed by Michel Fortin in PHP Markdown Extra my $text = shift; - my $less_than_tab = $g_tab_width -1; + my $less_than_tab = $g_settings{tab_width} -1; my $line_start = qr{ [ ]{0,$less_than_tab} @@ -2369,7 +2373,7 @@ sub _DoDefinitionLists { $definition }{ my $def = $1 . "\n"; - $def =~ s/^[ ]{0,$g_tab_width}//gm; + $def =~ s/^[ ]{0,$g_settings{tab_width}}//gm; "
    \n" . _RunBlockGamut($def) . "\n
    \n"; }xsge; From 0bec43c42fc76d1dac75c081ae0dc38d0ba10afe Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sat, 8 Jan 2011 07:43:10 +0100 Subject: [PATCH 09/45] Prepare _HashHTMLBlocks extractor Setting up the tagged extractor used by _HashHTMLBlocks outside of the sub, we can squeeze off some runtime in case of repeated processing. --- lib/MultiMarkdown.pm | 72 ++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm index a2934c7..bafc5ae 100755 --- a/lib/MultiMarkdown.pm +++ b/lib/MultiMarkdown.pm @@ -294,45 +294,45 @@ sub _StripHTML { return $text; } -sub _HashHTMLBlocks { - my $text = shift; - my $less_than_tab = $g_settings{tab_width} - 1; +# Hashify HTML blocks: +# We only want to do this for block-level HTML tags, such as headers, +# lists, and tables. That's because we still want to wrap

    s around +# "paragraphs" that are wrapped in non-block-level tags, such as anchors, +# phrase emphasis, and spans. The list of tags we're looking for is +# hard-coded: +my $block_tags = qr{ + (?: + p | div | h[1-6] | blockquote | pre | table | + dl | ol | ul | script | noscript | form | + fieldset | iframe | ins | del + ) + }x; # MultiMarkdown does not include `math` in the above list so that + # Equations can optionally be included in separate paragraphs + +my $tag_attrs = qr{ + (?: # Match one attr name/value pair + \s+ # There needs to be at least some whitespace + # before each attribute name. + [\w.:_-]+ # Attribute name + \s*=\s* + (?: + ".+?" # "Attribute value" + | + '.+?' # 'Attribute value' + ) + )* # Zero or more + }x; - # Hashify HTML blocks: - # We only want to do this for block-level HTML tags, such as headers, - # lists, and tables. That's because we still want to wrap

    s around - # "paragraphs" that are wrapped in non-block-level tags, such as anchors, - # phrase emphasis, and spans. The list of tags we're looking for is - # hard-coded: - my $block_tags = qr{ - (?: - p | div | h[1-6] | blockquote | pre | table | - dl | ol | ul | script | noscript | form | - fieldset | iframe | ins | del - ) - }x; # MultiMarkdown does not include `math` in the above list so that - # Equations can optionally be included in separate paragraphs - - my $tag_attrs = qr{ - (?: # Match one attr name/value pair - \s+ # There needs to be at least some whitespace - # before each attribute name. - [\w.:_-]+ # Attribute name - \s*=\s* - (?: - ".+?" # "Attribute value" - | - '.+?' # 'Attribute value' - ) - )* # Zero or more - }x; +my $empty_tag = qr{< \w+ $tag_attrs \s* />}xms; +my $open_tag = qr{< $block_tags $tag_attrs \s* >}xms; +my $close_tag = undef; # let Text::Balanced handle this - my $empty_tag = qr{< \w+ $tag_attrs \s* />}xms; - my $open_tag = qr{< $block_tags $tag_attrs \s* >}xms; - my $close_tag = undef; # let Text::Balanced handle this +use Text::Balanced qw(gen_extract_tagged); +my $extract_block = gen_extract_tagged($open_tag, $close_tag, undef, { ignore => [$empty_tag] }); - use Text::Balanced qw(gen_extract_tagged); - my $extract_block = gen_extract_tagged($open_tag, $close_tag, undef, { ignore => [$empty_tag] }); +sub _HashHTMLBlocks { + my $text = shift; + my $less_than_tab = $g_settings{tab_width} - 1; my @chunks; ## TO-DO: the 0,3 on the next line ought to respect the From d758734ed3fcfe08f28de30f8c18b1095f500129 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sat, 8 Jan 2011 08:42:56 +0100 Subject: [PATCH 10/45] Early bailout from _DoDefinitionLists The search & replace in _DoDefinitionLists takes a lot of time even if no replacement is being done. Optimize by bailing out early if no line looking like a definition is found. 
--- lib/MultiMarkdown.pm | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm index bafc5ae..e5abbac 100755 --- a/lib/MultiMarkdown.pm +++ b/lib/MultiMarkdown.pm @@ -2317,6 +2317,8 @@ sub _DoDefinitionLists { my $text = shift; my $less_than_tab = $g_settings{tab_width} -1; + + return $text unless $text =~ /\n[ ]{0,$less_than_tab}\:[ \t]+/; my $line_start = qr{ [ ]{0,$less_than_tab} From be84578a2f615b35aa1d64be1cd666f34eec2afa Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sat, 8 Jan 2011 09:28:33 +0100 Subject: [PATCH 11/45] Small _HashHTMLBlocks optimization Skip the block tag hashing if there are no block tags. --- lib/MultiMarkdown.pm | 48 +++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm index e5abbac..adaf990 100755 --- a/lib/MultiMarkdown.pm +++ b/lib/MultiMarkdown.pm @@ -334,36 +334,38 @@ sub _HashHTMLBlocks { my $text = shift; my $less_than_tab = $g_settings{tab_width} - 1; - my @chunks; - ## TO-DO: the 0,3 on the next line ought to respect the - ## tabwidth, or else, we should mandate 4-space tabwidth and - ## be done with it: - while ($text =~ s{^(([ ]{0,3}<)?.*\n)}{}m) { - my $cur_line = $1; - if (defined $2) { - # current line could be start of code block - - my ($tag, $remainder) = $extract_block->($cur_line . $text); - if ($tag) { - my $key = md5_hex($tag); - $g_html_blocks{$key} = $tag; - push @chunks, "\n\n" . $key . "\n\n"; - $text = $remainder; + if ($text =~ /<$block_tags/) { + my @chunks; + ## TO-DO: the 0,3 on the next line ought to respect the + ## tabwidth, or else, we should mandate 4-space tabwidth and + ## be done with it: + while ($text =~ s{^(([ ]{0,3}<)?.*\n)}{}m) { + my $cur_line = $1; + if (defined $2) { + # current line could be start of code block + + my ($tag, $remainder) = $extract_block->($cur_line . 
$text); + if ($tag) { + my $key = md5_hex($tag); + $g_html_blocks{$key} = $tag; + push @chunks, "\n\n" . $key . "\n\n"; + $text = $remainder; + } + else { + # No tag match, so toss $cur_line into @chunks + push @chunks, $cur_line; + } } else { - # No tag match, so toss $cur_line into @chunks + # current line could NOT be start of code block push @chunks, $cur_line; } + } - else { - # current line could NOT be start of code block - push @chunks, $cur_line; - } + push @chunks, $text; # Whatever is left. + $text = join '', @chunks; } - push @chunks, $text; # Whatever is left. - - $text = join '', @chunks; From 9573fa27cb5f146564509811ac3b9b3d153c2edf Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sat, 8 Jan 2011 14:09:14 +0100 Subject: [PATCH 12/45] Read links when finding who we are --- bin/MultiMarkdown.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/MultiMarkdown.pl b/bin/MultiMarkdown.pl index e5c2d4a..60d73c6 100755 --- a/bin/MultiMarkdown.pl +++ b/bin/MultiMarkdown.pl @@ -9,7 +9,7 @@ eval {require MultiMarkdown}; if ($@) { - my $me = __FILE__; + my $me = readlink(__FILE__); my $path = dirname(dirname($me)); From 7f8cadeacfd9e7ba335e86aeebc8738693ce393a Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sat, 8 Jan 2011 14:27:52 +0100 Subject: [PATCH 13/45] Process block quotes earlier This fixes an issue where a heading would follow a list with a single item containing a block quote: the heading would be absorbed by the block quote, and fail to get expanded (a similar failure happened with standard Markdown, where the heading would still get expanded but still appear within the blockquote). --- lib/MultiMarkdown.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm index adaf990..44ba807 100755 --- a/lib/MultiMarkdown.pm +++ b/lib/MultiMarkdown.pm @@ -464,10 +464,10 @@ sub _RunBlockGamut { $text =~ s{^[ ]{0,2}([ ]? 
-[ ]?){3,}[ \t]*$}{\n Date: Sat, 8 Jan 2011 16:27:01 +0100 Subject: [PATCH 14/45] Move some lines around, getting closer to the typical Perl module --- lib/MultiMarkdown.pm | 45 ++++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm index 44ba807..8e6cc3b 100755 --- a/lib/MultiMarkdown.pm +++ b/lib/MultiMarkdown.pm @@ -1,5 +1,3 @@ -#!/usr/bin/env perl - # MultiMarkdown -- A modification of John Gruber's original Markdown # that adds new features and an output format that can more readily # be converted into other document formats @@ -33,6 +31,11 @@ use warnings; use File::Basename; use File::Spec; +use Digest::MD5 qw(md5_hex); +use base 'Exporter'; + +our $VERSION = '2.0.b6'; +our @EXPORT_OK = qw{Markdown}; eval {require MT}; # Test to see if we're running in MT. unless ($@) { @@ -44,35 +47,12 @@ unless ($@) { require File::Spec->join($path, "ASCIIMathML.pm"); } -use Digest::MD5 qw(md5_hex); - -require Exporter; -our @ISA = qw{Exporter}; -our @EXPORT = qw{Markdown}; -our $VERSION = '2.0.b6'; - our $mathParser = new Text::ASCIIMathML(); ## Disabled; causes problems under Perl 5.6.1: # use utf8; # binmode( STDOUT, ":utf8" ); # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html -# -# Global default settings: -# -our %g_settings = () ; - -sub reset_defaults { - $g_settings{empty_element_suffix} = " />"; # Change to ">" for HTML output - $g_settings{tab_width} = 4; - $g_settings{allow_mathml} = 1; - $g_settings{base_header_level} = 1; - $g_settings{use_metadata} = 1; - $g_settings{bibliography_title} = "Bibliography"; - $g_settings{document_format} = ""; - $g_settings{base_url} = ""; -} - # # Globals: # @@ -108,6 +88,21 @@ foreach my $char (split //, '\\`*_{}[]()>#+-.!') { $g_escape_table{$char} = md5_hex($char); } +# +# Global default settings: +# +our %g_settings = () ; + +sub reset_defaults { + $g_settings{empty_element_suffix} = " />"; # Change to ">" for HTML 
output + $g_settings{tab_width} = 4; + $g_settings{allow_mathml} = 1; + $g_settings{base_header_level} = 1; + $g_settings{use_metadata} = 1; + $g_settings{bibliography_title} = "Bibliography"; + $g_settings{document_format} = ""; + $g_settings{base_url} = ""; +} # Global hashes, used by various utility routines our %g_urls = (); From 3a5d919850113df6eeeba483d975775516d1d501 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sat, 8 Jan 2011 19:29:54 +0100 Subject: [PATCH 15/45] Object-oriented interface This brings the module interface on par with Text::MultiMarkdown from CPAN, except for some extra parameters offered by that module. --- bin/MultiMarkdown.pl | 10 +- lib/MultiMarkdown.pm | 660 +++++++++++++++++++++++-------------------- 2 files changed, 351 insertions(+), 319 deletions(-) diff --git a/bin/MultiMarkdown.pl b/bin/MultiMarkdown.pl index 60d73c6..c9ecd2f 100755 --- a/bin/MultiMarkdown.pl +++ b/bin/MultiMarkdown.pl @@ -20,7 +20,7 @@ require MultiMarkdown; } -import MultiMarkdown qw{Markdown}; +import MultiMarkdown qw{markdown}; #### Blosxom plug-in interface ########################################## @@ -37,7 +37,7 @@ sub story { if ( (! $g_blosxom_use_meta) or (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i)) ){ - $$body_ref = Markdown($$body_ref); + $$body_ref = markdown($$body_ref); } 1; } @@ -101,7 +101,7 @@ sub story { $opts{empty_element_suffix} = " />"; } } - $text = $raw ? $text : Markdown($text, %opts); + $text = $raw ? 
$text : markdown($text, %opts); $text; }, }); @@ -131,7 +131,7 @@ sub story { $opts{empty_element_suffix} = " />"; } } - $text = Markdown($text, %opts); + $text = markdown($text, %opts); $text = $smartypants->($text, '1'); }, }); @@ -180,7 +180,7 @@ sub story { local $/; # Slurp the whole file $text = <>; } - print Markdown($text, %opts); + print markdown($text, \%opts); } } diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm index 8e6cc3b..354c7d1 100755 --- a/lib/MultiMarkdown.pm +++ b/lib/MultiMarkdown.pm @@ -32,10 +32,11 @@ use warnings; use File::Basename; use File::Spec; use Digest::MD5 qw(md5_hex); +use Carp qw(croak); use base 'Exporter'; our $VERSION = '2.0.b6'; -our @EXPORT_OK = qw{Markdown}; +our @EXPORT_OK = qw{markdown}; eval {require MT}; # Test to see if we're running in MT. unless ($@) { @@ -91,136 +92,164 @@ foreach my $char (split //, '\\`*_{}[]()>#+-.!') { # # Global default settings: # -our %g_settings = () ; - -sub reset_defaults { - $g_settings{empty_element_suffix} = " />"; # Change to ">" for HTML output - $g_settings{tab_width} = 4; - $g_settings{allow_mathml} = 1; - $g_settings{base_header_level} = 1; - $g_settings{use_metadata} = 1; - $g_settings{bibliography_title} = "Bibliography"; - $g_settings{document_format} = ""; - $g_settings{base_url} = ""; + +our %g_default_settings = ( + allow_mathml => 1, + base_header_level => 1, + base_url => "", + bibliography_title => "Bibliography", + document_format => "", + empty_element_suffix => " />", + tab_width => 4, + use_metadata => 1, +); + +sub new { + my ($class, %params) = @_; + + my %p = %g_default_settings; + foreach (keys %params) { + $p{$_} = $params{$_}; + } + + my $self = { params => \%p }; + bless $self, ref($class) || $class; + return $self; +} + +sub markdown { + my ( $self, $text, $options ) = @_; + + # Detect functional mode, and create an instance for this run.. 
+ unless (ref $self) { + if ( $self ne __PACKAGE__ ) { + my $ob = __PACKAGE__->new(); + # $self is text, $text is options + return $ob->markdown($self, $text); + } + else { + croak('Calling ' . $self . '->markdown (as a class method) is not supported.'); + } + } + + $options ||= {}; + + %$self = (%{ $self->{params} }, %$options, params => $self->{params}); + + $self->_CleanUpRunData($options); + + return $self->_Markdown($text); } # Global hashes, used by various utility routines -our %g_urls = (); -our %g_titles= (); -our %g_html_blocks = (); -our %g_metadata = (); -our %g_metadata_newline = (); -our %g_crossrefs = (); -our %g_footnotes = (); -our %g_attributes = (); -our @g_used_footnotes = (); -our $g_footnote_counter = 0; - -our $g_citation_counter = 0; -our @g_used_references = (); -our %g_references = (); - -$g_metadata_newline{default} = "\n"; -$g_metadata_newline{keywords} = ", "; +# Clear the global hashes. If we don't clear these, you get conflicts +# from other articles when generating a page which contains more than +# one article (e.g. an index page that shows the N most recent +# articles): +sub _CleanUpRunData($$) { + my ($self, $options) = @_; + + $self->{_urls} = {}; + $self->{_titles} = {}; + $self->{_html_blocks} = {}; + + $self->{_metadata} = {}; + $self->{_metadata_newline} = { + 'default' => "\n", + 'keywords' => ", ", + }; + $self->{_crossrefs} = {}; + $self->{_footnotes} = {}; + $self->{_attributes} = {}; + $self->{_used_footnotes} = []; + $self->{_footnote_counter} = 0; + $self->{_used_references} = []; + $self->{_citation_counter} = 0; + $self->{_references} = {}; # Used to track when we're inside an ordered or unordered list # (see _ProcessListItems() for details): -our $g_list_level = 0; + $self->{_list_level} = 0; +} + -sub Markdown { +sub _Markdown { # # Main function. The order in which other subs are called here is # essential. 
Link and image substitutions need to happen before # _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the # and tags get encoded. # - my $text = shift; + my ($self, $text) = @_; + + $text = $self->_CleanUpDoc($text); + + # Strip out MetaData + $text = $self->_ParseMetaData($text) if $self->{use_metadata}; + + # And recheck for leading blank lines + $text =~ s/^\n+//s; - my %opts = @_; + # Turn block-level HTML blocks into hash entries + $text = $self->_HashHTMLBlocks($text); - reset_defaults(); + # Strip footnote and link definitions, store in hashes. + $text = $self->_StripFootnoteDefinitions($text); - foreach (keys %opts) { - $g_settings{$_} = $opts{$_}; + $text = $self->_StripLinkDefinitions($text); + + $self->_GenerateImageCrossRefs($text); + + $text = $self->_StripMarkdownReferences($text); + + $text = $self->_RunBlockGamut($text); + + $text = $self->_DoMarkdownCitations($text); + + $text = $self->_DoFootnotes($text); + + $text = _UnescapeSpecialChars($text); + + # Clean encoding within HTML comments + $text = $self->_UnescapeComments($text); + + $text = $self->_FixFootnoteParagraphs($text); + $text .= $self->_PrintFootnotes(); + + $text .= $self->_PrintMarkdownBibliography(); + + $text = _ConvertCopyright($text); + + if (lc($self->{document_format}) =~ /^complete\s*$/i) { + return $self->xhtmlMetaData() . "\n\n" . $text . "\n\n"; + } elsif (lc($self->{document_format}) =~ /^snippet\s*$/i) { + return $text . "\n"; + } else { + return $self->{document_format} . $self->textMetaData() . $text . "\n"; } - # Clear the global hashes. If we don't clear these, you get conflicts - # from other articles when generating a page which contains more than - # one article (e.g. 
an index page that shows the N most recent - # articles): - %g_urls = (); - %g_titles = (); - %g_html_blocks = (); - %g_metadata = (); - %g_crossrefs = (); - %g_footnotes = (); - @g_used_footnotes = (); - $g_footnote_counter = 0; - @g_used_references = (); - %g_references = (); - $g_citation_counter = 0; - %g_attributes = (); +} + +sub _CleanUpDoc { + my ($self, $text) = @_; # Standardize line endings: - $text =~ s{\r\n}{\n}g; # DOS to Unix - $text =~ s{\r}{\n}g; # Mac to Unix + $text =~ s{\r\n}{\n}g; # DOS to Unix + $text =~ s{\r}{\n}g; # Mac to Unix # Make sure $text ends with a couple of newlines: $text .= "\n\n"; # Convert all tabs to spaces. - $text = _Detab($text); + $text = $self->_Detab($text); # Strip any lines consisting only of spaces and tabs. # This makes subsequent regexen easier to write, because we can # match consecutive blank lines with /\n+/ instead of something # contorted like /[ \t]*\n+/ . $text =~ s/^[ \t]+$//mg; - - # Strip out MetaData - $text = _ParseMetaData($text) if $g_settings{use_metadata}; - - # And recheck for leading blank lines - $text =~ s/^\n+//s; - - # Turn block-level HTML blocks into hash entries - $text = _HashHTMLBlocks($text); - - # Strip footnote and link definitions, store in hashes. - $text = _StripFootnoteDefinitions($text); - - $text = _StripLinkDefinitions($text); - _GenerateImageCrossRefs($text); - - $text = _StripMarkdownReferences($text); - - $text = _RunBlockGamut($text); - - $text = _DoMarkdownCitations($text); - - $text = _DoFootnotes($text); - - $text = _UnescapeSpecialChars($text); - - # Clean encoding within HTML comments - $text = _UnescapeComments($text); - - $text = _FixFootnoteParagraphs($text); - $text .= _PrintFootnotes(); - - $text .= _PrintMarkdownBibliography(); - - $text = _ConvertCopyright($text); - - if (lc($g_settings{document_format}) =~ /^complete\s*$/i) { - return xhtmlMetaData() . "\n\n" . $text . "\n\n"; - } elsif (lc($g_settings{document_format}) =~ /^snippet\s*$/i) { - return $text . 
"\n"; - } else { - return $g_settings{document_format} . textMetaData() . $text . "\n"; - } - + return $text; } @@ -229,8 +258,8 @@ sub _StripLinkDefinitions { # Strips link definitions from text, stores the URLs and titles in # hash references. # - my $text = shift; - my $less_than_tab = $g_settings{tab_width} - 1; + my ($self, $text) = @_; + my $less_than_tab = $self->{tab_width} - 1; # Link defs are in the form: ^[id]: url "optional title" while ($text =~ s{ @@ -263,16 +292,16 @@ sub _StripLinkDefinitions { (?:\n+|\Z) } {}mx) { -# $g_urls{lc $1} = _EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive - $g_urls{lc $1} = $2; # Link IDs are case-insensitive +# $self->{_urls}{lc $1} = _EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive + $self->{_urls}{lc $1} = $2; # Link IDs are case-insensitive if ($3) { - $g_titles{lc $1} = $3; - $g_titles{lc $1} =~ s/"/"/g; + $self->{_titles}{lc $1} = $3; + $self->{_titles}{lc $1} =~ s/"/"/g; } # MultiMarkdown addition " if ($4) { - $g_attributes{lc $1} = $4; + $self->{_attributes}{lc $1} = $4; } # /addition } @@ -295,7 +324,7 @@ sub _StripHTML { # "paragraphs" that are wrapped in non-block-level tags, such as anchors, # phrase emphasis, and spans. The list of tags we're looking for is # hard-coded: -my $block_tags = qr{ +our $g_block_tags = qr{ (?: p | div | h[1-6] | blockquote | pre | table | dl | ol | ul | script | noscript | form | @@ -304,7 +333,7 @@ my $block_tags = qr{ }x; # MultiMarkdown does not include `math` in the above list so that # Equations can optionally be included in separate paragraphs -my $tag_attrs = qr{ +our $g_tag_attrs = qr{ (?: # Match one attr name/value pair \s+ # There needs to be at least some whitespace # before each attribute name. 
@@ -318,18 +347,18 @@ my $tag_attrs = qr{ )* # Zero or more }x; -my $empty_tag = qr{< \w+ $tag_attrs \s* />}xms; -my $open_tag = qr{< $block_tags $tag_attrs \s* >}xms; -my $close_tag = undef; # let Text::Balanced handle this +our $g_empty_tag = qr{< \w+ $g_tag_attrs \s* />}xms; +our $g_open_tag = qr{< $g_block_tags $g_tag_attrs \s* >}xms; +our $g_close_tag = undef; # let Text::Balanced handle this use Text::Balanced qw(gen_extract_tagged); -my $extract_block = gen_extract_tagged($open_tag, $close_tag, undef, { ignore => [$empty_tag] }); +our $g_extract_block = gen_extract_tagged($g_open_tag, $g_close_tag, undef, { ignore => [$g_empty_tag] }); sub _HashHTMLBlocks { - my $text = shift; - my $less_than_tab = $g_settings{tab_width} - 1; + my ($self, $text) = @_; + my $less_than_tab = $self->{tab_width} - 1; - if ($text =~ /<$block_tags/) { + if ($text =~ /<$g_block_tags/) { my @chunks; ## TO-DO: the 0,3 on the next line ought to respect the ## tabwidth, or else, we should mandate 4-space tabwidth and @@ -339,10 +368,10 @@ sub _HashHTMLBlocks { if (defined $2) { # current line could be start of code block - my ($tag, $remainder) = $extract_block->($cur_line . $text); + my ($tag, $remainder) = $g_extract_block->($cur_line . $text); if ($tag) { my $key = md5_hex($tag); - $g_html_blocks{$key} = $tag; + $self->{_html_blocks}{$key} = $tag; push @chunks, "\n\n" . $key . "\n\n"; $text = $remainder; } @@ -383,7 +412,7 @@ sub _HashHTMLBlocks { ) }{ my $key = md5_hex($1); - $g_html_blocks{$key} = $1; + $self->{_html_blocks}{$key} = $1; "\n\n" . $key . "\n\n"; }egx; @@ -406,7 +435,7 @@ sub _HashHTMLBlocks { ) }{ my $key = md5_hex($1); - $g_html_blocks{$key} = $1; + $self->{_html_blocks}{$key} = $1; "\n\n" . $key . "\n\n"; }egx; @@ -429,7 +458,7 @@ sub _HashHTMLBlocks { ) }{ my $key = md5_hex($1); - $g_html_blocks{$key} = $1; + $self->{_html_blocks}{$key} = $1; "\n\n" . $key . 
"\n\n"; }egx; @@ -443,33 +472,33 @@ sub _RunBlockGamut { # These are all the transformations that form block-level # tags like paragraphs, headers, and list items. # - my $text = shift; + my ($self, $text) = @_; - $text = _DoHeaders($text); + $text = $self->_DoHeaders($text); # Do tables first to populate the table id's for cross-refs # Escape

     so we don't get greedy with tables
    -	$text = _DoTables($text);
    +	$text = $self->_DoTables($text);
     	
     	# And now, protect our tables
    -	$text = _HashHTMLBlocks($text);
    +	$text = $self->_HashHTMLBlocks($text);
     
     	# Do Horizontal Rules:
    -	$text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n{empty_element_suffix}\n}gmx;
    +	$text =~ s{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}{\n{empty_element_suffix}\n}gmx;
    +	$text =~ s{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}{\n{empty_element_suffix}\n}gmx;
     
    -	$text = _DoBlockQuotes($text);
    -	$text = _DoDefinitionLists($text);
    -	$text = _DoLists($text);
    -	$text = _DoCodeBlocks($text);
    +	$text = $self->_DoBlockQuotes($text);
    +	$text = $self->_DoDefinitionLists($text);
    +	$text = $self->_DoLists($text);
    +	$text = $self->_DoCodeBlocks($text);
     
     	# We already ran _HashHTMLBlocks() before, in Markdown(), but that
     	# was to escape raw HTML in the original Markdown source. This time,
     	# we're escaping the markup we've just created, so that we don't wrap
     	# 
  • " . $item . "
  • \n"; }egmx; - $g_list_level--; + $self->{_list_level}--; return $list_str; } @@ -1078,23 +1106,23 @@ sub _DoCodeBlocks { # Process Markdown `
    ` blocks.
     #	
     
    -	my $text = shift;
    +	my ($self, $text) = @_;
     
     	$text =~ s{
     			(?:\n\n|\A)
     			(	            # $1 = the code block -- one or more lines, starting with a space/tab
     			  (?:
    -			    (?:[ ]{$g_settings{tab_width}} | \t)  # Lines must start with a tab or a tab-width of spaces
    +			    (?:[ ]{$self->{tab_width}} | \t)  # Lines must start with a tab or a tab-width of spaces
     			    .*\n+
     			  )+
     			)
    -			((?=^[ ]{0,$g_settings{tab_width}}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
    +			((?=^[ ]{0,$self->{tab_width}}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
     		}{
     			my $codeblock = $1;
     			my $result; # return value
     
    -			$codeblock = _EncodeCode(_Outdent($codeblock));
    -			$codeblock = _Detab($codeblock);
    +			$codeblock = _EncodeCode($self->_Outdent($codeblock));
    +			$codeblock = $self->_Detab($codeblock);
     			$codeblock =~ s/\A\n+//; # trim leading newlines
     			$codeblock =~ s/\n+\z//; # trim trailing newlines
     
    @@ -1227,7 +1255,7 @@ sub _DoItalicsAndBold {
     
     
     sub _DoBlockQuotes {
    -	my $text = shift;
    +	my ($self, $text) = @_;
     
     	$text =~ s{
     		  (								# Wrap whole match in $1
    @@ -1242,7 +1270,7 @@ sub _DoBlockQuotes {
     			my $bq = $1;
     			$bq =~ s/^[ \t]*>[ \t]?//gm;	# trim one level of quoting
     			$bq =~ s/^[ \t]+$//mg;			# trim whitespace-only lines
    -			$bq = _RunBlockGamut($bq);		# recurse
    +			$bq = $self->_RunBlockGamut($bq);		# recurse
     
     			$bq =~ s/^/  /g;
     			# These leading spaces screw with 
     content, so we need to fix that:
    @@ -1267,7 +1295,7 @@ sub _FormParagraphs {
     #	Params:
     #		$text - string to process with html 

    tags # - my $text = shift; + my ($self, $text) = @_; # Strip leading and trailing lines: $text =~ s/\A\n+//; @@ -1279,8 +1307,8 @@ sub _FormParagraphs { # Wrap

    tags. # foreach (@grafs) { - unless (defined( $g_html_blocks{$_} )) { - $_ = _RunSpanGamut($_); + unless (defined( $self->{_html_blocks}{$_} )) { + $_ = $self->_RunSpanGamut($_); s/^([ \t]*)/

    /; $_ .= "

    "; } @@ -1290,7 +1318,7 @@ sub _FormParagraphs { # Unhashify HTML blocks # # foreach my $graf (@grafs) { -# my $block = $g_html_blocks{$graf}; +# my $block = $self->{_html_blocks}{$graf}; # if (defined $block) { # $graf = $block; # } @@ -1298,7 +1326,7 @@ sub _FormParagraphs { foreach my $graf (@grafs) { # Modify elements of @grafs in-place... - my $block = $g_html_blocks{$graf}; + my $block = $self->{_html_blocks}{$graf}; if (defined $block) { $graf = $block; if ($block =~ m{ @@ -1325,9 +1353,9 @@ sub _FormParagraphs { # We can't call Markdown(), because that resets the hash; # that initialization code should be pulled into its own sub, though. - $div_content = _HashHTMLBlocks($div_content); - $div_content = _StripLinkDefinitions($div_content); - $div_content = _RunBlockGamut($div_content); + $div_content = $self->_HashHTMLBlocks($div_content); + $div_content = $self->_StripLinkDefinitions($div_content); + $div_content = $self->_RunBlockGamut($div_content); $div_content = _UnescapeSpecialChars($div_content); $div_open =~ s{\smarkdown\s*=\s*(['"]).+?\1}{}ms; @@ -1388,7 +1416,7 @@ sub _EncodeBackslashEscapes { sub _DoAutoLinks { - my $text = shift; + my ($self, $text) = @_; $text =~ s{<((https?|ftp|dict):[^'">\s]+)>}{$1}gi; @@ -1521,9 +1549,9 @@ sub _Outdent { # # Remove one level of line-leading tabs or spaces # - my $text = shift; + my ($self, $text) = @_; - $text =~ s/^(\t|[ ]{1,$g_settings{tab_width}})//gm; + $text =~ s/^(\t|[ ]{1,$self->{tab_width}})//gm; return $text; } @@ -1533,9 +1561,9 @@ sub _Detab { # Cribbed from a post by Bart Lateur: # # - my $text = shift; + my ($self, $text) = @_; - $text =~ s{(.*?)\t}{$1.(' ' x ($g_settings{tab_width} - length($1) % $g_settings{tab_width}))}ge; + $text =~ s{(.*?)\t}{$1.(' ' x ($self->{tab_width} - length($1) % $self->{tab_width}))}ge; return $text; } @@ -1544,7 +1572,7 @@ sub _Detab { # sub _ParseMetaData { - my $text = shift; + my ($self, $text) = @_; my $clean_text = ""; my ($inMetaData, $currentKey) = (1,''); 
@@ -1554,8 +1582,8 @@ sub _ParseMetaData { if ($text =~ s/^(Format):\s*complete\n(.*?)\n/$2\n/is) { # If "Format: complete" was added automatically, don't force first # line of text to be metadata - $g_metadata{$1}= "complete"; - $g_settings{document_format} = "complete"; + $self->{_metadata}{$1}= "complete"; + $self->{document_format} = "complete"; } foreach my $line ( split /\n/, $text ) { @@ -1566,22 +1594,22 @@ sub _ParseMetaData { my $meta = $2; $currentKey =~ s/\s+/ /g; $currentKey =~ s/\s$//; - $g_metadata{$currentKey} = $meta; + $self->{_metadata}{$currentKey} = $meta; if (lc($currentKey) eq "format") { - $g_settings{document_format} = lc($g_metadata{$currentKey}); + $self->{document_format} = lc($self->{_metadata}{$currentKey}); } if (lc($currentKey) eq "base url") { - $g_settings{base_url} = $g_metadata{$currentKey}; + $self->{base_url} = $self->{_metadata}{$currentKey}; } if (lc($currentKey) eq "bibliography title") { - $g_settings{bibliography_title} = $g_metadata{$currentKey}; - $g_settings{bibliography_title} =~ s/\s*$//; + $self->{bibliography_title} = $self->{_metadata}{$currentKey}; + $self->{bibliography_title} =~ s/\s*$//; } if (lc($currentKey) eq "base header level") { - $g_settings{base_header_level} = $g_metadata{$currentKey}; + $self->{base_header_level} = $self->{_metadata}{$currentKey}; } - if (!$g_metadata_newline{$currentKey}) { - $g_metadata_newline{$currentKey} = $g_metadata_newline{default}; + if (!$self->{_metadata_newline}{$currentKey}) { + $self->{_metadata_newline}{$currentKey} = $self->{_metadata_newline}{default}; } } else { if ($currentKey eq "") { @@ -1591,7 +1619,7 @@ sub _ParseMetaData { next; } if ($line =~ /^\s*(.+)$/ ) { - $g_metadata{$currentKey} .= "$g_metadata_newline{$currentKey}$1"; + $self->{_metadata}{$currentKey} .= "$self->{_metadata_newline}{$currentKey}$1"; } } } else { @@ -1603,8 +1631,8 @@ sub _ParseMetaData { } sub _StripFootnoteDefinitions { - my $text = shift; - my $less_than_tab = $g_settings{tab_width} - 
1; + my ($self, $text) = @_; + my $less_than_tab = $self->{tab_width} - 1; while ($text =~ s{ \n[ ]{0,$less_than_tab}\[\^([^\n]+?)\]\:[ \t]*# id = $1 @@ -1616,23 +1644,23 @@ sub _StripFootnoteDefinitions { { my $id = $1; my $footnote = "$2\n"; - $footnote =~ s/^[ ]{0,$g_settings{tab_width}}//gm; + $footnote =~ s/^[ ]{0,$self->{tab_width}}//gm; - $g_footnotes{id2footnote($id)} = $footnote; + $self->{_footnotes}{id2footnote($id)} = $footnote; } return $text; } sub _DoFootnotes { - my $text = shift; + my ($self, $text) = @_; # First, run routines that get skipped in footnotes - foreach my $label (sort keys %g_footnotes) { - my $footnote = _RunBlockGamut($g_footnotes{$label}); + foreach my $label (sort keys %{$self->{_footnotes}}) { + my $footnote = $self->_RunBlockGamut($self->{_footnotes}{$label}); - $footnote = _DoMarkdownCitations($footnote); - $g_footnotes{$label} = $footnote; + $footnote = $self->_DoMarkdownCitations($footnote); + $self->{_footnotes}{$label} = $footnote; } $text =~ s{ @@ -1640,14 +1668,14 @@ sub _DoFootnotes { }{ my $result = ""; my $id = id2footnote($1); - if (defined $g_footnotes{$id} ) { - $g_footnote_counter++; - if ($g_footnotes{$id} =~ /^(

    )?glossary:/i) { - $result = "$g_footnote_counter"; + if (defined $self->{_footnotes}{$id} ) { + $self->{_footnote_counter}++; + if ($self->{_footnotes}{$id} =~ /^(

    )?glossary:/i) { + $result = "$self->{_footnote_counter}"; } else { - $result = "$g_footnote_counter"; + $result = "$self->{_footnote_counter}"; } - push (@g_used_footnotes,$id); + push (@{$self->{_used_footnotes}},$id); } $result; }xsge; @@ -1656,20 +1684,21 @@ sub _DoFootnotes { } sub _FixFootnoteParagraphs { - my $text = shift; + my ($self, $text) = @_; $text =~ s/^\\<\/footnote\>/<\/footnote>/gm; return $text; } -sub _PrintFootnotes{ +sub _PrintFootnotes { + my $self = shift; my $footnote_counter = 0; my $result = ""; - foreach my $id (@g_used_footnotes) { + foreach my $id (@{$self->{_used_footnotes}}) { $footnote_counter++; - my $footnote = $g_footnotes{$id}; + my $footnote = $self->{_footnotes}{$id}; my $footnote_closing_tag = ""; $footnote =~ s/(\<\/(p(re)?|ol|ul)\>)$//; @@ -1701,7 +1730,7 @@ sub _PrintFootnotes{ $result .= "\n

    tags around block-level tags. - $text = _HashHTMLBlocks($text); - $text = _FormParagraphs($text); + $text = $self->_HashHTMLBlocks($text); + $text = $self->_FormParagraphs($text); return $text; } @@ -480,27 +509,27 @@ sub _RunSpanGamut { # These are all the transformations that occur *within* block-level # tags like paragraphs, headers, and list items. # - my $text = shift; + my ($self, $text) = @_; $text = _DoCodeSpans($text); - $text = _DoMathSpans($text); + $text = $self->_DoMathSpans($text); $text = _EscapeSpecialCharsWithinTagAttributes($text); $text = _EncodeBackslashEscapes($text); # Process anchor and image tags. Images must come first, # because ![foo][f] looks like an anchor. - $text = _DoImages($text); - $text = _DoAnchors($text); + $text = $self->_DoImages($text); + $text = $self->_DoAnchors($text); # Make links out of things like `` # Must come after _DoAnchors(), because you can use < and > # delimiters in inline links like [this](). - $text = _DoAutoLinks($text); + $text = $self->_DoAutoLinks($text); $text = _EncodeAmpsAndAngles($text); $text = _DoItalicsAndBold($text); # Do hard breaks: - $text =~ s/ {2,}\n/ {empty_element_suffix}\n/g; return $text; } @@ -535,7 +564,7 @@ sub _DoAnchors { # # Turn Markdown link shortcuts into XHTML tags. # - my $text = shift; + my ($self, $text) = @_; # # First, handle reference-style links: [link text] [id] @@ -565,31 +594,31 @@ sub _DoAnchors { # Allow automatic cross-references to headers my $label = Header2Label($link_id); - if (defined $g_urls{$link_id}) { - my $url = $g_urls{$link_id}; + if (defined $self->{_urls}{$link_id}) { + my $url = $self->{_urls}{$link_id}; $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. $result = "{_titles}{$link_id} ) { + my $title = $self->{_titles}{$link_id}; $title =~ s! \* !$g_escape_table{'*'}!gx; $title =~ s! 
_ !$g_escape_table{'_'}!gx; $result .= " title=\"$title\""; } - $result .= _DoAttributes($label); + $result .= $self->_DoAttributes($label); $result .= ">$link_text"; - } elsif (defined $g_crossrefs{$label}) { - my $url = $g_crossrefs{$label}; + } elsif (defined $self->{_crossrefs}{$label}) { + my $url = $self->{_crossrefs}{$label}; $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. $result = "{_titles}{$label} ) { + my $title = $self->{_titles}{$label}; $title =~ s! \* !$g_escape_table{'*'}!gx; $title =~ s! _ !$g_escape_table{'_'}!gx; $result .= " title=\"$title\""; } - $result .= _DoAttributes($label); + $result .= $self->_DoAttributes($label); $result .= ">$link_text"; } else { $result = $whole_match; @@ -659,31 +688,31 @@ sub _DoAnchors { # Allow automatic cross-references to headers my $label = Header2Label($link_id); - if (defined $g_urls{$link_id}) { - my $url = $g_urls{$link_id}; + if (defined $self->{_urls}{$link_id}) { + my $url = $self->{_urls}{$link_id}; $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. $result = "{_titles}{$link_id} ) { + my $title = $self->{_titles}{$link_id}; $title =~ s! \* !$g_escape_table{'*'}!gx; $title =~ s! _ !$g_escape_table{'_'}!gx; $result .= " title=\"$title\""; } - $result .= _DoAttributes($link_id); + $result .= $self->_DoAttributes($link_id); $result .= ">$link_text"; - } elsif (defined $g_crossrefs{$label}) { - my $url = $g_crossrefs{$label}; + } elsif (defined $self->{_crossrefs}{$label}) { + my $url = $self->{_crossrefs}{$label}; $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. $result = "{_titles}{$label} ) { + my $title = $self->{_titles}{$label}; $title =~ s! \* !$g_escape_table{'*'}!gx; $title =~ s! 
_ !$g_escape_table{'_'}!gx; $result .= " title=\"$title\""; } - $result .= _DoAttributes($label); + $result .= $self->_DoAttributes($label); $result .= ">$link_text"; } else { $result = $whole_match; @@ -699,7 +728,7 @@ sub _DoImages { # # Turn Markdown image shortcuts into tags. # - my $text = shift; + my ($self, $text) = @_; # # First, handle reference-style labeled images: ![alt text][id] @@ -729,26 +758,26 @@ sub _DoImages { } $alt_text =~ s/"/"/g; - if (defined $g_urls{$link_id}) { - my $url = $g_urls{$link_id}; + if (defined $self->{_urls}{$link_id}) { + my $url = $self->{_urls}{$link_id}; $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. my $label = Header2Label($alt_text); - $g_crossrefs{$label} = "#$label"; - if (! defined $g_titles{$link_id}) { - $g_titles{$link_id} = $alt_text; + $self->{_crossrefs}{$label} = "#$label"; + if (! defined $self->{_titles}{$link_id}) { + $self->{_titles}{$link_id} = $alt_text; } $result = "\"$alt_text\"";{_titles}{$link_id}) { + my $title = $self->{_titles}{$link_id}; $title =~ s! \* !$g_escape_table{'*'}!gx; $title =~ s! 
_ !$g_escape_table{'_'}!gx; $result .= " title=\"$title\""; } - $result .= _DoAttributes($link_id); - $result .= $g_settings{empty_element_suffix}; + $result .= $self->_DoAttributes($link_id); + $result .= $self->{empty_element_suffix}; } else { # If there's no such link ID, leave intact: @@ -794,8 +823,8 @@ sub _DoImages { $url =~ s{^<(.*)>$}{$1}; # Remove <>'s surrounding URL, if present my $label = Header2Label($alt_text); - $g_crossrefs{$label} = "#$label"; -# $g_titles{$label} = $alt_text; # I think this line should not be here + $self->{_crossrefs}{$label} = "#$label"; +# $self->{_titles}{$label} = $alt_text; # I think this line should not be here $result = "\"$alt_text\"";{empty_element_suffix}; $result; }xsge; @@ -813,7 +842,7 @@ sub _DoImages { sub _DoHeaders { - my $text = shift; + my ($self, $text) = @_; my $header = ""; my $label = ""; my $idString = ""; @@ -831,17 +860,17 @@ sub _DoHeaders { } else { $label = Header2Label($1); } - $header = _RunSpanGamut($1); + $header = $self->_RunSpanGamut($1); $header =~ s/^\s*//s; if ($label ne "") { - $g_crossrefs{$label} = "#$label"; - $g_titles{$label} = _StripHTML($header); - $idString = " id=\"$label\""; + $self->{_crossrefs}{$label} = "#$label"; + $self->{_titles}{$label} = _StripHTML($header); + $idString = " id=\"$label\""; } else { $idString = ""; } - my $h_level = $g_settings{base_header_level}; + my $h_level = $self->{base_header_level}; "" . $header . 
"\n\n"; }egmx; @@ -852,18 +881,18 @@ sub _DoHeaders { } else { $label = Header2Label($1); } - $header = _RunSpanGamut($1); + $header = $self->_RunSpanGamut($1); $header =~ s/^\s*//s; if ($label ne "") { - $g_crossrefs{$label} = "#$label"; - $g_titles{$label} = _StripHTML($header); - $idString = " id=\"$label\""; + $self->{_crossrefs}{$label} = "#$label"; + $self->{_titles}{$label} = _StripHTML($header); + $idString = " id=\"$label\""; } else { $idString = ""; } - my $h_level = $g_settings{base_header_level} +1; + my $h_level = $self->{base_header_level} +1; "" . $header . "\n\n"; }egmx; @@ -886,19 +915,19 @@ sub _DoHeaders { \#* # optional closing #'s (not counted) \n+ }{ - my $h_level = length($1) + $g_settings{base_header_level} - 1; + my $h_level = length($1) + $self->{base_header_level} - 1; if (defined $3) { $label = Header2Label($3); } else { $label = Header2Label($2); } - $header = _RunSpanGamut($2); + $header = $self->_RunSpanGamut($2); $header =~ s/^\s*//s; if ($label ne "") { - $g_crossrefs{$label} = "#$label"; - $g_titles{$label} = _StripHTML($header); - $idString = " id=\"$label\""; + $self->{_crossrefs}{$label} = "#$label"; + $self->{_titles}{$label} = _StripHTML($header); + $idString = " id=\"$label\""; } else { $idString = ""; } @@ -914,8 +943,8 @@ sub _DoLists { # # Form HTML ordered (numbered) and unordered (bulleted) lists. # - my $text = shift; - my $less_than_tab = $g_settings{tab_width} - 1; + my ($self, $text) = @_; + my $less_than_tab = $self->{tab_width} - 1; # Re-usable patterns to match list item bullets and number markers: my $marker_ul = qr/[*+-]/; @@ -949,7 +978,7 @@ sub _DoLists { # # Note: There's a bit of duplication here. My original implementation # created a scalar regex pattern as the conditional result of the test on - # $g_list_level, and then only ran the $text =~ s{...}{...}egmx + # $self->{_list_level}, and then only ran the $text =~ s{...}{...}egmx # substitution once, using the scalar as the pattern. 
This worked, # everywhere except when running under MT on my hosting account at Pair # Networks. There, this caused all rebuilds to be killed by the reaper (or @@ -962,7 +991,7 @@ sub _DoLists { # afoul of the reaper. Thus, the slightly redundant code that uses two # static s/// patterns rather than one conditional pattern. - if ($g_list_level) { + if ($self->{_list_level}) { $text =~ s{ ^ $whole_list @@ -973,7 +1002,7 @@ sub _DoLists { # Turn double returns into triple returns, so that we can make a # paragraph for the last item in a list, if necessary: $list =~ s/\n{2,}/\n\n\n/g; - my $result = _ProcessListItems($list, $marker_any); + my $result = $self->_ProcessListItems($list, $marker_any); # Trim any trailing whitespace, to put the closing `` # up on the preceding line, to get it past the current stupid @@ -994,7 +1023,7 @@ sub _DoLists { # Turn double returns into triple returns, so that we can make a # paragraph for the last item in a list, if necessary: $list =~ s/\n{2,}/\n\n\n/g; - my $result = _ProcessListItems($list, $marker_any); + my $result = $self->_ProcessListItems($list, $marker_any); $result = "<$list_type>\n" . $result . "\n"; $result; }egmx; @@ -1011,11 +1040,10 @@ sub _ProcessListItems { # into individual list items. # - my $list_str = shift; - my $marker_any = shift; + my ($self, $list_str, $marker_any) = @_; - # The $g_list_level global keeps track of when we're inside a list. + # The $self->{_list_level} global keeps track of when we're inside a list. # Each time we enter a list, we increment it; when we leave a list, # we decrement. If it's zero, we're not in a list anymore. # @@ -1036,7 +1064,7 @@ sub _ProcessListItems { # change the syntax rules such that sub-lists must start with a # starting cardinal number; e.g. "1." or "a.". 
- $g_list_level++; + $self->{_list_level}++; # trim trailing blank lines: $list_str =~ s/\n{2,}\z/\n/; @@ -1055,19 +1083,19 @@ sub _ProcessListItems { my $leading_space = $2; if ($leading_line or ($item =~ m/\n{2,}/)) { - $item = _RunBlockGamut(_Outdent($item)); + $item = $self->_RunBlockGamut($self->_Outdent($item)); } else { # Recursion for sub-lists: - $item = _DoLists(_Outdent($item)); + $item = $self->_DoLists($self->_Outdent($item)); chomp $item; - $item = _RunSpanGamut($item); + $item = $self->_RunSpanGamut($item); } "

    "; if ($footnote_counter > 0) { - $result = "\n\n
    \n\n\n".$result; + $result = "\n\n
    \n{empty_element_suffix}\n
      \n\n".$result; } else { $result = ""; } @@ -1729,11 +1758,12 @@ sub id2footnote { sub xhtmlMetaData { + my $self = shift; my $result = qq{\n}; # This screws up xsltproc - make sure to use `-nonet -novalid` if you # have difficulty - if ($g_settings{allow_mathml}) { + if ($self->{allow_mathml}) { $result .= qq{ \n}; @@ -1746,23 +1776,23 @@ sub xhtmlMetaData { $result.= "\t\t\n"; - foreach my $key (sort keys %g_metadata ) { + foreach my $key (sort keys %{$self->{_metadata}}) { # Strip trailing spaces - $g_metadata{$key} =~ s/(\s)*$//s; + $self->{_metadata}{$key} =~ s/(\s)*$//s; # Strip spaces from key my $export_key = $key; $export_key =~ s/\s//g; if (lc($key) eq "title") { - $result.= "\t\t" . _EncodeAmpsAndAngles($g_metadata{$key}) . "\n"; + $result.= "\t\t" . _EncodeAmpsAndAngles($self->{_metadata}{$key}) . "\n"; } elsif (lc($key) eq "css") { - $result.= "\t\t{_metadata}{$key}\"$self->{empty_element_suffix}\n"; } elsif (lc($export_key) eq "xhtmlheader") { - $result .= "\t\t$g_metadata{$key}\n"; + $result .= "\t\t$self->{_metadata}{$key}\n"; } else { - my $encodedMeta = _EncodeAmpsAndAngles($g_metadata{$key}); - $result.= qq!\t\t{_metadata}{$key}); + $result.= qq!\t\t{empty_element_suffix}\n!; } } $result.= "\t\n"; @@ -1771,10 +1801,11 @@ sub xhtmlMetaData { } sub textMetaData { + my $self = shift; my $result = ""; - foreach my $key (sort keys %g_metadata ) { - $result .= "$key: $g_metadata{$key}\n"; + foreach my $key (sort keys %{$self->{_metadata}}) { + $result .= "$key: $self->{_metadata}{$key}\n"; } $result =~ s/\s*\n/
      \n/g; @@ -1796,8 +1827,8 @@ sub _ConvertCopyright{ sub _DoTables { - my $text = shift; - my $less_than_tab = $g_settings{tab_width} - 1; + my ($self, $text) = @_; + my $less_than_tab = $self->{tab_width} - 1; # Algorithm inspired by PHP Markdown Extra's table support # @@ -1870,10 +1901,10 @@ sub _DoTables { } else { $table_caption = $2; } - $result .= "
    \n"; + $result .= "\n"; - $g_crossrefs{$table_id} = "#$table_id"; - $g_titles{$table_id} = "see table"; # captions with "stuff" in them break links + $self->{_crossrefs}{$table_id} = "#$table_id"; + $self->{_titles}{$table_id} = "see table"; # captions with "stuff" in them break links } # If a second "caption" is present, treat it as a summary @@ -1882,7 +1913,7 @@ sub _DoTables { # A summary might be longer than one line if ($table =~ s/\n$line_start\[\s*(.*?)\s*\][ \t]*\n/\n/s) { - # $result .= "" . _RunSpanGamut($1) . "\n"; + # $result .= "" . $self->_RunSpanGamut($1) . "\n"; } # Now, divide table into header, alignment, and body @@ -1906,7 +1937,7 @@ sub _DoTables { # Process column alignment while ($alignment_string =~ /\|?\s*(.+?)\s*(\||\Z)/gs) { - my $cell = _RunSpanGamut($1); + my $cell = $self->_RunSpanGamut($1); if ($cell =~ /\+/){ $result .= "{empty_element_suffix}\n"; push(@alignments,"center"); } else { - $result .= " align=\"right\"$g_settings{empty_element_suffix}\n"; + $result .= " align=\"right\"$self->{empty_element_suffix}\n"; push(@alignments,"right"); } } else { if ($cell =~ /^\:/) { - $result .= " align=\"left\"$g_settings{empty_element_suffix}\n"; + $result .= " align=\"left\"$self->{empty_element_suffix}\n"; push(@alignments,"left"); } else { if (($cell =~ /^\./) || ($cell =~ /\.$/)) { - $result .= " align=\"char\"$g_settings{empty_element_suffix}\n"; + $result .= " align=\"char\"$self->{empty_element_suffix}\n"; push(@alignments,"char"); } else { - $result .= "$g_settings{empty_element_suffix}\n"; + $result .= "$self->{empty_element_suffix}\n"; push(@alignments,""); } } @@ -1950,7 +1981,7 @@ sub _DoTables { my $count=0; while ($line =~ /\|?\s*([^\|]+?)\s*(\|+|\Z)/gs) { # process contents of each cell - my $cell = _RunSpanGamut($1); + my $cell = $self->_RunSpanGamut($1); my $ending = $2; my $colspan = ""; if ($ending =~ s/^\s*(\|{2,})\s*$/$1/) { @@ -1983,7 +2014,7 @@ sub _DoTables { my $count=0; while ($line =~ 
/\|?\s*([^\|]+?)\s*(\|+|\Z)/gs) { # process contents of each cell - my $cell = _RunSpanGamut($1); + my $cell = $self->_RunSpanGamut($1); my $ending = ""; if ($2 ne ""){ $ending = $2; @@ -2039,11 +2070,11 @@ sub _DoTables { sub _DoAttributes{ - my $id = shift; + my ($self, $id) = @_; my $result = ""; - if (defined $g_attributes{$id}) { - my $attributes = $g_attributes{$id}; + if (defined $self->{_attributes}{$id}) { + my $attributes = $self->{_attributes}{$id}; while ($attributes =~ s/(\S+)="(.*?)"//) { $result .= " $1=\"$2\""; } @@ -2057,8 +2088,8 @@ sub _DoAttributes{ sub _StripMarkdownReferences { - my $text = shift; - my $less_than_tab = $g_settings{tab_width} - 1; + my ($self, $text) = @_; + my $less_than_tab = $self->{tab_width} - 1; while ($text =~ s{ \n\[\#(.+?)\]:[ \t]* # id = $1 @@ -2071,22 +2102,22 @@ sub _StripMarkdownReferences { my $id = $1; my $reference = "$2\n"; - $reference =~ s/^[ ]{0,$g_settings{tab_width}}//gm; + $reference =~ s/^[ ]{0,$self->{tab_width}}//gm; - $reference = _RunBlockGamut($reference); + $reference = $self->_RunBlockGamut($reference); # strip leading and trailing

    tags (they will be added later) $reference =~ s/^\//s; $reference =~ s/\<\/p\>\s*$//s; - $g_references{$id} = $reference; + $self->{_references}{$id} = $reference; } return $text; } sub _DoMarkdownCitations { - my $text = shift; + my ($self, $text) = @_; $text =~ s{ # Allow for citations without locator to be written \[\#([^\[]*?)\] # in usual manner, e.g. [#author][] rather than @@ -2114,19 +2145,19 @@ sub _DoMarkdownCitations { $textual_string = "$1"; } - if (defined $g_references{$id} ) { + if (defined $self->{_references}{$id} ) { my $citation_counter=0; # See if citation has been used before - foreach my $old_id (@g_used_references) { + foreach my $old_id (@{$self->{_used_references}}) { $citation_counter++; $count = $citation_counter if ($old_id eq $id); } if (! defined $count) { - $g_citation_counter++; - $count = $g_citation_counter; - push (@g_used_references,$id); + $self->{_citation_counter}++; + $count = $self->{_citation_counter}; + push (@{$self->{_used_references}},$id); } $result = "$textual_string ($count"; @@ -2158,17 +2189,18 @@ sub _DoMarkdownCitations { } sub _PrintMarkdownBibliography{ + my $self = shift; my $citation_counter = 0; my $result; - foreach my $id (@g_used_references) { + foreach my $id (@{$self->{_used_references}}) { $citation_counter++; - $result.="

    [$citation_counter] $g_references{$id}

    \n\n"; + $result.="

    [$citation_counter] $self->{_references}{$id}

    \n\n"; } $result .= ""; if ($citation_counter > 0) { - $result = "\n\n
    \n$g_settings{bibliography_title}

    \n\n".$result; + $result = "\n\n
    \n{empty_element_suffix}\n

    $self->{bibliography_title}

    \n\n".$result; } else { $result = ""; } @@ -2177,7 +2209,7 @@ sub _PrintMarkdownBibliography{ } sub _GenerateImageCrossRefs { - my $text = shift; + my ($self, $text) = @_; # # First, handle reference-style labeled images: ![alt text][id] @@ -2207,9 +2239,9 @@ sub _GenerateImageCrossRefs { } $alt_text =~ s/"/"/g; - if (defined $g_urls{$link_id}) { + if (defined $self->{_urls}{$link_id}) { my $label = Header2Label($alt_text); - $g_crossrefs{$label} = "#$label"; + $self->{_crossrefs}{$label} = "#$label"; } else { # If there's no such link ID, leave intact: @@ -2247,7 +2279,7 @@ sub _GenerateImageCrossRefs { $alt_text =~ s/"/"/g; my $label = Header2Label($alt_text); - $g_crossrefs{$label} = "#$label"; + $self->{_crossrefs}{$label} = "#$label"; $whole_match; }xsge; @@ -2255,16 +2287,16 @@ sub _GenerateImageCrossRefs { } sub _FindMathEquations{ - my $text = shift; + my ($self, $text) = @_; $text =~ s{ (\]*)id=\"(.*?)\"> # " }{ my $label = Header2Label($2); - my $header = _RunSpanGamut($2); + my $header = $self->_RunSpanGamut($2); - $g_crossrefs{$label} = "#$label"; - $g_titles{$label} = $header; + $self->{_crossrefs}{$label} = "#$label"; + $self->{_titles}{$label} = $header; $1 . 
"id=\"$label\">"; }xsge; @@ -2275,7 +2307,7 @@ sub _FindMathEquations{ sub _DoMathSpans { # Based on Gruber's _DoCodeSpans - my $text = shift; + my ($self, $text) = @_; my $display_as_block = 0; $display_as_block = 1 if ($text =~ /^<<[^\>\>]*>>$/); @@ -2292,10 +2324,10 @@ sub _DoMathSpans { if (defined $3) { $label = Header2Label($3); - my $header = _RunSpanGamut($3); + my $header = $self->_RunSpanGamut($3); - $g_crossrefs{$label} = "#$label"; - $g_titles{$label} = $header; + $self->{_crossrefs}{$label} = "#$label"; + $self->{_titles}{$label} = $header; } $m =~ s/^[ \t]*//g; # leading whitespace $m =~ s/[ \t]*$//g; # trailing whitespace @@ -2312,8 +2344,8 @@ sub _DoMathSpans { sub _DoDefinitionLists { # Uses the syntax proposed by Michel Fortin in PHP Markdown Extra - my $text = shift; - my $less_than_tab = $g_settings{tab_width} -1; + my ($self, $text) = @_; + my $less_than_tab = $self->{tab_width} -1; return $text unless $text =~ /\n[ ]{0,$less_than_tab}\:[ \t]+/; @@ -2363,7 +2395,7 @@ sub _DoDefinitionLists { my $result = ""; $term =~ s/^\s*(.*?)\s*$/$1/; if ($term !~ /^\s*$/){ - $result = "
    " . _RunSpanGamut($1) . "
    \n"; + $result = "
    " . $self->_RunSpanGamut($1) . "
    \n"; } $result; }xmge; @@ -2372,8 +2404,8 @@ sub _DoDefinitionLists { $definition }{ my $def = $1 . "\n"; - $def =~ s/^[ ]{0,$g_settings{tab_width}}//gm; - "
    \n" . _RunBlockGamut($def) . "\n
    \n"; + $def =~ s/^[ ]{0,$self->{tab_width}}//gm; + "
    \n" . $self->_RunBlockGamut($def) . "\n
    \n"; }xsge; $terms . $defs . "\n"; @@ -2389,7 +2421,7 @@ sub _UnescapeComments{ # Remove encoding inside comments # Based on proposal by Toras Doran (author of Text::MultiMarkdown) - my $text = shift; + my ($self, $text) = @_; $text =~ s{ (?<=\n"; - + foreach my $key (sort keys %{$self->{_metadata}}) { # Strip trailing spaces $self->{_metadata}{$key} =~ s/(\s)*$//s; - + # Strip spaces from key my $export_key = $key; $export_key =~ s/\s//g; - + if (lc($key) eq "title") { $result.= "\t\t" . _EncodeAmpsAndAngles($self->{_metadata}{$key}) . "\n"; } elsif (lc($key) eq "css") { @@ -1796,32 +1796,32 @@ sub xhtmlMetaData { } } $result.= "\t\n"; - + return $result; } sub textMetaData { my $self = shift; my $result = ""; - + foreach my $key (sort keys %{$self->{_metadata}}) { $result .= "$key: $self->{_metadata}{$key}\n"; } $result =~ s/\s*\n/
    \n/g; - + if ($result ne "") { $result.= "\n"; } - + return $result; } sub _ConvertCopyright{ my $text = shift; # Convert to an XML compatible form of copyright symbol - + $text =~ s/©/©/gi; - + return $text; } @@ -1829,39 +1829,39 @@ sub _ConvertCopyright{ sub _DoTables { my ($self, $text) = @_; my $less_than_tab = $self->{tab_width} - 1; - + # Algorithm inspired by PHP Markdown Extra's table support # - + # Reusable regexp's to match table - + my $line_start = qr{ [ ]{0,$less_than_tab} }mx; - + my $table_row = qr{ [^\n]*?\|[^\n]*?\n }mx; - + my $first_row = qr{ $line_start \S+.*?\|.*?\n }mx; - + my $table_rows = qr{ (\n?$table_row) }mx; - + my $table_caption = qr{ $line_start \[.*?\][ \t]*\n }mx; - + my $table_divider = qr{ $line_start - [\|\-\+\:\.][ \-\+\|\:\.]* \| [ \-\+\|\:\.]* + [\|\-\+\:\.][ \-\+\|\:\.]* \| [ \-\+\|\:\.]* }mx; - + my $whole_table = qr{ ($table_caption)? # Optional caption ($first_row # First line must start at beginning @@ -1870,26 +1870,26 @@ sub _DoTables { $table_rows+ # Body Rows ($table_caption)? # Optional caption }mx; - - + + # Find whole tables, then break them up and process them - + $text =~ s{ ^($whole_table) # Whole table in $1 (\n|\Z) # End of file or 2 blank lines }{ my $table = $1; - - # Clean extra spaces at end of lines - + + # Clean extra spaces at end of lines - # they cause the processing to choke $table =~ s/[\t ]*\n/\n/gs; - + my $result = "
    " . _RunSpanGamut($table_caption). "" . $self->_RunSpanGamut($table_caption). "
    \n"; my @alignments; my $use_row_header = 1; - + # Add Caption, if present - + if ($table =~ s/^$line_start(?:\[\s*(.*)\s*\])?(?:\[\s*(.*?)\s*\])[ \t]*$//m) { my $table_id = ""; my $table_caption = ""; @@ -1902,28 +1902,28 @@ sub _DoTables { $table_caption = $2; } $result .= "\n"; - + $self->{_crossrefs}{$table_id} = "#$table_id"; $self->{_titles}{$table_id} = "see table"; # captions with "stuff" in them break links } - + # If a second "caption" is present, treat it as a summary # However, this is not valid in XHTML 1.0 Strict # But maybe in future - + # A summary might be longer than one line if ($table =~ s/\n$line_start\[\s*(.*?)\s*\][ \t]*\n/\n/s) { # $result .= "" . $self->_RunSpanGamut($1) . "\n"; } - + # Now, divide table into header, alignment, and body # First, add leading \n in case there is no header - + $table = "\n" . $table; - + # Need to be greedy - + $table =~ s/\n($table_divider)\n(($table_rows)+)//s; my $body = ""; @@ -1966,15 +1966,15 @@ sub _DoTables { } } } - + # Process headers $table =~ s/^\n+//s; - + $result .= "\n"; - + # Strip blank lines $table =~ s/\n[ \t]*\n/\n/g; - + foreach my $line (split(/\n/, $table)) { # process each line (row) in table $result .= "\n"; @@ -1999,9 +1999,9 @@ sub _DoTables { } $result .= "\n"; } - + # Process body - + $result .= "\n\n"; foreach my $line (split(/\n/, $body)) { @@ -2046,25 +2046,25 @@ sub _DoTables { # removed, I am open to discussion. $result =~ s/
    " . $self->_RunSpanGamut($table_caption). "
    \s*\s*/
    \n\n/s; - + $result .= "\n
    \n"; $result }egmx; - + my $table_body = qr{ ( # wrap whole match in $2 - - (.*?\|.*?)\n # wrap headers in $3 - + + (.*?\|.*?)\n # wrap headers in $3 + [ ]{0,$less_than_tab} - ($table_divider) # alignment in $4 - + ($table_divider) # alignment in $4 + ( # wrap cells in $5 $table_rows ) ) }mx; - + return $text; } @@ -2072,7 +2072,7 @@ sub _DoTables { sub _DoAttributes{ my ($self, $id) = @_; my $result = ""; - + if (defined $self->{_attributes}{$id}) { my $attributes = $self->{_attributes}{$id}; while ($attributes =~ s/(\S+)="(.*?)"//) { @@ -2082,7 +2082,7 @@ sub _DoAttributes{ $result .= " $1=\"$2\""; } } - + return $result; } @@ -2103,22 +2103,22 @@ sub _StripMarkdownReferences { my $reference = "$2\n"; $reference =~ s/^[ ]{0,$self->{tab_width}}//gm; - + $reference = $self->_RunBlockGamut($reference); # strip leading and trailing

    tags (they will be added later) $reference =~ s/^\//s; $reference =~ s/\<\/p\>\s*$//s; - + $self->{_references}{$id} = $reference; } - + return $text; } sub _DoMarkdownCitations { my ($self, $text) = @_; - + $text =~ s{ # Allow for citations without locator to be written \[\#([^\[]*?)\] # in usual manner, e.g. [#author][] rather than [ ]? # [][#author] @@ -2127,7 +2127,7 @@ sub _DoMarkdownCitations { }{ "[][#$1]"; }xsge; - + $text =~ s{ \[([^\[]*?)\] # citation text = $1 [ ]? # one optional space @@ -2147,25 +2147,25 @@ sub _DoMarkdownCitations { if (defined $self->{_references}{$id} ) { my $citation_counter=0; - + # See if citation has been used before foreach my $old_id (@{$self->{_used_references}}) { $citation_counter++; $count = $citation_counter if ($old_id eq $id); } - + if (! defined $count) { $self->{_citation_counter}++; $count = $self->{_citation_counter}; push (@{$self->{_used_references}},$id); } - + $result = "$textual_string ($count"; - + if ($anchor_text ne "") { $result .=", $anchor_text"; } - + $result .= ")"; } else { # No reference exists @@ -2174,16 +2174,16 @@ sub _DoMarkdownCitations { if ($anchor_text ne "") { $result .=", $anchor_text"; } - + $result .= ")"; } - + if (Header2Label($anchor_text) eq "notcited"){ $result = ""; } $result; }xsge; - + return $text; } @@ -2192,7 +2192,7 @@ sub _PrintMarkdownBibliography{ my $self = shift; my $citation_counter = 0; my $result; - + foreach my $id (@{$self->{_used_references}}) { $citation_counter++; $result.="

    [$citation_counter] $self->{_references}{$id}

    \n\n"; @@ -2203,8 +2203,8 @@ sub _PrintMarkdownBibliography{ $result = "\n\n
    \n{empty_element_suffix}\n

    $self->{bibliography_title}

    \n\n".$result; } else { $result = ""; - } - + } + return $result; } @@ -2261,9 +2261,9 @@ sub _GenerateImageCrossRefs { (.*?) # alt text = $2 \] \( # literal paren - [ \t]* + [ \t]* ? # src url = $3 - [ \t]* + [ \t]* ( # $4 (['"]) # quote char = $5 ' (.*?) # title = $6 @@ -2288,40 +2288,40 @@ sub _GenerateImageCrossRefs { sub _FindMathEquations{ my ($self, $text) = @_; - + $text =~ s{ - (\]*)id=\"(.*?)\"> # " + (\]*)id=\"(.*?)\"> # " }{ my $label = Header2Label($2); my $header = $self->_RunSpanGamut($2); - + $self->{_crossrefs}{$label} = "#$label"; $self->{_titles}{$label} = $header; - + $1 . "id=\"$label\">"; }xsge; - + return $text; } sub _DoMathSpans { # Based on Gruber's _DoCodeSpans - + my ($self, $text) = @_; - my $display_as_block = 0; + my $display_as_block = 0; $display_as_block = 1 if ($text =~ /^<<[^\>\>]*>>$/); $text =~ s{ (?>) }{ - my $m = "$2"; + my $m = "$2"; my $label = ""; my @attr = (xmlns=>"http://www.w3.org/1998/Math/MathML"); - + if (defined $3) { $label = Header2Label($3); my $header = $self->_RunSpanGamut($3); @@ -2329,12 +2329,12 @@ sub _DoMathSpans { $self->{_crossrefs}{$label} = "#$label"; $self->{_titles}{$label} = $header; } - $m =~ s/^[ \t]*//g; # leading whitespace - $m =~ s/[ \t]*$//g; # trailing whitespace + $m =~ s/^[ \t]*//g; # leading whitespace + $m =~ s/[ \t]*$//g; # trailing whitespace push(@attr,(id=>"$label")) if ($label ne ""); push(@attr,(display=>"block")) if ($display_as_block == 1); - $m = $mathParser->TextToMathML($m,\@attr); + $m = $mathParser->TextToMathML($m,\@attr); "$m"; }egsx; @@ -2343,43 +2343,43 @@ sub _DoMathSpans { sub _DoDefinitionLists { # Uses the syntax proposed by Michel Fortin in PHP Markdown Extra - + my ($self, $text) = @_; my $less_than_tab = $self->{tab_width} -1; return $text unless $text =~ /\n[ ]{0,$less_than_tab}\:[ \t]+/; - + my $line_start = qr{ [ ]{0,$less_than_tab} }mx; - + my $term = qr{ $line_start [^:\s][^\n]*\n }sx; - + my $definition = qr{ \n?[ ]{0,$less_than_tab} \:[ \t]+(.*?)\n 
((?=\n?\:)|\n|\Z) # Lookahead for next definition, two returns, # or the end of the document }sx; - + my $definition_block = qr{ ((?:$term)+) # $1 = one or more terms ((?:$definition)+) # $2 = by one or more definitions }sx; - + my $definition_list = qr{ (?:$definition_block\n*)+ # One ore more definition blocks }sx; - + $text =~ s{ ($definition_list) # $1 = the whole list }{ my $list = $1; my $result = $1; - + $list =~ s{ (?:$definition_block)\n* }{ @@ -2399,7 +2399,7 @@ sub _DoDefinitionLists { } $result; }xmge; - + $defs =~ s{ $definition }{ @@ -2407,13 +2407,13 @@ sub _DoDefinitionLists { $def =~ s/^[ ]{0,$self->{tab_width}}//gm; "
    \n" . $self->_RunBlockGamut($def) . "\n
    \n"; }xsge; - + $terms . $defs . "\n"; }xsge; - + "
    \n" . $list . "
    \n\n"; }xsge; - + return $text } From a786b28f575b1f5048ae6383d09e909198e4c575 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sun, 9 Jan 2011 01:00:17 +0100 Subject: [PATCH 17/45] Merge Text::MultiMarkdown Merge the Text::MultiMarkdown work from Doran. This includes adding new switches to disable MMD enhancements, including the documetnation from Text::MultiMarkdown and minor changes to function names to align with the Text::MultiMarkdown source (ease of diff). --- lib/MultiMarkdown.pm | 312 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 260 insertions(+), 52 deletions(-) mode change 100755 => 100644 lib/MultiMarkdown.pm diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm old mode 100755 new mode 100644 index 5f1668b..ee761f3 --- a/lib/MultiMarkdown.pm +++ b/lib/MultiMarkdown.pm @@ -100,10 +100,198 @@ our %g_default_settings = ( bibliography_title => "Bibliography", document_format => "", empty_element_suffix => " />", + heading_ids => 1, + img_ids => 1, tab_width => 4, use_metadata => 1, + # WikiWords and [[Wiki Links]] are not supported anymore + use_wikilinks => 0, ); +=head1 NAME + +MultiMarkdown - Convert MultiMarkdown syntax to (X)HTML + +=head1 SYNOPSIS + + use MultiMarkdown 'markdown'; + my $html = markdown($text); + + use MultiMarkdown 'markdown'; + my $html = markdown( $text, { + empty_element_suffix => '>', + tab_width => 2, + use_wikilinks => 1, + } ); + + use MultiMarkdown; + my $m = MultiMarkdown->new; + my $html = $m->markdown($text); + + use MultiMarkdown; + my $m = MultiMarkdown->new( + empty_element_suffix => '>', + tab_width => 2, + use_wikilinks => 1, + ); + my $html = $m->markdown( $text ); + +=head1 DESCRIPTION + +Markdown is a text-to-HTML filter; it translates an easy-to-read / +easy-to-write structured text format into HTML. Markdown's text format +is most similar to that of plain text email, and supports features such +as headers, *emphasis*, code blocks, blockquotes, and links. 
+ +Markdown's syntax is designed not as a generic markup language, but +specifically to serve as a front-end to (X)HTML. You can use span-level +HTML tags anywhere in a Markdown document, and you can use block level +HTML tags (C<<
    >>, C<< >> etc.). Note that by default +Markdown isn't interpreted in HTML block-level elements, unless you add +a C attribute to the element. See L for +details. + +This module implements the MultiMarkdown markdown syntax extensions from: + + http://fletcherpenney.net/multimarkdown/ + +=head1 SYNTAX + +For more information about (original) Markdown's syntax, see: + + http://daringfireball.net/projects/markdown/ + +This module implements MultiMarkdown, which is an extension to Markdown.. + +The extension is documented at: + + http://fletcherpenney.net/multimarkdown/ + +and borrows from php-markdown, which lives at: + + http://michelf.com/projects/php-markdown/extra/ + +This documentation is going to be moved/copied into this module for clearer reading in a future release.. + +=head1 OPTIONS + +MultiMarkdown supports a number of options to it's processor which control the behaviour of the output document. + +These options can be supplied to the constructor, on in a hash with the individual calls to the markdown method. +See the synopsis for examples of both of the above styles. + +The options for the processor are: + +=over + +=item use_metadata + +Controls the metadata options below. + +=item strip_metadata + +If true, any metadata in the input document is removed from the output document (note - does not take effect in complete document format). + +=item empty element suffix + +This option can be used to generate normal HTML output. By default, it is ' />', which is xHTML, change to '>' for normal HTML. + +=item img_ids + +Controls if tags generated have an id attribute. Defaults to true. +Turn off for compatibility with the original markdown. + +=item heading_ids + +Controls if tags generated have an id attribute. Defaults to true. +Turn off for compatibility with the original markdown. + +=item bibliography_title + +The title of the generated bibliography, defaults to 'Bibliography'. 
+ +=item tab_width + +Controls indent width in the generated markup, defaults to 4 + +=item disable_tables + +If true, this disables the MultiMarkdown table handling. + +=item disable_footnotes + +If true, this disables the MultiMarkdown footnotes handling. + +=item disable_bibliography + +If true, this disables the MultiMarkdown bibliography/citation handling. + +=back + +A number of possible items of metadata can also be supplied as options. +Note that if the use_metadata is true then the metadata in the document will overwrite the settings on command line. + +Metadata options supported are: + +=over + +=item document_format + +=item use_wikilinks + +=item base_url + +=back + +=head1 METADATA + +MultiMarkdown supports the concept of 'metadata', which allows you to specify a number of formatting options +within the document itself. Metadata should be placed in the top few lines of a file, on value per line as colon separated key/value pairs. +The metadata should be separated from the document with a blank line. + +Most metadata keys are also supported as options to the constructor, or options +to the markdown method itself. (Note, as metadata, keys contain space, whereas options the keys are underscore separated.) + +You can attach arbitrary metadata to a document, which is output in HTML tags if unknown, see t/11document_format.t for more info. + +A list of 'known' metadata keys, and their effects are listed below: + +=over + +=item document format + +If set to 'complete', MultiMarkdown will render an entire xHTML page, otherwise it will render a document fragment + +=over + +=item css + +Sets a CSS file for the file, if in 'complete' document format. + +=item title + +Sets the page title, if in 'complete' document format. + +=back + +=item use wikilinks + +If set to '1' or 'on', causes links that are WikiWords to automatically be processed into links. + +=item base url + +This is the base URL for referencing wiki pages. 
In this is not supplied, all wiki links are relative. + +=back + +=head1 METHODS + +=head2 new + +A simple constructor, see the SYNTAX and OPTIONS sections for more information. + +=cut + sub new { my ($class, %params) = @_; @@ -112,11 +300,22 @@ sub new { $p{$_} = $params{$_}; } + if ($p{use_wikilinks}) { + croak('Sorry, WikiLinks are not supported in this version of ' . __PACKAGE__); + } + my $self = { params => \%p }; bless $self, ref($class) || $class; return $self; } +=head2 markdown + +The main function as far as the outside world is concerned. See the SYNOPSIS +for details on use. + +=cut + sub markdown { my ( $self, $text, $options ) = @_; @@ -141,14 +340,12 @@ sub markdown { return $self->_Markdown($text); } -# Global hashes, used by various utility routines -# Clear the global hashes. If we don't clear these, you get conflicts -# from other articles when generating a page which contains more than -# one article (e.g. an index page that shows the N most recent -# articles): -sub _CleanUpRunData($$) { +sub _CleanUpRunData { my ($self, $options) = @_; - + # Clear the global hashes. If we don't clear these, you get conflicts + # from other articles when generating a page which contains more than + # one article (e.g. an index page that shows the N most recent + # articles): $self->{_urls} = {}; $self->{_titles} = {}; $self->{_html_blocks} = {}; @@ -185,7 +382,7 @@ sub _Markdown { $text = $self->_CleanUpDoc($text); # Strip out MetaData - $text = $self->_ParseMetaData($text) if $self->{use_metadata}; + $text = $self->_ParseMetaData($text) if ($self->{use_metadata} || $self->{strip_metadata}); # And recheck for leading blank lines $text =~ s/^\n+//s; @@ -194,7 +391,7 @@ sub _Markdown { $text = $self->_HashHTMLBlocks($text); # Strip footnote and link definitions, store in hashes. 
- $text = $self->_StripFootnoteDefinitions($text); + $text = $self->_StripFootnoteDefinitions($text) unless $self->{disable_footnotes}; $text = $self->_StripLinkDefinitions($text); @@ -204,28 +401,26 @@ sub _Markdown { $text = $self->_RunBlockGamut($text); - $text = $self->_DoMarkdownCitations($text); - - $text = $self->_DoFootnotes($text); + $text = $self->_DoMarkdownCitations($text) unless $self->{disable_bibliography}; + $text = $self->_DoFootnotes($text) unless $self->{disable_footnotes}; $text = _UnescapeSpecialChars($text); # Clean encoding within HTML comments $text = $self->_UnescapeComments($text); - $text = $self->_FixFootnoteParagraphs($text); - $text .= $self->_PrintFootnotes(); - - $text .= $self->_PrintMarkdownBibliography(); + $text = $self->_FixFootnoteParagraphs($text) unless $self->{disable_footnotes}; + $text .= $self->_PrintFootnotes() unless $self->{disable_footnotes}; + $text .= $self->_PrintMarkdownBibliography() unless $self->{disable_bibliography}; $text = _ConvertCopyright($text); if (lc($self->{document_format}) =~ /^complete\s*$/i) { - return $self->xhtmlMetaData() . "\n\n" . $text . "\n\n"; + return $self->_xhtmlMetaData() . "\n\n" . $text . "\n\n"; } elsif (lc($self->{document_format}) =~ /^snippet\s*$/i) { return $text . "\n"; } else { - return $self->{document_format} . $self->textMetaData() . $text . "\n"; + return $self->{document_format} . $self->_textMetaData() . $text . "\n"; } } @@ -478,7 +673,8 @@ sub _RunBlockGamut { # Do tables first to populate the table id's for cross-refs # Escape
     so we don't get greedy with tables
    -	$text = $self->_DoTables($text);
    +
    +	$text = $self->_DoTables($text) unless $self->{disable_tables};
     
     	# And now, protect our tables
     	$text = $self->_HashHTMLBlocks($text);
    @@ -593,7 +789,7 @@ sub _DoAnchors {
     		}
     
     		# Allow automatic cross-references to headers
    -		my $label = Header2Label($link_id);
    +		my $label = _Header2Label($link_id);
     		if (defined $self->{_urls}{$link_id}) {
     			my $url = $self->{_urls}{$link_id};
     			$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
    @@ -687,7 +883,7 @@ sub _DoAnchors {
     		(my $link_id = lc $2) =~ s{[ ]?\n}{ }g; # lower-case and turn embedded newlines into spaces
     
     		# Allow automatic cross-references to headers
    -		my $label = Header2Label($link_id);
    +		my $label = _Header2Label($link_id);
     		if (defined $self->{_urls}{$link_id}) {
     			my $url = $self->{_urls}{$link_id};
     			$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
    @@ -758,18 +954,23 @@ sub _DoImages {
     		}
     
     		$alt_text =~ s/"/"/g;
    +
     		if (defined $self->{_urls}{$link_id}) {
     			my $url = $self->{_urls}{$link_id};
     			$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
     			$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold.
     
    -			my $label = Header2Label($alt_text);
    -			$self->{_crossrefs}{$label} = "#$label";
    -			if (! defined $self->{_titles}{$link_id}) {
    -				$self->{_titles}{$link_id} = $alt_text;
    +			my $idString = "";
    +			if ($self->{img_ids}) {
    +				my $label = _Header2Label($alt_text);
    +				$self->{_crossrefs}{$label} = "#$label";
    +				if (! defined $self->{_titles}{$link_id}) {
    +					$self->{_titles}{$link_id} = $alt_text;
    +				}
    +				$idString = " id=\"$label\"";
     			}
     
    -			$result = "\"$alt_text\"";{_titles}{$link_id}) {
     				my $title = $self->{_titles}{$link_id};
     				$title =~ s! \* !$g_escape_table{'*'}!gx;
    @@ -818,15 +1019,20 @@ sub _DoImages {
     
     		$alt_text =~ s/"/"/g;
     		$title    =~ s/"/"/g;
    +
     		$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
     		$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold.
     		$url =~ s{^<(.*)>$}{$1};					# Remove <>'s surrounding URL, if present
     
    -		my $label = Header2Label($alt_text);
    -		$self->{_crossrefs}{$label} = "#$label";
    -#		$self->{_titles}{$label} = $alt_text;			# I think this line should not be here
    +		my $idString = "";
    +		if ($self->{img_ids}) {
    +			my $label = _Header2Label($alt_text);
    +			$self->{_crossrefs}{$label} = "#$label";
    +#			$self->{_titles}{$label} = $alt_text;			# I think this line should not be here
    +			$idString = " id=\"$label\"";
    +		}
     
    -		$result = "\"$alt_text\"";_RunSpanGamut($1);
     		$header =~ s/^\s*//s;
     
    -		if ($label ne "") {
    +		if ($self->{heading_ids} && $label ne "") {
     			$self->{_crossrefs}{$label} = "#$label";
     			$self->{_titles}{$label} = _StripHTML($header);
     			$idString = " id=\"$label\"";
    @@ -877,14 +1083,14 @@ sub _DoHeaders {
     
     	$text =~ s{ ^(.+?)(?:\s*(?_RunSpanGamut($1);
     		$header =~ s/^\s*//s;
     
    -		if ($label ne "") {
    +		if ($self->{heading_ids} && $label ne "") {
     			$self->{_crossrefs}{$label} = "#$label";
     			$self->{_titles}{$label} = _StripHTML($header);
     			$idString = " id=\"$label\"";
    @@ -917,14 +1123,14 @@ sub _DoHeaders {
     		}{
     			my $h_level = length($1) + $self->{base_header_level} - 1;
     			if (defined $3) {
    -				$label = Header2Label($3);
    +				$label = _Header2Label($3);
     			} else {
    -				$label = Header2Label($2);
    +				$label = _Header2Label($2);
     			}
     			$header = $self->_RunSpanGamut($2);
     			$header =~ s/^\s*//s;
     
    -			if ($label ne "") {
    +			if ($self->{heading_ids} && $label ne "") {
     				$self->{_crossrefs}{$label} = "#$label";
     				$self->{_titles}{$label} = _StripHTML($header);
     				$idString = " id=\"$label\"";
    @@ -1646,7 +1852,7 @@ sub _StripFootnoteDefinitions {
     		my $footnote = "$2\n";
     		$footnote =~ s/^[ ]{0,$self->{tab_width}}//gm;
     
    -		$self->{_footnotes}{id2footnote($id)} = $footnote;
    +		$self->{_footnotes}{_Id2Footnote($id)} = $footnote;
     	}
     
     	return $text;
    @@ -1667,7 +1873,7 @@ sub _DoFootnotes {
     		\[\^(.+?)\]		# id = $1
     	}{
     		my $result = "";
    -		my $id = id2footnote($1);
    +		my $id = _Id2Footnote($1);
     		if (defined $self->{_footnotes}{$id} ) {
     			$self->{_footnote_counter}++;
     			if ($self->{_footnotes}{$id} =~ /^(

    )?glossary:/i) { @@ -1739,7 +1945,7 @@ sub _PrintFootnotes { return $result; } -sub Header2Label { +sub _Header2Label { my $header = shift; my $label = lc $header; $label =~ s/[^A-Za-z0-9:_.-]//g; # Strip illegal characters @@ -1748,7 +1954,7 @@ sub Header2Label { return $label; } -sub id2footnote { +sub _Id2Footnote { # Since we prepend "fn:", we can allow leading digits in footnotes my $id = shift; my $footnote = lc $id; @@ -1757,7 +1963,7 @@ sub id2footnote { } -sub xhtmlMetaData { +sub _xhtmlMetaData { my $self = shift; my $result = qq{\n}; @@ -1800,14 +2006,16 @@ sub xhtmlMetaData { return $result; } -sub textMetaData { +sub _textMetaData { my $self = shift; my $result = ""; + return $result if $self->{strip_metadata}; + foreach my $key (sort keys %{$self->{_metadata}}) { $result .= "$key: $self->{_metadata}{$key}\n"; } - $result =~ s/\s*\n/
    \n/g; + $result =~ s/\s*\n/{empty_element_suffix}\n/g; if ($result ne "") { $result.= "\n"; @@ -1894,7 +2102,7 @@ sub _DoTables { my $table_id = ""; my $table_caption = ""; - $table_id = Header2Label($2); + $table_id = _Header2Label($2); if (defined $1) { $table_caption = $1; @@ -2178,7 +2386,7 @@ sub _DoMarkdownCitations { $result .= ")"; } - if (Header2Label($anchor_text) eq "notcited"){ + if (_Header2Label($anchor_text) eq "notcited"){ $result = ""; } $result; @@ -2240,7 +2448,7 @@ sub _GenerateImageCrossRefs { $alt_text =~ s/"/"/g; if (defined $self->{_urls}{$link_id}) { - my $label = Header2Label($alt_text); + my $label = _Header2Label($alt_text); $self->{_crossrefs}{$label} = "#$label"; } else { @@ -2278,7 +2486,7 @@ sub _GenerateImageCrossRefs { my $alt_text = $2; $alt_text =~ s/"/"/g; - my $label = Header2Label($alt_text); + my $label = _Header2Label($alt_text); $self->{_crossrefs}{$label} = "#$label"; $whole_match; }xsge; @@ -2292,7 +2500,7 @@ sub _FindMathEquations{ $text =~ s{ (\]*)id=\"(.*?)\"> # " }{ - my $label = Header2Label($2); + my $label = _Header2Label($2); my $header = $self->_RunSpanGamut($2); $self->{_crossrefs}{$label} = "#$label"; @@ -2323,7 +2531,7 @@ sub _DoMathSpans { my @attr = (xmlns=>"http://www.w3.org/1998/Math/MathML"); if (defined $3) { - $label = Header2Label($3); + $label = _Header2Label($3); my $header = $self->_RunSpanGamut($3); $self->{_crossrefs}{$label} = "#$label"; From 909ef194baa2e716172154a3e8d62e79869e6a7e Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sun, 9 Jan 2011 01:14:56 +0100 Subject: [PATCH 18/45] Text::MultiMarkdown alias to MultiMarkdown Also move ASCIIMathML in Text/, making it easier to find. 
--- lib/MultiMarkdown.pm | 19 ++++++++++--------- lib/{ => Text}/ASCIIMathML.pm | 0 lib/Text/MultiMarkdown.pm | 1 + 3 files changed, 11 insertions(+), 9 deletions(-) rename lib/{ => Text}/ASCIIMathML.pm (100%) create mode 120000 lib/Text/MultiMarkdown.pm diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm index ee761f3..2d26112 100644 --- a/lib/MultiMarkdown.pm +++ b/lib/MultiMarkdown.pm @@ -38,15 +38,7 @@ use base 'Exporter'; our $VERSION = '2.0.b6'; our @EXPORT_OK = qw{markdown}; -eval {require MT}; # Test to see if we're running in MT. -unless ($@) { - require Text::ASCIIMathML; -} else { # Otherwise look for ASCIIMathML.pm next to MultiMarkdown.pl - my $me = __FILE__; # Where am I? - - my $path = dirname($me); - require File::Spec->join($path, "ASCIIMathML.pm"); -} +require Text::ASCIIMathML; our $mathParser = new Text::ASCIIMathML(); @@ -2644,6 +2636,15 @@ sub _UnescapeComments{ return $text; } +package Text::MultiMarkdown; +use base 'MultiMarkdown'; + +our @EXPORT_OK = qw{markdown}; + +sub markdown { + MultiMarkdown::markdown(@_) +} + 1; diff --git a/lib/ASCIIMathML.pm b/lib/Text/ASCIIMathML.pm similarity index 100% rename from lib/ASCIIMathML.pm rename to lib/Text/ASCIIMathML.pm diff --git a/lib/Text/MultiMarkdown.pm b/lib/Text/MultiMarkdown.pm new file mode 120000 index 0000000..5dd02ea --- /dev/null +++ b/lib/Text/MultiMarkdown.pm @@ -0,0 +1 @@ +../MultiMarkdown.pm \ No newline at end of file From 08f1b0a381a3c84a5402d9f897859b468b4d01ed Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sun, 9 Jan 2011 09:03:24 +0100 Subject: [PATCH 19/45] Handle 'Keywords' in metadata correctly --- lib/MultiMarkdown.pm | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm index 2d26112..1aaa15a 100644 --- a/lib/MultiMarkdown.pm +++ b/lib/MultiMarkdown.pm @@ -1773,7 +1773,7 @@ sub _ParseMetaData { my ($self, $text) = @_; my $clean_text = ""; - my ($inMetaData, $currentKey) = (1,''); + my 
($inMetaData, $currentKey, $lckey) = (1,'', ''); # If only metadata is "Format: complete" then skip @@ -1792,22 +1792,23 @@ sub _ParseMetaData { my $meta = $2; $currentKey =~ s/\s+/ /g; $currentKey =~ s/\s$//; + $lckey = lc $currentKey; $self->{_metadata}{$currentKey} = $meta; - if (lc($currentKey) eq "format") { + if ($lckey eq "format") { $self->{document_format} = lc($self->{_metadata}{$currentKey}); } - if (lc($currentKey) eq "base url") { + if ($lckey eq "base url") { $self->{base_url} = $self->{_metadata}{$currentKey}; } - if (lc($currentKey) eq "bibliography title") { + if ($lckey eq "bibliography title") { $self->{bibliography_title} = $self->{_metadata}{$currentKey}; $self->{bibliography_title} =~ s/\s*$//; } - if (lc($currentKey) eq "base header level") { + if ($lckey eq "base header level") { $self->{base_header_level} = $self->{_metadata}{$currentKey}; } - if (!$self->{_metadata_newline}{$currentKey}) { - $self->{_metadata_newline}{$currentKey} = $self->{_metadata_newline}{default}; + if (!$self->{_metadata_newline}{$lckey}) { + $self->{_metadata_newline}{$lckey} = $self->{_metadata_newline}{default}; } } else { if ($currentKey eq "") { @@ -1817,7 +1818,7 @@ sub _ParseMetaData { next; } if ($line =~ /^\s*(.+)$/ ) { - $self->{_metadata}{$currentKey} .= "$self->{_metadata_newline}{$currentKey}$1"; + $self->{_metadata}{$currentKey} .= "$self->{_metadata_newline}{$lckey}$1"; } } } else { From 03edab7a80f61ad0bb43fd2c92be45cd386fe1d6 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sun, 9 Jan 2011 10:05:34 +0100 Subject: [PATCH 20/45] Emulate Markdown when called as Markdown.pl --- bin/MultiMarkdown.pl | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/bin/MultiMarkdown.pl b/bin/MultiMarkdown.pl index c9ecd2f..f463a7f 100755 --- a/bin/MultiMarkdown.pl +++ b/bin/MultiMarkdown.pl @@ -148,6 +148,15 @@ sub story { my %opts = (); + if (basename($0) eq 'Markdown.pl') { + %opts = ( + allow_mathml => 0, + use_metadata => 0, + heading_ids => 0, + img_ids 
=> 0, + ); + } + #### Check for command-line switches: ################# my %cli_opts; use Getopt::Long; From 1e2025768241b9ca2e08463a7045348718959fe4 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sun, 9 Jan 2011 10:09:50 +0100 Subject: [PATCH 21/45] New codeblocks_newline option This allows the final newline at the end of code blocks to be customized. The default is to have no newline (as per MMD), but it can be set to "\n" to emulate classic Markdown. --- bin/MultiMarkdown.pl | 1 + lib/MultiMarkdown.pm | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/bin/MultiMarkdown.pl b/bin/MultiMarkdown.pl index f463a7f..ee72a09 100755 --- a/bin/MultiMarkdown.pl +++ b/bin/MultiMarkdown.pl @@ -154,6 +154,7 @@ sub story { use_metadata => 0, heading_ids => 0, img_ids => 0, + codeblocks_newline => "\n", ); } diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm index 1aaa15a..48d15d5 100644 --- a/lib/MultiMarkdown.pm +++ b/lib/MultiMarkdown.pm @@ -98,6 +98,7 @@ our %g_default_settings = ( use_metadata => 1, # WikiWords and [[Wiki Links]] are not supported anymore use_wikilinks => 0, + codeblocks_newline => '', ); =head1 NAME @@ -1324,7 +1325,7 @@ sub _DoCodeBlocks { $codeblock =~ s/\A\n+//; # trim leading newlines $codeblock =~ s/\n+\z//; # trim trailing newlines - $result = "\n\n

    " . $codeblock . "
    \n\n"; # CHANGED: Removed newline for MMD + $result = "\n\n
    " . $codeblock . "$self->{codeblocks_newline}
    \n\n"; $result; }egmx; From e621a95ff1b097aad41ef84eae84313d8708dc1b Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sun, 9 Jan 2011 10:20:56 +0100 Subject: [PATCH 22/45] Revert 7f8cadeacfd9e7ba335e86aeebc8738693ce393a Processing those blocks early introduces other bugs against the testsuite, we have to find a better solution. --- lib/MultiMarkdown.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm index 48d15d5..09673d9 100644 --- a/lib/MultiMarkdown.pm +++ b/lib/MultiMarkdown.pm @@ -677,10 +677,10 @@ sub _RunBlockGamut { $text =~ s{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}{\n{empty_element_suffix}\n}gmx; $text =~ s{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}{\n{empty_element_suffix}\n}gmx; - $text = $self->_DoBlockQuotes($text); $text = $self->_DoDefinitionLists($text); $text = $self->_DoLists($text); $text = $self->_DoCodeBlocks($text); + $text = $self->_DoBlockQuotes($text); # We already ran _HashHTMLBlocks() before, in Markdown(), but that # was to escape raw HTML in the original Markdown source. 
This time, From aa1953061e5be47d9b634303d6156c2975de5123 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sun, 9 Jan 2011 14:47:21 +0100 Subject: [PATCH 23/45] More Markdown.pl emulation --- bin/MultiMarkdown.pl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bin/MultiMarkdown.pl b/bin/MultiMarkdown.pl index ee72a09..765cab2 100755 --- a/bin/MultiMarkdown.pl +++ b/bin/MultiMarkdown.pl @@ -155,6 +155,9 @@ sub story { heading_ids => 0, img_ids => 0, codeblocks_newline => "\n", + disable_tables => 1, + disable_footnotes =>1, + disable_bibliography =>1, ); } From bcfadcbd36e685e7999ff5ebd8f66b2e5d700ec6 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sun, 9 Jan 2011 14:51:12 +0100 Subject: [PATCH 24/45] Command-line option to emulate Markdown --- bin/MultiMarkdown.pl | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/bin/MultiMarkdown.pl b/bin/MultiMarkdown.pl index 765cab2..cff8b5b 100755 --- a/bin/MultiMarkdown.pl +++ b/bin/MultiMarkdown.pl @@ -148,7 +148,18 @@ sub story { my %opts = (); - if (basename($0) eq 'Markdown.pl') { + #### Check for command-line switches: ################# + my %cli_opts; + use Getopt::Long; + Getopt::Long::Configure('pass_through'); + GetOptions(\%cli_opts, + 'version', + 'shortversion', + 'html4tags', + 'markdownonly', + ); + + if ($cli_opts{'markdownonly'} || basename($0) eq 'Markdown.pl') { %opts = ( allow_mathml => 0, use_metadata => 0, @@ -161,15 +172,6 @@ sub story { ); } - #### Check for command-line switches: ################# - my %cli_opts; - use Getopt::Long; - Getopt::Long::Configure('pass_through'); - GetOptions(\%cli_opts, - 'version', - 'shortversion', - 'html4tags', - ); if ($cli_opts{'version'}) { # Version info print "\nThis is MultiMarkdown, version $MultiMarkdown::VERSION.\n"; print "Original code Copyright 2004 John Gruber\n"; @@ -186,7 +188,6 @@ sub story { $opts{empty_element_suffix} = ">"; } - #### Process incoming text: ########################### my $text; { 
From eda65471c434bb9cb8c4e751adb648abcaf842e5 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sun, 9 Jan 2011 14:57:13 +0100 Subject: [PATCH 25/45] Add leading spaces to _all_ lines in a blockquote --- lib/MultiMarkdown.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm index 09673d9..e06514a 100644 --- a/lib/MultiMarkdown.pm +++ b/lib/MultiMarkdown.pm @@ -1471,7 +1471,7 @@ sub _DoBlockQuotes { $bq =~ s/^[ \t]+$//mg; # trim whitespace-only lines $bq = $self->_RunBlockGamut($bq); # recurse - $bq =~ s/^/ /g; + $bq =~ s/^/ /mg; # These leading spaces screw with
     content, so we need to fix that:
     			$bq =~ s{
     					(\s*
    .+?
    ) From c2f071d441852547993d51b2637695e4c4e5476e Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sun, 9 Jan 2011 16:09:35 +0100 Subject: [PATCH 26/45] Don't strip final whitespace in list items Go back to Markdown-compatible output. Find a test case where this effectively breaks the HTML parser. --- lib/MultiMarkdown.pm | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm index e06514a..c73c790 100644 --- a/lib/MultiMarkdown.pm +++ b/lib/MultiMarkdown.pm @@ -1203,12 +1203,7 @@ sub _DoLists { $list =~ s/\n{2,}/\n\n\n/g; my $result = $self->_ProcessListItems($list, $marker_any); - # Trim any trailing whitespace, to put the closing `` - # up on the preceding line, to get it past the current stupid - # HTML block parser. This is a hack to work around the terrible - # hack that is the HTML block parser. - $result =~ s{\s+$}{}; - $result = "<$list_type>" . $result . "\n"; + $result = "<$list_type>\n" . $result . "\n"; $result; }egmx; } From 54b32b4f0220a55e02757ded01481d33f4f65143 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sun, 9 Jan 2011 19:22:52 +0100 Subject: [PATCH 27/45] Outer lists have proper should end block-style This ensures that e.g. a blockquote following them is properly recognized. --- lib/MultiMarkdown.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm index c73c790..34166ab 100644 --- a/lib/MultiMarkdown.pm +++ b/lib/MultiMarkdown.pm @@ -1218,7 +1218,7 @@ sub _DoLists { # paragraph for the last item in a list, if necessary: $list =~ s/\n{2,}/\n\n\n/g; my $result = $self->_ProcessListItems($list, $marker_any); - $result = "<$list_type>\n" . $result . "\n"; + $result = "<$list_type>\n" . $result . 
"\n\n"; $result; }egmx; } From efa86921bd55f1f36a4c60c03a1b16ba16ae561a Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sun, 9 Jan 2011 19:11:13 +0100 Subject: [PATCH 28/45] Blockquotes-in-list and running blockquotes fixes The original Markdown implementation supports "running blockquotes": if any line in a paragraph start with the '>' character, that line and all the subsequent ones are split from the paragraph and become a blockquote. This is inconsistent with the list behavior (list don't start mid-paragraph). Additionally, if a blockquote happens within a non-block list item (e.g. a standalone item or an item in a sequence of items not separated by empty lines), mismatched markup is generated, with interleaved 'blockquote' and 'li' tag pairs because Markdown starts thinking it's in span mode, and then reparses the span-mode output in block mode. Blockquote-in-list detection is solved by letting the list item processor check for existence of >-starting lines in the whole item. Since this is inefficient, we allow the user to disable running blockquotes, in which case blockquotes cannot start mid-paragraph (consistently with the list behavior) and the blockquote-in-list detection is much more efficient. --- lib/MultiMarkdown.pm | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm index 34166ab..79d16ff 100644 --- a/lib/MultiMarkdown.pm +++ b/lib/MultiMarkdown.pm @@ -99,6 +99,7 @@ our %g_default_settings = ( # WikiWords and [[Wiki Links]] are not supported anymore use_wikilinks => 0, codeblocks_newline => '', + running_blockquotes => 1, ); =head1 NAME @@ -297,6 +298,33 @@ sub new { croak('Sorry, WikiLinks are not supported in this version of ' . 
__PACKAGE__); } + # The original Markdown implementation supports "running blockquotes": if + # any line in a paragraph start with the '>' character, that line and all + # the subsequent ones are split from the paragraph and become a + # blockquote. This is inconsistent with the list behavior (list don't + # start mid-paragraph). + # + # Additionally, if a blockquote happens within a non-block list item (e.g. + # a standalone item or an item in a sequence of items not separated by + # empty lines), mismatched markup is generated, with interleaved + # 'blockquote' and 'li' tag pairs because Markdown starts thinking it's in + # span mode, and then reparses the span-mode output in block mode. + # + # Blockquote-in-list detection is solved by letting the list item + # processor check for existence of >-starting lines in the whole item. + # Since this is inefficient, we allow the user to disable running + # blockquotes, in which case blockquotes cannot start mid-paragraph + # (consistently with the list behavior) and the blockquote-in-list + # detection is much more efficient. + + if ($p{running_blockquotes}) { + $p{_blockquote_lead} = ''; + $p{_list_blockquote_pattern} = qr/^[ \t]*>/m; + } else { + $p{_blockquote_lead} = qr/(?:(?<=\n\n)|\A\n?)/; + $p{_list_blockquote_pattern} = qr/\A>/; + } + my $self = { params => \%p }; bless $self, ref($class) || $class; return $self; @@ -1276,7 +1304,8 @@ sub _ProcessListItems { my $leading_line = $1; my $leading_space = $2; - if ($leading_line or ($item =~ m/\n{2,}/)) { + if ($leading_line or ($item =~ m/\n{2,}/) + or ($item =~ $self->{_list_blockquote_pattern})) { $item = $self->_RunBlockGamut($self->_Outdent($item)); } else { @@ -1452,6 +1481,7 @@ sub _DoBlockQuotes { my ($self, $text) = @_; $text =~ s{ + $self->{_blockquote_lead} ( # Wrap whole match in $1 ( ^[ \t]*>[ \t]? 
# '>' at the start of a line From a09ade182db5ed49ef1ee90b011d74687511e28d Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Mon, 10 Jan 2011 09:02:30 +0100 Subject: [PATCH 29/45] Support running lists This is enabled by passing a "leading" regexp as the running_lists option to MMD. In this case, a mid-paragraph line starting with a list item (ordered or unordered) and preceded by a line ending with the leading regexp will switch the paragraph to list mode. --- lib/MultiMarkdown.pm | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm index 79d16ff..2c08cdf 100644 --- a/lib/MultiMarkdown.pm +++ b/lib/MultiMarkdown.pm @@ -100,6 +100,7 @@ our %g_default_settings = ( use_wikilinks => 0, codeblocks_newline => '', running_blockquotes => 1, + running_lists => undef, ); =head1 NAME @@ -325,6 +326,18 @@ sub new { $p{_list_blockquote_pattern} = qr/\A>/; } + # As an extension to Markdown, we also support running lists, i.e. + # lists that can start in the middle of a paragraph. running_lists is + # undef by default, and when defined it should be set to the regexp + # that must match at the end of the preceding line to allow a running + # list (typically something like ':' would be used); + + if (defined $p{running_lists}) { + $p{_list_lead} = qr/(?:(?<=\n\n|$p{running_lists}\n)|\A\n?)/; + } else { + $p{_list_lead} = qr/(?:(?<=\n\n)|\A\n?)/; + } + my $self = { params => \%p }; bless $self, ref($class) || $class; return $self; @@ -1237,7 +1250,7 @@ sub _DoLists { } else { $text =~ s{ - (?:(?<=\n\n)|\A\n?) 
+ $self->{_list_lead} $whole_list }{ my $list = $1; From 1739db3b79f9165a4057d604bac2b63b60694e4e Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Mon, 7 Mar 2011 18:33:41 +0100 Subject: [PATCH 30/45] More readlink fixes --- bin/MultiMarkdown.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/MultiMarkdown.pl b/bin/MultiMarkdown.pl index cff8b5b..a7bf049 100755 --- a/bin/MultiMarkdown.pl +++ b/bin/MultiMarkdown.pl @@ -9,7 +9,7 @@ eval {require MultiMarkdown}; if ($@) { - my $me = readlink(__FILE__); + my $me = readlink(__FILE__) || __FILE__; my $path = dirname(dirname($me)); From a49f18a376b43605b3e4f292d4743a9408e172b7 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sun, 6 May 2012 08:06:25 +0200 Subject: [PATCH 31/45] Add HTML5 block-level tags --- lib/MultiMarkdown.pm | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm index 2c08cdf..c986e12 100644 --- a/lib/MultiMarkdown.pm +++ b/lib/MultiMarkdown.pm @@ -557,7 +557,10 @@ our $g_block_tags = qr{ (?: p | div | h[1-6] | blockquote | pre | table | dl | ol | ul | script | noscript | form | - fieldset | iframe | ins | del + fieldset | iframe | ins | del | + # HTML5 + article | header | footer | section | aside | video | + output | hgroup | canvas | figure | nav ) }x; # MultiMarkdown does not include `math` in the above list so that # Equations can optionally be included in separate paragraphs From 8212a1ac9fa61a11528fbc1124fbd8ae60cd0a23 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sun, 6 May 2012 08:48:53 +0200 Subject: [PATCH 32/45] Handle multiple reference to the same footnote/glossary --- lib/MultiMarkdown.pm | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm index c986e12..d1012f3 100644 --- a/lib/MultiMarkdown.pm +++ b/lib/MultiMarkdown.pm @@ -392,7 +392,7 @@ sub _CleanUpRunData { $self->{_crossrefs} = {}; 
$self->{_footnotes} = {}; $self->{_attributes} = {}; - $self->{_used_footnotes} = []; + $self->{_used_footnotes} = {}; $self->{_footnote_counter} = 0; $self->{_used_references} = []; $self->{_citation_counter} = 0; @@ -1910,13 +1910,21 @@ sub _DoFootnotes { my $result = ""; my $id = _Id2Footnote($1); if (defined $self->{_footnotes}{$id} ) { - $self->{_footnote_counter}++; + my $fn = $self->{_used_footnotes}; + my ($counter, $uses); + if (defined $fn->{$id}) { + $counter = $fn->{$id}->{counter}; + $uses = $fn->{$id}->{uses}+1; + } else { + $counter = $self->{_footnote_counter}++; + $uses = 1; + } if ($self->{_footnotes}{$id} =~ /^(

    )?glossary:/i) { - $result = "$self->{_footnote_counter}"; + $result = "$counter"; } else { - $result = "$self->{_footnote_counter}"; + $result = "$counter"; } - push (@{$self->{_used_footnotes}},$id); + $fn->{$id} = { counter => $counter, uses => $uses }; } $result; }xsge; @@ -1937,8 +1945,10 @@ sub _PrintFootnotes { my $footnote_counter = 0; my $result = ""; - foreach my $id (@{$self->{_used_footnotes}}) { - $footnote_counter++; + while (my ($id, $hash) = each(%{$self->{_used_footnotes}})) { + $footnote_counter = $hash->{counter}; + my $uses = $hash->{uses}; + my $use = 0; my $footnote = $self->{_footnotes}{$id}; my $footnote_closing_tag = ""; @@ -1963,10 +1973,16 @@ sub _PrintFootnotes { $glossary . ":

    "; }egsx; - $result.="

  • $footnote ↩$footnote_closing_tag
  • \n\n"; + $result.="
  • $footnote"; } else { - $result.="
  • $footnote ↩$footnote_closing_tag
  • \n\n"; + $result.="
  • $footnote"; + } + while ($use < $uses) { + $use++; + $result.=" ↩"; + $result.=" " if $use < $uses; # some whitespace between backreferences } + $result.="$footnote_closing_tag
  • \n\n"; } $result .= "\n"; From e23970eb68d1b797e67c2a0d791a2b9e8e795832 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sun, 13 May 2012 07:59:44 +0200 Subject: [PATCH 33/45] Glaring horror error from previous commit --- lib/MultiMarkdown.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm index d1012f3..aa3c7e2 100644 --- a/lib/MultiMarkdown.pm +++ b/lib/MultiMarkdown.pm @@ -1916,7 +1916,7 @@ sub _DoFootnotes { $counter = $fn->{$id}->{counter}; $uses = $fn->{$id}->{uses}+1; } else { - $counter = $self->{_footnote_counter}++; + $counter = ++$self->{_footnote_counter}; $uses = 1; } if ($self->{_footnotes}{$id} =~ /^(

    )?glossary:/i) { From 8939649f5e22b88b8188e49df4217beae6a63b54 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Fri, 17 Aug 2012 07:24:34 +0200 Subject: [PATCH 34/45] Output footnotes ordered by their counter --- lib/MultiMarkdown.pm | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm index aa3c7e2..6261de7 100644 --- a/lib/MultiMarkdown.pm +++ b/lib/MultiMarkdown.pm @@ -1943,6 +1943,7 @@ sub _FixFootnoteParagraphs { sub _PrintFootnotes { my $self = shift; my $footnote_counter = 0; + my @fnlist = (); my $result = ""; while (my ($id, $hash) = each(%{$self->{_used_footnotes}})) { @@ -1951,6 +1952,7 @@ sub _PrintFootnotes { my $use = 0; my $footnote = $self->{_footnotes}{$id}; my $footnote_closing_tag = ""; + my $fntext = ""; $footnote =~ s/(\<\/(p(re)?|ol|ul)\>)$//; $footnote_closing_tag = $1; @@ -1973,21 +1975,22 @@ sub _PrintFootnotes { $glossary . ":

    "; }egsx; - $result.="

  • $footnote"; + $fntext.="
  • $footnote"; } else { - $result.="
  • $footnote"; + $fntext.="
  • $footnote"; } while ($use < $uses) { $use++; - $result.=" ↩"; - $result.=" " if $use < $uses; # some whitespace between backreferences + $fntext.=" ↩"; + $fntext.=" " if $use < $uses; # some whitespace between backreferences } - $result.="$footnote_closing_tag
  • \n\n"; + $fntext.="$footnote_closing_tag\n\n"; + $fnlist[$footnote_counter-1] = $fntext; } - $result .= "\n"; - if ($footnote_counter > 0) { - $result = "\n\n
    \n{empty_element_suffix}\n
      \n\n".$result; + if (@fnlist > 0) { + $result = "\n\n
      \n{empty_element_suffix}\n
        \n\n". + join('',@fnlist) . "
      \n
      "; } else { $result = ""; } From 33418f44af1c1de23e89c6c2869718163f7ca05d Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sun, 5 May 2013 11:27:08 +0200 Subject: [PATCH 35/45] Allow footnotes in footnotes Allow text in footnotes to reference other footnotes. Refactor in-text footnote _mark_ processing from footnote _text_ processing, and process footnote marks when processing each footnote text. --- lib/MultiMarkdown.pm | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm index 6261de7..232139b 100644 --- a/lib/MultiMarkdown.pm +++ b/lib/MultiMarkdown.pm @@ -1893,17 +1893,9 @@ sub _StripFootnoteDefinitions { return $text; } -sub _DoFootnotes { +sub _DoFootnoteMarks { my ($self, $text) = @_; - # First, run routines that get skipped in footnotes - foreach my $label (sort keys %{$self->{_footnotes}}) { - my $footnote = $self->_RunBlockGamut($self->{_footnotes}{$label}); - - $footnote = $self->_DoMarkdownCitations($footnote); - $self->{_footnotes}{$label} = $footnote; - } - $text =~ s{ \[\^(.+?)\] # id = $1 }{ @@ -1932,6 +1924,21 @@ sub _DoFootnotes { return $text; } +sub _DoFootnotes { + my ($self, $text) = @_; + + # First, run routines that get skipped in footnotes + foreach my $label (sort keys %{$self->{_footnotes}}) { + my $footnote = $self->_RunBlockGamut($self->{_footnotes}{$label}); + + $footnote = $self->_DoMarkdownCitations($footnote); + $footnote = $self->_DoFootnoteMarks($footnote); + $self->{_footnotes}{$label} = $footnote; + } + + return $self->_DoFootnoteMarks($text); +} + sub _FixFootnoteParagraphs { my ($self, $text) = @_; From 521faa186faa85800ad271a43eadfe8cf31044b6 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Mon, 13 Oct 2014 12:16:56 +0200 Subject: [PATCH 36/45] Support math between $..$ --- lib/MultiMarkdown.pm | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/lib/MultiMarkdown.pm 
b/lib/MultiMarkdown.pm index 232139b..7fceb5b 100644 --- a/lib/MultiMarkdown.pm +++ b/lib/MultiMarkdown.pm @@ -15,7 +15,6 @@ # Based on Markdown.pl 1.0.2b8 - Wed 09 May 2007 # # -# TODO: Change math mode delimiter? # TODO: Still need to get the glossary working in non-memoir documents # TODO: A mechanism to include arbitrary code (LaTeX, etc) without being "ugly" # TODO: Look into discussion re: assigning classes to div's/span's on Markdown list. @@ -2580,6 +2579,7 @@ sub _DoMathSpans { my $display_as_block = 0; $display_as_block = 1 if ($text =~ /^<<[^\>\>]*>>$/); + # << .. >> delimiters $text =~ s{ (?_RunSpanGamut($3); + + $self->{_crossrefs}{$label} = "#$label"; + $self->{_titles}{$label} = $header; + } + push(@attr,(id=>"$label")) if ($label ne ""); + push(@attr,(display=>"block")) if ($display_as_block == 1); + + $m = $mathParser->TextToMathML($m,\@attr); + "$m"; + }egsx; + return $text; } From a2bc6d2145741d9d2e38f8957d6b7662516bb5c7 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Mon, 13 Oct 2014 13:53:59 +0200 Subject: [PATCH 37/45] ASCIIMathML: some UTF-8 support --- lib/Text/ASCIIMathML.pm | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/lib/Text/ASCIIMathML.pm b/lib/Text/ASCIIMathML.pm index ce56872..07a0a4a 100644 --- a/lib/Text/ASCIIMathML.pm +++ b/lib/Text/ASCIIMathML.pm @@ -591,6 +591,7 @@ my $AMbbb = [0xEF8C,0xEF8D,0x2102,0xEF8E,0xEF8F,0xEF90,0xEF91,0x210D,0xEF92,0xEF # Create closure for static variables my %AMSymbol = ( "sqrt" => { tag=>"msqrt", output=>"sqrt", tex=>'', ttype=>"UNARY" }, +"√" => { tag=>"msqrt", output=>"sqrt", tex=>'', ttype=>"UNARY" }, "root" => { tag=>"mroot", output=>"root", tex=>'', ttype=>"BINARY" }, "frac" => { tag=>"mfrac", output=>"/", tex=>'', ttype=>"BINARY" }, "/" => { tag=>"mfrac", output=>"/", tex=>'', ttype=>"INFIX" }, @@ -607,7 +608,9 @@ my %AMSymbol = ( # some greek symbols "alpha" => { tag=>"mi", output=>"α", tex=>'', ttype=>"CONST" }, +"α" => { tag=>"mi", 
output=>"α", tex=>'', ttype=>"CONST" }, "beta" => { tag=>"mi", output=>"β", tex=>'', ttype=>"CONST" }, +"β" => { tag=>"mi", output=>"β", tex=>'', ttype=>"CONST" }, "chi" => { tag=>"mi", output=>"χ", tex=>'', ttype=>"CONST" }, "delta" => { tag=>"mi", output=>"δ", tex=>'', ttype=>"CONST" }, "Delta" => { tag=>"mo", output=>"Δ", tex=>'', ttype=>"CONST" }, @@ -625,7 +628,9 @@ my %AMSymbol = ( "omega" => { tag=>"mi", output=>"ω", tex=>'', ttype=>"CONST" }, "Omega" => { tag=>"mo", output=>"Ω", tex=>'', ttype=>"CONST" }, "phi" => { tag=>"mi", output=>"ϕ", tex=>'', ttype=>"CONST" }, +"ϕ" => { tag=>"mi", output=>"ϕ", tex=>'', ttype=>"CONST" }, "varphi" => { tag=>"mi", output=>"φ", tex=>'', ttype=>"CONST" }, +"φ" => { tag=>"mi", output=>"φ", tex=>'', ttype=>"CONST" }, "Phi" => { tag=>"mo", output=>"Φ", tex=>'', ttype=>"CONST" }, "pi" => { tag=>"mi", output=>"π", tex=>'', ttype=>"CONST" }, "Pi" => { tag=>"mo", output=>"Π", tex=>'', ttype=>"CONST" }, @@ -636,7 +641,9 @@ my %AMSymbol = ( "Sigma" => { tag=>"mo", output=>"Σ", tex=>'', ttype=>"CONST" }, "tau" => { tag=>"mi", output=>"τ", tex=>'', ttype=>"CONST" }, "theta" => { tag=>"mi", output=>"θ", tex=>'', ttype=>"CONST" }, +"θ" => { tag=>"mi", output=>"θ", tex=>'', ttype=>"CONST" }, "vartheta" => { tag=>"mi", output=>"ϑ", tex=>'', ttype=>"CONST" }, +"ϑ" => { tag=>"mi", output=>"ϑ", tex=>'', ttype=>"CONST" }, "Theta" => { tag=>"mo", output=>"Θ", tex=>'', ttype=>"CONST" }, "upsilon" => { tag=>"mi", output=>"υ", tex=>'', ttype=>"CONST" }, "xi" => { tag=>"mi", output=>"ξ", tex=>'', ttype=>"CONST" }, @@ -645,10 +652,12 @@ my %AMSymbol = ( # binary operation symbols "*" => { tag=>"mo", output=>"⋅", tex=>"cdot", ttype=>"CONST" }, +"⋅" => { tag=>"mo", output=>"⋅", tex=>"cdot", ttype=>"CONST" }, "**" => { tag=>"mo", output=>"⋆", tex=>"star", ttype=>"CONST" }, "//" => { tag=>"mo", output=>"/", tex=>'', ttype=>"CONST" }, "\\\\" => { tag=>"mo", output=>"\\", tex=>"backslash", ttype=>"CONST" }, "setminus" => { tag=>"mo", output=>"\\", 
tex=>'', ttype=>"CONST" }, +"∖" => { tag=>"mo", output=>"\\", tex=>'', ttype=>"CONST" }, "xx" => { tag=>"mo", output=>"×", tex=>"times", ttype=>"CONST" }, "-:" => { tag=>"mo", output=>"÷", tex=>"div", ttype=>"CONST" }, "@" => { tag=>"mo", output=>"∘", tex=>"circ", ttype=>"CONST" }, @@ -668,26 +677,41 @@ my %AMSymbol = ( # binary relation symbols "!=" => { tag=>"mo", output=>"≠", tex=>"ne", ttype=>"CONST" }, +"≠" => { tag=>"mo", output=>"≠", tex=>"ne", ttype=>"CONST" }, ":=" => { tag=>"mo", output=>":=", tex=>'', ttype=>"CONST" }, #"lt" => { tag=>"mo", output=>"<", tex=>'', ttype=>"CONST" }, "lt" => { tag=>"mo", output=>"<", tex=>'', ttype=>"CONST" }, "<=" => { tag=>"mo", output=>"≤", tex=>"le", ttype=>"CONST" }, +"≤" => { tag=>"mo", output=>"≤", tex=>"le", ttype=>"CONST" }, "lt=" => { tag=>"mo", output=>"≤", tex=>"leq", ttype=>"CONST", latex=>1 }, ">=" => { tag=>"mo", output=>"≥", tex=>"ge", ttype=>"CONST" }, "geq" => { tag=>"mo", output=>"≥", tex=>'', ttype=>"CONST", latex=>1 }, +"≥" => { tag=>"mo", output=>"≥", tex=>"ge", ttype=>"CONST" }, "-<" => { tag=>"mo", output=>"≺", tex=>"prec", ttype=>"CONST", latex=>1 }, +"≺" => { tag=>"mo", output=>"≺", tex=>"prec", ttype=>"CONST", latex=>1 }, "-lt" => { tag=>"mo", output=>"≺", tex=>'', ttype=>"CONST" }, ">-" => { tag=>"mo", output=>"≻", tex=>"succ", ttype=>"CONST" }, +"≻" => { tag=>"mo", output=>"≻", tex=>"succ", ttype=>"CONST" }, "in" => { tag=>"mo", output=>"∈", tex=>'', ttype=>"CONST" }, +"∈" => { tag=>"mo", output=>"∈", tex=>'', ttype=>"CONST" }, "!in" => { tag=>"mo", output=>"∉", tex=>"notin", ttype=>"CONST" }, +"∉" => { tag=>"mo", output=>"∉", tex=>"notin", ttype=>"CONST" }, "sub" => { tag=>"mo", output=>"⊂", tex=>"subset", ttype=>"CONST" }, +"⊂" => { tag=>"mo", output=>"⊂", tex=>"subset", ttype=>"CONST" }, "sup" => { tag=>"mo", output=>"⊃", tex=>"supset", ttype=>"CONST" }, +"⊃" => { tag=>"mo", output=>"⊃", tex=>"supset", ttype=>"CONST" }, "sube" => { tag=>"mo", output=>"⊆", tex=>"subseteq", ttype=>"CONST" }, 
+"⊆" => { tag=>"mo", output=>"⊆", tex=>"subseteq", ttype=>"CONST" }, "supe" => { tag=>"mo", output=>"⊇", tex=>"supseteq", ttype=>"CONST" }, +"⊇" => { tag=>"mo", output=>"⊇", tex=>"supseteq", ttype=>"CONST" }, "-=" => { tag=>"mo", output=>"≡", tex=>"equiv", ttype=>"CONST" }, +"≡" => { tag=>"mo", output=>"≡", tex=>"equiv", ttype=>"CONST" }, "~=" => { tag=>"mo", output=>"≅", tex=>"cong", ttype=>"CONST" }, +"≅" => { tag=>"mo", output=>"≅", tex=>"cong", ttype=>"CONST" }, "~~" => { tag=>"mo", output=>"≈", tex=>"approx", ttype=>"CONST" }, +"≈" => { tag=>"mo", output=>"≈", tex=>"approx", ttype=>"CONST" }, "prop" => { tag=>"mo", output=>"∝", tex=>"propto", ttype=>"CONST" }, +"∝" => { tag=>"mo", output=>"∝", tex=>"propto", ttype=>"CONST" }, # new for perl "<" => { tag=>"mo", output=>"<", tex=>'', ttype=>"CONST" }, @@ -701,10 +725,14 @@ my %AMSymbol = ( "or" => { tag=>"mtext", output=>"or", tex=>'', ttype=>"SPACE" }, "not" => { tag=>"mo", output=>"¬", tex=>"neg", ttype=>"CONST" }, "=>" => { tag=>"mo", output=>"⇒", tex=>"implies", ttype=>"CONST" }, +"⇒" => { tag=>"mo", output=>"⇒", tex=>"implies", ttype=>"CONST" }, "if" => { tag=>"mo", output=>"if", tex=>'if', ttype=>"SPACE" }, "<=>" => { tag=>"mo", output=>"⇔", tex=>"iff", ttype=>"CONST" }, +"⇔" => { tag=>"mo", output=>"⇔", tex=>"iff", ttype=>"CONST" }, "AA" => { tag=>"mo", output=>"∀", tex=>"forall", ttype=>"CONST" }, +"∀" => { tag=>"mo", output=>"∀", tex=>"forall", ttype=>"CONST" }, "EE" => { tag=>"mo", output=>"∃", tex=>"exists", ttype=>"CONST" }, +"∃" => { tag=>"mo", output=>"∃", tex=>"exists", ttype=>"CONST" }, "_|_" => { tag=>"mo", output=>"⊥", tex=>"bot", ttype=>"CONST" }, "TT" => { tag=>"mo", output=>"⊤", tex=>"top", ttype=>"CONST" }, "|--" => { tag=>"mo", output=>"⊢", tex=>"vdash", ttype=>"CONST" }, @@ -736,9 +764,11 @@ my %AMSymbol = ( "del" => { tag=>"mo", output=>"∂", tex=>"partial", ttype=>"CONST" }, "grad" => { tag=>"mo", output=>"∇", tex=>"nabla", ttype=>"CONST" }, "+-" => { tag=>"mo", output=>"±", tex=>"pm", 
ttype=>"CONST" }, +"±" => { tag=>"mo", output=>"±", tex=>"pm", ttype=>"CONST" }, "O/" => { tag=>"mo", output=>"∅", tex=>"emptyset", ttype=>"CONST" }, "oo" => { tag=>"mo", output=>"∞", tex=>"infty", ttype=>"CONST" }, "aleph" => { tag=>"mo", output=>"ℵ", tex=>'', ttype=>"CONST" }, +"ℵ" => { tag=>"mo", output=>"ℵ", tex=>'', ttype=>"CONST" }, "..." => { tag=>"mo", output=>"...", tex=>"ldots", ttype=>"CONST" }, ":." => { tag=>"mo", output=>"∴", tex=>"therefore", ttype=>"CONST" }, "/_" => { tag=>"mo", output=>"∠", tex=>"angle", ttype=>"CONST" }, @@ -752,14 +782,23 @@ my %AMSymbol = ( "diamond" => { tag=>"mo", output=>"⋄", tex=>'', ttype=>"CONST" }, "square" => { tag=>"mo", output=>"□", tex=>'', ttype=>"CONST" }, "|__" => { tag=>"mo", output=>"⌊", tex=>"lfloor", ttype=>"CONST" }, +"⌊" => { tag=>"mo", output=>"⌊", tex=>"lfloor", ttype=>"CONST" }, "__|" => { tag=>"mo", output=>"⌋", tex=>"rfloor", ttype=>"CONST" }, +"⌋" => { tag=>"mo", output=>"⌋", tex=>"rfloor", ttype=>"CONST" }, "|~" => { tag=>"mo", output=>"⌈", tex=>"lceil", ttype=>"CONST" }, +"⌈" => { tag=>"mo", output=>"⌈", tex=>"lceil", ttype=>"CONST" }, "~|" => { tag=>"mo", output=>"⌉", tex=>"rceil", ttype=>"CONST" }, +"⌉" => { tag=>"mo", output=>"⌉", tex=>"rceil", ttype=>"CONST" }, "CC" => { tag=>"mo", output=>"ℂ", tex=>'', ttype=>"CONST" }, +"ℂ" => { tag=>"mo", output=>"ℂ", tex=>'', ttype=>"CONST" }, "NN" => { tag=>"mo", output=>"ℕ", tex=>'', ttype=>"CONST" }, +"ℕ" => { tag=>"mo", output=>"ℕ", tex=>'', ttype=>"CONST" }, "QQ" => { tag=>"mo", output=>"ℚ", tex=>'', ttype=>"CONST" }, +"ℚ" => { tag=>"mo", output=>"ℚ", tex=>'', ttype=>"CONST" }, "RR" => { tag=>"mo", output=>"ℝ", tex=>'', ttype=>"CONST" }, +"ℝ" => { tag=>"mo", output=>"ℝ", tex=>'', ttype=>"CONST" }, "ZZ" => { tag=>"mo", output=>"ℤ", tex=>'', ttype=>"CONST" }, +"ℤ" => { tag=>"mo", output=>"ℤ", tex=>'', ttype=>"CONST" }, "f" => { tag=>"mi", output=>"f", tex=>'', ttype=>"UNARY", func=>"true" }, "g" => { tag=>"mi", output=>"g", tex=>'', ttype=>"UNARY", 
func=>"true" }, @@ -792,6 +831,7 @@ my %AMSymbol = ( "darr" => { tag=>"mo", output=>"↓", tex=>"downarrow", ttype=>"CONST" }, "rarr" => { tag=>"mo", output=>"→", tex=>"rightarrow", ttype=>"CONST" }, "->" => { tag=>"mo", output=>"→", tex=>"to", ttype=>"CONST", latex=>1 }, +"→" => { tag=>"mo", output=>"→", tex=>"to", ttype=>"CONST", latex=>1 }, "|->" => { tag=>"mo", output=>"↦", tex=>"mapsto", ttype=>"CONST" }, "larr" => { tag=>"mo", output=>"←", tex=>"leftarrow", ttype=>"CONST" }, "harr" => { tag=>"mo", output=>"↔", tex=>"leftrightarrow", ttype=>"CONST" }, From b892c60756f70137e54761d481a6d7fd73f2ec2b Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Mon, 13 Oct 2014 14:55:50 +0200 Subject: [PATCH 38/45] Support UTF-8 in ASCIIMathML --- lib/Text/ASCIIMathML.pm | 64 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/lib/Text/ASCIIMathML.pm b/lib/Text/ASCIIMathML.pm index 07a0a4a..6d6acdf 100644 --- a/lib/Text/ASCIIMathML.pm +++ b/lib/Text/ASCIIMathML.pm @@ -903,6 +903,7 @@ sub _parseExpr : method { my $newFrag = $self->_createDocumentFragment(); my ($node, $input, $symbol); do { + $str = _replaceUniSuperSubScripts($str); $str = _removeCharsAndBlanks($str, 0); ($node, $str) = $self->_parseIexpr($str); ($input, $symbol) = $self->_getSymbol($str); @@ -1338,6 +1339,69 @@ sub _removeCharsAndBlanks { return $st; } +BEGIN { +my %UnicodeSuperscripts = ( +"⁰" => "0", +"¹" => "1", +"²" => "2", +"³" => "3", +"⁴" => "4", +"⁵" => "5", +"⁶" => "6", +"⁷" => "7", +"⁸" => "8", +"⁹" => "9", +"⁺" => "+", +"⁻" => "-", +"⁼" => "=", +"⁽" => "(", +"⁾" => ")", +"ⁿ" => "n", +); + +my %UnicodeSubscripts = ( +"₀" => "0", +"₁" => "1", +"₂" => "2", +"₃" => "3", +"₄" => "4", +"₅" => "5", +"₆" => "6", +"₇" => "7", +"₈" => "8", +"₉" => "9", +"₊" => "+", +"₋" => "-", +"₌" => "=", +"₍" => "(", +"₎" => ")", +); + +my $UnicodeSupRE = join '|', keys %UnicodeSuperscripts; +my $UnicodeSubRE = join '|', keys %UnicodeSubscripts; + +# Replaces Unicode superscripts and 
subscripts with corresponding +# ASCIIMathML syntax +# Arguments: string +# Returns: resultant string +sub _replaceUniSuperSubScripts { + my ($s) = @_; + $s =~ s@(($UnicodeSupRE)+)@ + my $repl = $1; + $repl =~ s/($UnicodeSupRE)/$UnicodeSuperscripts{$1}/eg; + (length $repl) > 1 ? "^($repl)" : "^$repl"; + @egs; + + $s =~ s@(($UnicodeSubRE)+)@ + my $repl = $1; + $repl =~ s/($UnicodeSubRE)/$UnicodeSubscripts{$1}/eg; + (length $repl) > 1 ? "_($repl)" : "_$repl"; + @egs; + + return $s; +} +} + # Removes outermost parenthesis # Arguments: string # Returns: string with parentheses removed From 497e33e36e1385dc78dec99e24f0985ee4aa4f66 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Mon, 13 Oct 2014 14:59:46 +0200 Subject: [PATCH 39/45] All scripts: fall back to __FILE__ if not link --- bin/mmd2LaTeX.pl | 2 +- bin/mmd2PDF.pl | 2 +- bin/mmd2PDFXeLaTeX.pl | 2 +- bin/mmd2RTF.pl | 2 +- bin/mmd2XHTML.pl | 2 +- bin/mmd2letter.pl | 2 +- bin/mmd2web.pl | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/bin/mmd2LaTeX.pl b/bin/mmd2LaTeX.pl index e530d0f..51fd283 100755 --- a/bin/mmd2LaTeX.pl +++ b/bin/mmd2LaTeX.pl @@ -30,7 +30,7 @@ # Determine where MMD is installed. Use a "common installation" if available. -my $me = readlink(__FILE__); # Where is this script located? +my $me = readlink(__FILE__) || __FILE__; # Where is this script located? my $MMDPath = LocateMMD($me); # Determine whether we are in "file mode" or "stdin mode" diff --git a/bin/mmd2PDF.pl b/bin/mmd2PDF.pl index c2b0c38..8f98aac 100755 --- a/bin/mmd2PDF.pl +++ b/bin/mmd2PDF.pl @@ -30,7 +30,7 @@ # Determine where MMD is installed. Use a "common installation" if available. -my $me = readlink(__FILE__); # Where is this script located? +my $me = readlink(__FILE__) || __FILE__; # Where is this script located? 
my $MMDPath = LocateMMD($me); # Determine whether we are in "file mode" or "stdin mode" diff --git a/bin/mmd2PDFXeLaTeX.pl b/bin/mmd2PDFXeLaTeX.pl index 9404aa8..b7ac504 100755 --- a/bin/mmd2PDFXeLaTeX.pl +++ b/bin/mmd2PDFXeLaTeX.pl @@ -30,7 +30,7 @@ # Determine where MMD is installed. Use a "common installation" if available. -my $me = readlink(__FILE__); # Where is this script located? +my $me = readlink(__FILE__) || __FILE__; # Where is this script located? my $MMDPath = LocateMMD($me); # Determine whether we are in "file mode" or "stdin mode" diff --git a/bin/mmd2RTF.pl b/bin/mmd2RTF.pl index f23ce55..78946df 100755 --- a/bin/mmd2RTF.pl +++ b/bin/mmd2RTF.pl @@ -30,7 +30,7 @@ # Determine where MMD is installed. Use a "common installation" if available. -my $me = readlink(__FILE__); # Where is this script located? +my $me = readlink(__FILE__) || __FILE__; # Where is this script located? my $MMDPath = LocateMMD($me); # Determine whether we are in "file mode" or "stdin mode" diff --git a/bin/mmd2XHTML.pl b/bin/mmd2XHTML.pl index 537b3c7..0751b07 100755 --- a/bin/mmd2XHTML.pl +++ b/bin/mmd2XHTML.pl @@ -30,7 +30,7 @@ # Determine where MMD is installed. Use a "common installation" if available. -my $me = readlink(__FILE__); # Where is this script located? +my $me = readlink(__FILE__) || __FILE__; # Where is this script located? my $MMDPath = LocateMMD($me); # Determine whether we are in "file mode" or "stdin mode" diff --git a/bin/mmd2letter.pl b/bin/mmd2letter.pl index 8c4c3f5..146838f 100755 --- a/bin/mmd2letter.pl +++ b/bin/mmd2letter.pl @@ -31,7 +31,7 @@ # Determine where MMD is installed. Use a "common installation" if available. -my $me = readlink(__FILE__); # Where is this script located? +my $me = readlink(__FILE__) || __FILE__; # Where is this script located? 
my $MMDPath = LocateMMD($me); # Determine whether we are in "file mode" or "stdin mode" diff --git a/bin/mmd2web.pl b/bin/mmd2web.pl index 5ed0513..534570f 100755 --- a/bin/mmd2web.pl +++ b/bin/mmd2web.pl @@ -33,7 +33,7 @@ # Determine where MMD is installed. Use a "common installation" if available. -my $me = readlink(__FILE__); # Where is this script located? +my $me = readlink(__FILE__) || __FILE__; # Where is this script located? my $MMDPath = LocateMMD($me); # Determine whether we are in "file mode" or "stdin mode" From 38df87e4fd7d2ae6ec872e9b2565b352461a390d Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Mon, 13 Oct 2014 15:42:43 +0200 Subject: [PATCH 40/45] More UTF-8 for ASCIIMathML --- lib/Text/ASCIIMathML.pm | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/Text/ASCIIMathML.pm b/lib/Text/ASCIIMathML.pm index 6d6acdf..0b34153 100644 --- a/lib/Text/ASCIIMathML.pm +++ b/lib/Text/ASCIIMathML.pm @@ -659,7 +659,9 @@ my %AMSymbol = ( "setminus" => { tag=>"mo", output=>"\\", tex=>'', ttype=>"CONST" }, "∖" => { tag=>"mo", output=>"\\", tex=>'', ttype=>"CONST" }, "xx" => { tag=>"mo", output=>"×", tex=>"times", ttype=>"CONST" }, +"×" => { tag=>"mo", output=>"×", tex=>"times", ttype=>"CONST" }, "-:" => { tag=>"mo", output=>"÷", tex=>"div", ttype=>"CONST" }, +"÷" => { tag=>"mo", output=>"÷", tex=>"div", ttype=>"CONST" }, "@" => { tag=>"mo", output=>"∘", tex=>"circ", ttype=>"CONST" }, "o+" => { tag=>"mo", output=>"⊕", tex=>"oplus", ttype=>"CONST" }, "ox" => { tag=>"mo", output=>"⊗", tex=>"otimes", ttype=>"CONST" }, From 094052fb8281f282bef68408094fa3452d6bb7b1 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sat, 27 Jul 2019 08:12:15 +0200 Subject: [PATCH 41/45] Some currency symbols --- lib/Text/ASCIIMathML.pm | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/Text/ASCIIMathML.pm b/lib/Text/ASCIIMathML.pm index 0b34153..c706a5a 100644 --- a/lib/Text/ASCIIMathML.pm +++ b/lib/Text/ASCIIMathML.pm @@ -803,6 +803,8 @@ my %AMSymbol = ( "ℤ" => { 
tag=>"mo", output=>"ℤ", tex=>'', ttype=>"CONST" }, "f" => { tag=>"mi", output=>"f", tex=>'', ttype=>"UNARY", func=>"true" }, "g" => { tag=>"mi", output=>"g", tex=>'', ttype=>"UNARY", func=>"true" }, +"€" => { tag=>"mi", output=>"€", tex=>'', ttype=>"CONST" }, +"¤" => { tag=>"mi", output=>"¤", tex=>'\textcurrency', ttype=>"CONST" }, # standard functions "lim" => { tag=>"mo", output=>"lim", tex=>'', ttype=>"UNDEROVER" }, From a7e11599d0b28519254aa635151cac2ca1a8b105 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sat, 31 Aug 2019 00:01:36 +0200 Subject: [PATCH 42/45] Treat floor/ceil as parenthesis --- lib/Text/ASCIIMathML.pm | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/Text/ASCIIMathML.pm b/lib/Text/ASCIIMathML.pm index c706a5a..266d302 100644 --- a/lib/Text/ASCIIMathML.pm +++ b/lib/Text/ASCIIMathML.pm @@ -748,6 +748,14 @@ my %AMSymbol = ( "{" => { tag=>"mo", output=>"{", tex=>'', ttype=>"LEFTBRACKET" }, "}" => { tag=>"mo", output=>"}", tex=>'', ttype=>"RIGHTBRACKET" }, "|" => { tag=>"mo", output=>"|", tex=>'', ttype=>"LEFTRIGHT" }, +"|__" => { tag=>"mo", output=>"⌊", tex=>"lfloor", ttype=>"LEFTBRACKET" }, +"⌊" => { tag=>"mo", output=>"⌊", tex=>"lfloor", ttype=>"LEFTBRACKET" }, +"__|" => { tag=>"mo", output=>"⌋", tex=>"rfloor", ttype=>"RIGHTBRACKET" }, +"⌋" => { tag=>"mo", output=>"⌋", tex=>"rfloor", ttype=>"RIGHTBRACKET" }, +"|~" => { tag=>"mo", output=>"⌈", tex=>"lceil", ttype=>"LEFTBRACKET" }, +"⌈" => { tag=>"mo", output=>"⌈", tex=>"lceil", ttype=>"LEFTBRACKET" }, +"~|" => { tag=>"mo", output=>"⌉", tex=>"rceil", ttype=>"RIGHTBRACKET" }, +"⌉" => { tag=>"mo", output=>"⌉", tex=>"rceil", ttype=>"RIGHTBRACKET" }, # {input:"||", tag:"mo", output:"||", tex:null, ttype:LEFTRIGHT}, "(:" => { tag=>"mo", output=>"〈", tex=>"langle", ttype=>"LEFTBRACKET" }, ":)" => { tag=>"mo", output=>"〉", tex=>"rangle", ttype=>"RIGHTBRACKET" }, @@ -783,14 +791,6 @@ my %AMSymbol = ( "ddots" => { tag=>"mo", output=>"⋱", tex=>'', ttype=>"CONST" 
}, "diamond" => { tag=>"mo", output=>"⋄", tex=>'', ttype=>"CONST" }, "square" => { tag=>"mo", output=>"□", tex=>'', ttype=>"CONST" }, -"|__" => { tag=>"mo", output=>"⌊", tex=>"lfloor", ttype=>"CONST" }, -"⌊" => { tag=>"mo", output=>"⌊", tex=>"lfloor", ttype=>"CONST" }, -"__|" => { tag=>"mo", output=>"⌋", tex=>"rfloor", ttype=>"CONST" }, -"⌋" => { tag=>"mo", output=>"⌋", tex=>"rfloor", ttype=>"CONST" }, -"|~" => { tag=>"mo", output=>"⌈", tex=>"lceil", ttype=>"CONST" }, -"⌈" => { tag=>"mo", output=>"⌈", tex=>"lceil", ttype=>"CONST" }, -"~|" => { tag=>"mo", output=>"⌉", tex=>"rceil", ttype=>"CONST" }, -"⌉" => { tag=>"mo", output=>"⌉", tex=>"rceil", ttype=>"CONST" }, "CC" => { tag=>"mo", output=>"ℂ", tex=>'', ttype=>"CONST" }, "ℂ" => { tag=>"mo", output=>"ℂ", tex=>'', ttype=>"CONST" }, "NN" => { tag=>"mo", output=>"ℕ", tex=>'', ttype=>"CONST" }, From f51648fe149c5964fe3b7b1b2b9b6f833c85e8d2 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sun, 22 Sep 2019 00:19:59 +0200 Subject: [PATCH 43/45] ASCIIMathML: more trig --- lib/Text/ASCIIMathML.pm | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/lib/Text/ASCIIMathML.pm b/lib/Text/ASCIIMathML.pm index 266d302..273ad5f 100644 --- a/lib/Text/ASCIIMathML.pm +++ b/lib/Text/ASCIIMathML.pm @@ -830,6 +830,24 @@ my %AMSymbol = ( "min" => { tag=>"mo", output=>"min", tex=>'', ttype=>"UNDEROVER" }, "max" => { tag=>"mo", output=>"max", tex=>'', ttype=>"UNDEROVER" }, +# inverse trig functions are in the .js now, let's have them here too +"arcsin" => { tag=>"mo", output=>"arcsin", tex=>'', ttype=>"UNARY", func=>"true" }, +"arccos" => { tag=>"mo", output=>"arccos", tex=>'', ttype=>"UNARY", func=>"true" }, +"arctan" => { tag=>"mo", output=>"arctan", tex=>'', ttype=>"UNARY", func=>"true" }, +"arccot" => { tag=>"mo", output=>"arccot", tex=>'', ttype=>"UNARY", func=>"true" }, +"arcsec" => { tag=>"mo", output=>"arcsec", tex=>'', ttype=>"UNARY", func=>"true" }, +"arccsc" => { tag=>"mo", output=>"arccsc", tex=>'', 
ttype=>"UNARY", func=>"true" }, +"arcsinh" => { tag=>"mo", output=>"arcsinh", tex=>'', ttype=>"UNARY", func=>"true" }, +"arccosh" => { tag=>"mo", output=>"arccosh", tex=>'', ttype=>"UNARY", func=>"true" }, +"arctanh" => { tag=>"mo", output=>"arctanh", tex=>'', ttype=>"UNARY", func=>"true" }, +"arccoth" => { tag=>"mo", output=>"arccoth", tex=>'', ttype=>"UNARY", func=>"true" }, + +# also, we're quirky like this: +"versin" => { tag=>"mo", output=>"versin", tex=>'', ttype=>"UNARY", func=>"true" }, +"coversin" => { tag=>"mo", output=>"coversin", tex=>'', ttype=>"UNARY", func=>"true" }, +"exsec" => { tag=>"mo", output=>"exsec", tex=>'', ttype=>"UNARY", func=>"true" }, +"excsc" => { tag=>"mo", output=>"excsc", tex=>'', ttype=>"UNARY", func=>"true" }, + # arrows "uarr" => { tag=>"mo", output=>"↑", tex=>"uparrow", ttype=>"CONST" }, "darr" => { tag=>"mo", output=>"↓", tex=>"downarrow", ttype=>"CONST" }, From 38dfb5f7da71ab9d1583dae3bb850dc677ef581e Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sat, 12 Aug 2023 13:07:50 +0200 Subject: [PATCH 44/45] Better HTML5 support + my stuff --- lib/MultiMarkdown.pm | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm index 7fceb5b..fc66c59 100644 --- a/lib/MultiMarkdown.pm +++ b/lib/MultiMarkdown.pm @@ -558,8 +558,11 @@ our $g_block_tags = qr{ dl | ol | ul | script | noscript | form | fieldset | iframe | ins | del | # HTML5 + address | canvas | picture | figcaption | svg | math | article | header | footer | section | aside | video | - output | hgroup | canvas | figure | nav + output | hgroup | canvas | figure | main | nav | + # my own + switch ) }x; # MultiMarkdown does not include `math` in the above list so that # Equations can optionally be included in separate paragraphs From 0615443087c86816be613562f5fd8793d1b32d33 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sat, 6 Dec 2025 11:09:43 +0100 Subject: [PATCH 45/45] details & summary are block-level tags --- 
lib/MultiMarkdown.pm | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm index fc66c59..4a20df3 100644 --- a/lib/MultiMarkdown.pm +++ b/lib/MultiMarkdown.pm @@ -561,6 +561,7 @@ our $g_block_tags = qr{ address | canvas | picture | figcaption | svg | math | article | header | footer | section | aside | video | output | hgroup | canvas | figure | main | nav | + details | summary | # my own switch )