s around // "paragraphs" that are wrapped in non-block-level tags, such as anchors, // phrase emphasis, and spans. The list of tags we're looking for is // hard-coded: var block_tags_a = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del|style|section|header|footer|nav|article|aside"; var block_tags_b = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|style|section|header|footer|nav|article|aside"; // First, look for nested blocks, e.g.: //
tags around block-level tags.
text = _HashHTMLBlocks(text);
text = _FormParagraphs(text);
return text;
};
var _RunSpanGamut = function (text) {
//
// These are all the transformations that occur *within* block-level
// tags like paragraphs, headers, and list items.
//
text = _DoCodeSpans(text);
text = _EscapeSpecialCharsWithinTagAttributes(text);
text = _EncodeBackslashEscapes(text);
// Process anchor and image tags. Images must come first,
// because ![foo][f] looks like an anchor.
text = _DoImages(text);
text = _DoAnchors(text);
// Make links out of things like ` Just type tags
//
// Strip leading and trailing lines:
text = text.replace(/^\n+/g, "");
text = text.replace(/\n+$/g, "");
var grafs = text.split(/\n{2,}/g);
var grafsOut = [];
//
// Wrap tags.
//
var end = grafs.length;
for (var i = 0; i < end; i++) {
var str = grafs[i];
// if this is an HTML marker, copy it
if (str.search(/~K(\d+)K/g) >= 0) {
grafsOut.push(str);
}
else if (str.search(/\S/) >= 0) {
str = _RunSpanGamut(str);
str = str.replace(/^([ \t]*)/g, " ");
str += "
\n");
return text;
}
var _EscapeSpecialCharsWithinTagAttributes = function (text) {
//
// Within tags -- meaning between < and > -- encode [\ ` * _] so they
// don't conflict with their use in Markdown for code, italics and strong.
//
// Build a regex to find HTML tags and comments. See Friedl's
// "Mastering Regular Expressions", 2nd Ed., pp. 200-201.
var regex = /(<[a-z\/!$]("[^"]*"|'[^']*'|[^'">])*>|)/gi;
text = text.replace(regex, function (wholeMatch) {
var tag = wholeMatch.replace(/(.)<\/?code>(?=.)/g, "$1`");
tag = escapeCharacters(tag, "\\`*_");
return tag;
});
return text;
}
var _DoAnchors = function (text) {
//
// Turn Markdown link shortcuts into XHTML tags.
//
//
// First, handle reference-style links: [link text] [id]
//
/*
text = text.replace(/
( // wrap whole match in $1
\[
(
(?:
\[[^\]]*\] // allow brackets nested one level
|
[^\[] // or anything else
)*
)
\]
[ ]? // one optional space
(?:\n[ ]*)? // one optional newline followed by spaces
\[
(.*?) // id = $3
\]
)()()()() // pad remaining backreferences
/g,_DoAnchors_callback);
*/
text = text.replace(/(\[((?:\[[^\]]*\]|[^\[\]])*)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g, writeAnchorTag);
//
// Next, inline-style links: [link text](url "optional title")
//
/*
text = text.replace(/
( // wrap whole match in $1
\[
(
(?:
\[[^\]]*\] // allow brackets nested one level
|
[^\[\]] // or anything else
)
)
\]
\( // literal paren
[ \t]*
() // no id, so leave $3 empty
(.*?)>? // href = $4
[ \t]*
( // $5
(['"]) // quote char = $6
(.*?) // Title = $7
\6 // matching quote
[ \t]* // ignore any spaces/tabs between closing quote and )
)? // title is optional
\)
)
/g,writeAnchorTag);
*/
text = text.replace(/(\[((?:\[[^\]]*\]|[^\[\]])*)\]\([ \t]*()(.*?(?:\(.*?\).*?)?)>?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g, writeAnchorTag);
//
// Last, handle reference-style shortcuts: [link text]
// These must come last in case you've also got [link test][1]
// or [link test](/foo)
//
/*
text = text.replace(/
( // wrap whole match in $1
\[
([^\[\]]+) // link text = $2; can't contain '[' or ']'
\]
)()()()()() // pad rest of backreferences
/g, writeAnchorTag);
*/
text = text.replace(/(\[([^\[\]]+)\])()()()()()/g, writeAnchorTag);
return text;
}
var writeAnchorTag = function (wholeMatch, m1, m2, m3, m4, m5, m6, m7) {
if (m7 == undefined) m7 = "";
var whole_match = m1;
var link_text = m2;
var link_id = m3.toLowerCase();
var url = m4;
var title = m7;
if (url == "") {
if (link_id == "") {
// lower-case and turn embedded newlines into spaces
link_id = link_text.toLowerCase().replace(/ ?\n/g, " ");
}
url = "#" + link_id;
if (g_urls[link_id] != undefined) {
url = g_urls[link_id];
if (g_titles[link_id] != undefined) {
title = g_titles[link_id];
}
}
else {
if (whole_match.search(/\(\s*\)$/m) > -1) {
// Special case for explicit empty url
url = "";
} else {
return whole_match;
}
}
}
url = escapeCharacters(url, "*_");
var result = "" + link_text + "";
return result;
}
var _DoImages = function (text) {
//
// Turn Markdown image shortcuts into tags.
//
//
// First, handle reference-style labeled images: ![alt text][id]
//
/*
text = text.replace(/
( // wrap whole match in $1
!\[
(.*?) // alt text = $2
\]
[ ]? // one optional space
(?:\n[ ]*)? // one optional newline followed by spaces
\[
(.*?) // id = $3
\]
)()()()() // pad rest of backreferences
/g,writeImageTag);
*/
text = text.replace(/(!\[(.*?)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g, writeImageTag);
//
// Next, handle inline images: ![alt text](url "optional title")
// Don't forget: encode * and _
/*
text = text.replace(/
( // wrap whole match in $1
!\[
(.*?) // alt text = $2
\]
\s? // One optional whitespace character
\( // literal paren
[ \t]*
() // no id, so leave $3 empty
(\S+?)>? // src url = $4
[ \t]*
( // $5
(['"]) // quote char = $6
(.*?) // title = $7
\6 // matching quote
[ \t]*
)? // title is optional
\)
)
/g,writeImageTag);
*/
text = text.replace(/(!\[(.*?)\]\s?\([ \t]*()(\S+?)>?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g, writeImageTag);
return text;
}
var writeImageTag = function (wholeMatch, m1, m2, m3, m4, m5, m6, m7) {
var whole_match = m1;
var alt_text = m2;
var link_id = m3.toLowerCase();
var url = m4;
var title = m7;
if (!title) title = "";
if (url == "") {
if (link_id == "") {
// lower-case and turn embedded newlines into spaces
link_id = alt_text.toLowerCase().replace(/ ?\n/g, " ");
}
url = "#" + link_id;
if (g_urls[link_id] != undefined) {
url = g_urls[link_id];
if (g_titles[link_id] != undefined) {
title = g_titles[link_id];
}
}
else {
return whole_match;
}
}
alt_text = alt_text.replace(/"/g, """);
url = escapeCharacters(url, "*_");
var result = "";
return result;
}
var _DoHeaders = function (text) {
// Setext-style headers:
// Header 1
// ========
//
// Header 2
// --------
//
text = text.replace(/^(.+)[ \t]*\n=+[ \t]*\n+/gm,
function (wholeMatch, m1) {
return hashBlock('' + _RunSpanGamut(m1) + "
");
});
text = text.replace(/^(.+)[ \t]*\n-+[ \t]*\n+/gm,
function (matchFound, m1) {
return hashBlock('' + _RunSpanGamut(m1) + "
");
});
// atx-style headers:
// # Header 1
// ## Header 2
// ## Header 2 with closing hashes ##
// ...
// ###### Header 6
//
/*
text = text.replace(/
^(\#{1,6}) // $1 = string of #'s
[ \t]*
(.+?) // $2 = Header text
[ \t]*
\#* // optional closing #'s (not counted)
\n+
/gm, function() {...});
*/
text = text.replace(/^(\#{1,6})[ \t]*(.+?)[ \t]*\#*\n+/gm,
function (wholeMatch, m1, m2) {
var h_level = m1.length;
return hashBlock("` blocks.
//
/*
text = text.replace(text,
/(?:\n\n|^)
( // $1 = the code block -- one or more lines, starting with a space/tab
(?:
(?:[ ]{4}|\t) // Lines must start with a tab or a tab-width of spaces - attacklab: g_tab_width
.*\n+
)+
)
(\n*[ ]{0,3}[^ \t\n]|(?=~0)) // attacklab: g_tab_width
/g,function(){...});
*/
// attacklab: sentinel workarounds for lack of \A and \Z, safari\khtml bug
text += "~0";
text = text.replace(/(?:\n\n|^)((?:(?:[ ]{4}|\t).*\n+)+)(\n*[ ]{0,3}[^ \t\n]|(?=~0))/g,
function (wholeMatch, m1, m2) {
var codeblock = m1;
var nextChar = m2;
codeblock = _EncodeCode(_Outdent(codeblock));
codeblock = _Detab(codeblock);
codeblock = codeblock.replace(/^\n+/g, ""); // trim leading newlines
codeblock = codeblock.replace(/\n+$/g, ""); // trim trailing whitespace
codeblock = "
";
return hashBlock(codeblock) + nextChar;
}
);
// attacklab: strip sentinel
text = text.replace(/~0/, "");
return text;
};
var _DoGithubCodeBlocks = function (text) {
//
// Process Github-style code blocks
// Example:
// ```ruby
// def hello_world(x)
// puts "Hello, #{x}"
// end
// ```
//
// attacklab: sentinel workarounds for lack of \A and \Z, safari\khtml bug
text += "~0";
text = text.replace(/(?:^|\n)```(.*)\n([\s\S]*?)\n```/g,
function (wholeMatch, m1, m2) {
var language = m1;
var codeblock = m2;
codeblock = _EncodeCode(codeblock);
codeblock = _Detab(codeblock);
codeblock = codeblock.replace(/^\n+/g, ""); // trim leading newlines
codeblock = codeblock.replace(/\n+$/g, ""); // trim trailing whitespace
codeblock = "" + codeblock + "\n
";
return hashBlock(codeblock);
}
);
// attacklab: strip sentinel
text = text.replace(/~0/, "");
return text;
}
var hashBlock = function (text) {
text = text.replace(/(^\n+|\n+$)/g, "");
return "\n\n~K" + (g_html_blocks.push(text) - 1) + "K\n\n";
}
var _DoCodeSpans = function (text) {
//
// * Backtick quotes are used for " + codeblock + "\n
spans.
//
// * You can use multiple backticks as the delimiters if you want to
// include literal backticks in the code span. So, this input:
//
// Just type ``foo `bar` baz`` at the prompt.
//
// Will translate to:
//
//
foo `bar` baz
at the prompt.`bar`
...
//
/*
text = text.replace(/
(^|[^\\]) // Character before opening ` can't be a backslash
(`+) // $2 = Opening run of `
( // $3 = The code block
[^\r]*?
[^`] // attacklab: work around lack of lookbehind
)
\2 // Matching closer
(?!`)
/gm, function(){...});
*/
text = text.replace(/(^|[^\\])(`+)([^\r]*?[^`])\2(?!`)/gm,
function (wholeMatch, m1, m2, m3, m4) {
var c = m3;
c = c.replace(/^([ \t]*)/g, ""); // leading whitespace
c = c.replace(/[ \t]*$/g, ""); // trailing whitespace
c = _EncodeCode(c);
return m1 + "" + c + "
";
});
return text;
}
var _EncodeCode = function (text) {
//
// Encode/escape certain characters inside Markdown code runs.
// The point is that in code, these characters are literals,
// and lose their special Markdown meanings.
//
// Encode all ampersands; HTML entities are not
// entities within a Markdown code span.
text = text.replace(/&/g, "&");
// Do the angle bracket song and dance:
text = text.replace(//g, ">");
// Now, escape characters that are magic in Markdown:
text = escapeCharacters(text, "\*_{}[]\\", false);
// jj the line above breaks this:
//---
//* Item
// 1. Subitem
// special char: *
//---
return text;
}
var _DoItalicsAndBold = function (text) {
// must go first:
text = text.replace(/(\*\*|__)(?=\S)([^\r]*?\S[*_]*)\1/g,
"$2");
text = text.replace(/(\*|_)(?=\S)([^\r]*?\S)\1/g,
"$2");
return text;
}
var _DoBlockQuotes = function (text) {
/*
text = text.replace(/
( // Wrap whole match in $1
(
^[ \t]*>[ \t]? // '>' at the start of a line
.+\n // rest of the first line
(.+\n)* // subsequent consecutive lines
\n* // blanks
)+
)
/gm, function(){...});
*/
text = text.replace(/((^[ \t]*>[ \t]?.+\n(.+\n)*\n*)+)/gm,
function (wholeMatch, m1) {
var bq = m1;
// attacklab: hack around Konqueror 3.5.4 bug:
// "----------bug".replace(/^-/g,"") == "bug"
bq = bq.replace(/^[ \t]*>[ \t]?/gm, "~0"); // trim one level of quoting
// attacklab: clean up hack
bq = bq.replace(/~0/g, "");
bq = bq.replace(/^[ \t]+$/gm, ""); // trim whitespace-only lines
bq = _RunBlockGamut(bq); // recurse
bq = bq.replace(/(^|\n)/g, "$1 ");
// These leading spaces screw with content, so we need to fix that:
bq = bq.replace(
/(\s*
[^\r]+?<\/pre>)/gm,
function (wholeMatch, m1) {
var pre = m1;
// attacklab: hack around Konqueror 3.5.4 bug:
pre = pre.replace(/^ /mg, "~0");
pre = pre.replace(/~0/g, "");
return pre;
});
return hashBlock("
\n" + bq + "\n
");
});
return text;
}
var _FormParagraphs = function (text) {
//
// Params:
// $text - string to process with html