]> git.openstreetmap.org Git - osqa.git/blob - forum/skins/default/media/js/wmd/showdown.js
Prevent XSS attacks with wmd using the google-caja html sanitizer.
[osqa.git] / forum / skins / default / media / js / wmd / showdown.js
1 //
2 // showdown.js -- A javascript port of Markdown.
3 //
4 // Copyright (c) 2007 John Fraser.
5 //
6 // Original Markdown Copyright (c) 2004-2005 John Gruber
7 //   <http://daringfireball.net/projects/markdown/>
8 //
9 // The full source distribution is at:
10 //
11 //                              A A L
12 //                              T C A
13 //                              T K B
14 //
15 //   <http://www.attacklab.net/>
16 //
17
18 //
19 // Wherever possible, Showdown is a straight, line-by-line port
20 // of the Perl version of Markdown.
21 //
22 // This is not a normal parser design; it's basically just a
23 // series of string substitutions.  It's hard to read and
24 // maintain this way,  but keeping Showdown close to the original
25 // design makes it easier to port new features.
26 //
27 // More importantly, Showdown behaves like markdown.pl in most
28 // edge cases.  So web applications can do client-side preview
29 // in Javascript, and then build identical HTML on the server.
30 //
31 // This port needs the new RegExp functionality of ECMA 262,
32 // 3rd Edition (i.e. Javascript 1.5).  Most modern web browsers
33 // should do fine.  Even with the new regular expression features,
34 // We do a lot of work to emulate Perl's regex functionality.
35 // The tricky changes in this file mostly have the "attacklab:"
36 // label.  Major or self-explanatory changes don't.
37 //
38 // Smart diff tools like Araxis Merge will be able to match up
39 // this file with markdown.pl in a useful way.  A little tweaking
40 // helps: in a copy of markdown.pl, replace "#" with "//" and
41 // replace "$text" with "text".  Be sure to ignore whitespace
42 // and line endings.
43 //
44
45
46 //
47 // Showdown usage:
48 //
49 //   var text = "Markdown *rocks*.";
50 //
51 //   var converter = new Attacklab.showdown.converter();
52 //   var html = converter.makeHtml(text);
53 //
54 //   alert(html);
55 //
56 // Note: move the sample code to the bottom of this
57 // file before uncommenting it.
58 //
59
60
61 //
62 // Attacklab namespace
63 //
// Reuse an existing Attacklab object if another script already created one.
var Attacklab = Attacklab || {};

//
// Showdown namespace: keep an existing Attacklab.showdown object,
// otherwise start with an empty one.
//
if (!Attacklab.showdown) {
    Attacklab.showdown = {};
}
70
71 //
72 // converter
73 //
74 // Wraps all "globals" so that the only thing
75 // exposed is makeHtml().
76 //
77 Attacklab.showdown.converter = function() {
78
79
80 // g_urls and g_titles allow arbitrary user-entered strings as keys. This
81 // caused an exception (and hence stopped the rendering) when the user entered
82 // e.g. [push] or [__proto__]. Adding a prefix to the actual key prevents this
83 // (since no builtin property starts with "s_"). See
84 // http://meta.stackoverflow.com/questions/64655/strange-wmd-bug
85 // (granted, switching from Array() to Object() alone would have left only __proto__
86 // to be a problem)
// Minimal string-keyed store. Every key is stored under "s_" + key, so
// user-supplied keys such as "push" or "__proto__" can never collide with
// a built-in property of the object.
var SaveHash = function () {
    var KEY_PREFIX = "s_";

    // Store `value` under the prefixed form of `key`.
    this.set = function (key, value) {
        this[KEY_PREFIX + key] = value;
    };

    // Return the value stored for `key`, or undefined if nothing was stored.
    this.get = function (key) {
        return this[KEY_PREFIX + key];
    };
};
95
96 //
97 // Globals:
98 //
99
100 // Global hashes, used by various utility routines
// Per-conversion lookup tables shared by the helper routines; makeHtml()
// re-creates all three at the start of every run.
var g_urls,          // link id -> URL
    g_titles,        // link id -> title text
    g_html_blocks;   // hashed-out HTML blocks, indexed by the number in their ~K<n>K marker

// Used to track when we're inside an ordered or unordered list
// (see _ProcessListItems() for details):
var g_list_level = 0;
108
109
this.makeHtml = function(text) {
//
// Main entry point: converts Markdown source to HTML.
//
// The order of the steps below is essential. Link and image
// substitutions need to happen before
// _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in
// the <a> and <img> tags get encoded.
//

    // Run the raw input through html_sanitize() first, passing URLs and
    // ids through unchanged.
    var source = html_sanitize(
        text,
        function(url) { return url; },
        function(id) { return id; }
    );

    // Reset the global hashes. Without this, definitions from a previous
    // call would leak into this one (e.g. an index page that renders the
    // N most recent articles with the same converter).
    g_urls = new SaveHash();
    g_titles = new SaveHash();
    g_html_blocks = [];

    source = source
        // attacklab: Replace ~ with ~T. This lets us use tilde as an
        // escape char to avoid md5 hashes. The choice of character is
        // arbitrary; anything that isn't magic in Markdown will work.
        .replace(/~/g,"~T")
        // attacklab: Replace $ with ~D, because $ is a special character
        // inside a RegExp replacement string.
        .replace(/\$/g,"~D")
        // Standardize line endings: DOS to Unix, then Mac to Unix.
        .replace(/\r\n/g,"\n")
        .replace(/\r/g,"\n");

    // Make sure the text begins and ends with a couple of newlines, then
    // convert all tabs to spaces.
    source = _Detab("\n\n" + source + "\n\n");

    // Strip any lines consisting only of spaces and tabs. This makes
    // subsequent regexen easier to write, because consecutive blank lines
    // can be matched with /\n+/ instead of /[ \t]*\n+/ .
    source = source.replace(/^[ \t]+$/mg,"");

    // Turn block-level HTML blocks into hash entries.
    source = _HashHTMLBlocks(source);

    // Strip link definitions, store them in the hashes.
    source = _StripLinkDefinitions(source);

    source = _RunBlockGamut(source);
    source = _UnescapeSpecialChars(source);

    // attacklab: Restore dollar signs ("$$" is a literal "$" in a
    // replacement string), then restore tildes.
    return source.replace(/~D/g,"$$").replace(/~T/g,"~");
};
172
var _StripLinkDefinitions = function(text) {
//
// Strips link definitions of the form
//     [id]: url "optional title"
// from text and stores the URLs and titles in g_urls / g_titles.
// Returns the text with the definitions removed.
//

    // Pattern, piece by piece:
    //   ^[ ]{0,3}\[(.+)\]:         id = $1, indented by at most 3 spaces
    //   [ \t]*\n?[ \t]*            optional whitespace, maybe *one* newline
    //   <?(\S+?)>?(?=\s|$)         url = $2, optionally in angle brackets,
    //                              must be followed by whitespace or line end
    //   [ \t]*\n?[ \t]*            optional whitespace, maybe one newline
    //   ((\n*)["(](.+?)[")][ \t]*)?  optional title chunk = $3:
    //                                blank lines skipped = $4, title = $5
    //   (?:\n+)                    trailing newline(s)
    var linkDefinition = /^[ ]{0,3}\[(.+)\]:[ \t]*\n?[ \t]*<?(\S+?)>?(?=\s|$)[ \t]*\n?[ \t]*((\n*)["(](.+?)[")][ \t]*)?(?:\n+)/gm;

    var recordDefinition = function (wholeMatch, id, url, titleChunk, skippedLines, title) {
        // Link IDs are case-insensitive
        var key = id.toLowerCase();
        g_urls.set(key, _EncodeAmpsAndAngles(url));

        if (skippedLines) {
            // Blank lines came before the quoted text, so it is not a
            // title. Put back the parenthetical statement we stole.
            return titleChunk;
        }

        if (title) {
            g_titles.set(key, title.replace(/"/g,"&quot;"));
        }

        // Completely remove the definition from the text
        return "";
    };

    return text.replace(linkDefinition, recordDefinition);
};
223
var _HashHTMLBlocks = function(text) {
//
// Hashify HTML blocks: every match of the patterns below is handed to
// hashElement() and replaced by whatever it returns.
//
// We only want to do this for block-level HTML tags, such as headers,
// lists, and tables. That's because we still want to wrap <p>s around
// "paragraphs" that are wrapped in non-block-level tags, such as anchors,
// phrase emphasis, and spans. The tag lists are hard-coded in the
// patterns.
//
// The patterns are applied strictly in the order listed.
//
    var blockPatterns = [
        // 1. Nested blocks, e.g.:
        //      <div>
        //        <div>
        //        tags for inner block must be indented.
        //        </div>
        //      </div>
        //    The outermost tags must start at the left margin for this to
        //    match, and the inner nested divs must be indented. This has
        //    to run before the next, more liberal pattern, because that
        //    one starts at the first `<div>` and stops at the first
        //    `</div>`.
        //    Shape: start of line, `<tag` + word break, any number of
        //    lines (minimal; [^\r] is a hack around a khtml/pcre bug),
        //    `\n</tag>` for the same tag, trailing spaces/tabs, followed
        //    by a newline (sentinel newlines exist at end of document).
        //    attacklab: This regex can be expensive when it fails.
        /^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del)\b[^\r]*?\n<\/\2>[ \t]*(?=\n+))/gm,

        // 2. More liberal match, simply from `<tag>` at the start of a
        //    line to `</tag>` followed by a newline (no ins/del here).
        /^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math)\b[^\r]*?.*<\/\2>[ \t]*(?=\n+)\n)/gm,

        // 3. Special case just for <hr />: a newline, up to three spaces,
        //    the tag, optional spaces/tabs, then a blank line. It was
        //    easier to make a special case than to make the other regex
        //    more complicated.
        /\n[ ]{0,3}((<(hr)\b([^<>])*?\/?>)[ \t]*(?=\n{2,}))/g,

        // 4. Standalone HTML comments: after a blank line, up to three
        //    spaces, <!-- ... -->, followed by a blank line.
        //    See http://www.w3.org/TR/html-markup/syntax.html#comments
        /\n\n[ ]{0,3}(<!(--(?:|(?:[^>-]|-[^>])(?:[^-]|-[^-])*)--)>[ \t]*(?=\n{2,}))/g,

        // 5. PHP and ASP-style processor instructions (<?...?> and
        //    <%...%>): after a blank line, followed by a blank line.
        /(?:\n\n)([ ]{0,3}(?:<([?%])[^\r]*?\2>)[ \t]*(?=\n{2,}))/g
    ];

    for (var i = 0; i < blockPatterns.length; i++) {
        text = text.replace(blockPatterns[i], hashElement);
    }

    return text;
};
343
// Replace callback: stores the captured element text (m1) in g_html_blocks
// and returns the marker that stands in for it ("~KxK", where x is the
// element's index in g_html_blocks), padded with blank lines on both sides.
var hashElement = function(wholeMatch,m1) {
    // Undo double lines at the start, and strip trailing blank lines.
    var trimmed = m1.replace(/^\n+/,"").replace(/\n+$/g,"");

    // push() returns the new length, so the element's index is one less.
    var key = g_html_blocks.push(trimmed) - 1;

    return "\n\n~K" + key + "K\n\n";
};
358
var _RunBlockGamut = function(text, doNotUnhash) {
//
// Applies all the transformations that form block-level tags like
// paragraphs, headers, and list items. `doNotUnhash` is handed through
// unchanged to _FormParagraphs().
//
    var blocks = _DoHeaders(text);

    // Horizontal rules: a line made of three or more *, - or _ characters
    // (optionally separated by single spaces) becomes a hashed "<hr />".
    var hrKey = hashBlock("<hr />");
    var rulePatterns = [
        /^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$/gm,
        /^[ ]{0,2}([ ]?-[ ]?){3,}[ \t]*$/gm,
        /^[ ]{0,2}([ ]?_[ ]?){3,}[ \t]*$/gm
    ];
    for (var i = 0; i < rulePatterns.length; i++) {
        blocks = blocks.replace(rulePatterns[i], hrKey);
    }

    blocks = _DoBlockQuotes(_DoCodeBlocks(_DoLists(blocks)));

    // _HashHTMLBlocks() already ran once to escape raw HTML in the
    // original Markdown source. This time it escapes the markup we've
    // just created, so that <p> tags are not wrapped around block-level
    // tags.
    return _FormParagraphs(_HashHTMLBlocks(blocks), doNotUnhash);
};
385
386
var _RunSpanGamut = function(text) {
//
// Applies all the transformations that occur *within* block-level
// tags like paragraphs, headers, and list items.
//

    // Code spans first, then protect Markdown-special characters inside
    // tags, then backslash escapes.
    var spans = _EncodeBackslashEscapes(
        _EscapeSpecialCharsWithinTagAttributes(
            _DoCodeSpans(text)));

    // Images must be processed before anchors, because ![foo][f] looks
    // like an anchor.
    spans = _DoAnchors(_DoImages(spans));

    // Auto-links (things like `<http://example.com/>`) must come after
    // _DoAnchors(), because < and > can be used as delimiters in inline
    // links like [this](<url>). Entity encoding and emphasis follow.
    spans = _DoItalicsAndBold(_EncodeAmpsAndAngles(_DoAutoLinks(spans)));

    // Hard breaks: two or more spaces at the end of a line.
    return spans.replace(/  +\n/g," <br />\n");
};
414
var _EscapeSpecialCharsWithinTagAttributes = function(text) {
//
// Within tags -- meaning between < and > -- encode [\ ` * _] so they
// don't conflict with their use in Markdown for code, italics and strong.
//

    // Matches HTML tags and comments. See Friedl's
    // "Mastering Regular Expressions", 2nd Ed., pp. 200-201.
    // SE: changed the comment part of the regex
    var tagOrComment = /(<[a-z\/!$]("[^"]*"|'[^']*'|[^'">])*>|<!(--(?:|(?:[^>-]|-[^>])(?:[^-]|-[^-])*)--)>)/gi;

    var protectTag = function(wholeMatch) {
        // A <code> or </code> found inside the matched tag (with at least
        // one character on either side) is turned back into a backtick
        // before the special characters are escaped.
        var withBackticks = wholeMatch.replace(/(.)<\/?code>(?=.)/g,"$1`");
        return escapeCharacters(withBackticks,"\\`*_");
    };

    return text.replace(tagOrComment, protectTag);
};
436
var _DoAnchors = function(text) {
//
// Turn Markdown link shortcuts into XHTML <a> tags. Three link forms are
// recognised, in this order; every match is rendered by writeAnchorTag().
// All three patterns expose the same capture layout:
//   $1 = whole match, $2 = link text, $3 = id, $4 = href,
//   $5 = title including quotes, $6 = quote char, $7 = title
// Empty groups `()` pad the positions a given form does not use.
//

    // 1. Reference-style links: [link text] [id]
    //    Link text may contain brackets nested one level. One optional
    //    space and one optional newline (followed by spaces) are allowed
    //    between the two bracket pairs.
    var referenceLink = /(\[((?:\[[^\]]*\]|[^\[\]])*)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g;

    // 2. Inline-style links: [link text](url "optional title")
    //    The href may be wrapped in < > and may contain one level of
    //    correctly nested parens (think MSDN). The title is optional and
    //    must be closed by the same quote char that opened it.
    var inlineLink = /(\[((?:\[[^\]]*\]|[^\[\]])*)\]\([ \t]*()<?((?:\([^)]*\)|[^()])*?)>?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g;

    // 3. Reference-style shortcuts: [link text]
    //    The link text can't contain '[' or ']'. These must come last in
    //    case you've also got [link test][1] or [link test](/foo).
    var shortcutLink = /(\[([^\[\]]+)\])()()()()()/g;

    return text
        .replace(referenceLink, writeAnchorTag)
        .replace(inlineLink, writeAnchorTag)
        .replace(shortcutLink, writeAnchorTag);
};
528
// Replace callback used by _DoAnchors(): builds the <a> tag for one link.
//
//   m1 = the whole matched link (returned unchanged when the link cannot
//        be resolved)
//   m2 = link text
//   m3 = reference id (may be empty)
//   m4 = inline url (empty for reference-style links)
//   m7 = title (may be empty or undefined)
//   m5, m6 are unused here.
//
// Returns the generated HTML, or m1 when a reference id has no stored
// definition and the link is not an explicit empty-url link "[text]()".
var writeAnchorTag = function(wholeMatch,m1,m2,m3,m4,m5,m6,m7) {
    if (m7 == undefined) m7 = "";
    var whole_match = m1;
    var link_text   = m2;
    var link_id     = m3.toLowerCase();
    var url         = m4;
    var title       = m7;

    if (url == "") {
        if (link_id == "") {
            // lower-case and turn embedded newlines into spaces
            link_id = link_text.toLowerCase().replace(/ ?\n/g," ");
        }
        url = "#"+link_id;

        if (g_urls.get(link_id) != undefined) {
            url = g_urls.get(link_id);
            if (g_titles.get(link_id) != undefined) {
                title = g_titles.get(link_id);
            }
        }
        else {
            if (whole_match.search(/\(\s*\)$/m)>-1) {
                // Special case for explicit empty url
                url = "";
            } else {
                return whole_match;
            }
        }
    }

    // The url ends up inside a double-quoted attribute. Encode any literal
    // double quote so user input such as [x](" onmouseover="...) cannot
    // close the attribute and inject new ones. This mirrors what is done
    // for the title below.
    url = url.replace(/"/g,"&quot;");
    url = escapeCharacters(url,"*_");
    var result = "<a href=\"" + url + "\"";

    if (title != "") {
        title = title.replace(/"/g,"&quot;");
        title = escapeCharacters(title,"*_");
        result +=  " title=\"" + title + "\"";
    }

    result += ">" + link_text + "</a>";

    return result;
};
573
574
var _DoImages = function(text) {
//
// Turn Markdown image shortcuts into <img> tags. Both forms are rendered
// by writeImageTag() and share one capture layout:
//   $1 = whole match, $2 = alt text, $3 = id, $4 = src url,
//   $5 = title including quotes, $6 = quote char, $7 = title
// Empty groups `()` pad the positions a given form does not use.
//

    // 1. Reference-style labeled images: ![alt text][id]
    //    One optional space and one optional newline (followed by spaces)
    //    are allowed between the two bracket pairs.
    var referenceImage = /(!\[(.*?)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g;

    // 2. Inline images: ![alt text](url "optional title")
    //    One optional whitespace character may precede the paren, the url
    //    may be wrapped in < >, and the optional title must be closed by
    //    the same quote char that opened it.
    var inlineImage = /(!\[(.*?)\]\s?\([ \t]*()<?(\S+?)>?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g;

    return text
        .replace(referenceImage, writeImageTag)
        .replace(inlineImage, writeImageTag);
};
632
// Replace callback used by _DoImages(): builds the <img> tag for one image.
//
//   m1 = the whole matched image markup (returned unchanged when a
//        reference id has no stored definition)
//   m2 = alt text
//   m3 = reference id (may be empty)
//   m4 = inline src url (empty for reference-style images)
//   m7 = title (may be empty or undefined)
//   m5, m6 are unused here.
var writeImageTag = function(wholeMatch,m1,m2,m3,m4,m5,m6,m7) {
    var whole_match = m1;
    var alt_text    = m2;
    var link_id     = m3.toLowerCase();
    var url         = m4;
    var title       = m7;

    if (!title) title = "";

    if (url == "") {
        if (link_id == "") {
            // lower-case and turn embedded newlines into spaces
            link_id = alt_text.toLowerCase().replace(/ ?\n/g," ");
        }
        url = "#"+link_id;

        if (g_urls.get(link_id) != undefined) {
            url = g_urls.get(link_id);
            if (g_titles.get(link_id) != undefined) {
                title = g_titles.get(link_id);
            }
        }
        else {
            return whole_match;
        }
    }

    alt_text = alt_text.replace(/"/g,"&quot;");
    // The url ends up inside a double-quoted attribute, just like alt and
    // title. Encode any literal double quote so user input such as
    // ![a](x"onerror="...) cannot close the attribute and inject new ones.
    url = url.replace(/"/g,"&quot;");
    url = escapeCharacters(url,"*_");
    var result = "<img src=\"" + url + "\" alt=\"" + alt_text + "\"";

    // attacklab: Markdown.pl adds empty title attributes to images.
    // Replicate this bug, i.e. emit the title attribute unconditionally.
    title = title.replace(/"/g,"&quot;");
    title = escapeCharacters(title,"*_");
    result +=  " title=\"" + title + "\"";

    result += " />";

    return result;
};
677
678
var _DoHeaders = function(text) {
//
// Converts Setext-style and atx-style Markdown headers into <h1>..<h6>
// tags. The header text is run through _RunSpanGamut().
//

    // Builds "<hN>content</hN>" followed by a blank line.
    var renderHeader = function(level, content) {
        return "<h" + level + ">" + _RunSpanGamut(content) + "</h" + level + ">\n\n";
    };

    // Setext-style headers:
    //      Header 1
    //      ========
    //
    //      Header 2
    //      --------
    //
    var setextLevel1 = /^(.+)[ \t]*\n=+[ \t]*\n+/gm;
    var setextLevel2 = /^(.+)[ \t]*\n-+[ \t]*\n+/gm;

    // atx-style headers:
    //  # Header 1
    //  ## Header 2
    //  ## Header 2 with closing hashes ##
    //  ...
    //  ###### Header 6
    //
    // $1 = string of #'s (its length is the header level),
    // $2 = header text; optional closing #'s are not counted.
    var atxHeader = /^(\#{1,6})[ \t]*(.+?)[ \t]*\#*\n+/gm;

    return text
        .replace(setextLevel1, function(wholeMatch, title) {
            return renderHeader(1, title);
        })
        .replace(setextLevel2, function(wholeMatch, title) {
            return renderHeader(2, title);
        })
        .replace(atxHeader, function(wholeMatch, hashes, title) {
            return renderHeader(hashes.length, title);
        });
};
721
722 // This declaration keeps Dojo compressor from outputting garbage:
723 var _ProcessListItems;
724
725 var _DoLists = function(text) {
726 //
727 // Form HTML ordered (numbered) and unordered (bulleted) lists.
728 //
729
730         // attacklab: add sentinel to hack around khtml/safari bug:
731         // http://bugs.webkit.org/show_bug.cgi?id=11231
732         text += "~0";
733
734         // Re-usable pattern to match any entire ul or ol list:
735
736         /*
737                 var whole_list = /
738                 (                                                                       // $1 = whole list
739                         (                                                               // $2
740                                 [ ]{0,3}                                        // attacklab: g_tab_width - 1
741                                 ([*+-]|\d+[.])                          // $3 = first list item marker
742                                 [ \t]+
743                         )
744                         [^\r]+?
745                         (                                                               // $4
746                                 ~0                                                      // sentinel for workaround; should be $
747                         |
748                                 \n{2,}
749                                 (?=\S)
750                                 (?!                                                     // Negative lookahead for another list item marker
751                                         [ \t]*
752                                         (?:[*+-]|\d+[.])[ \t]+
753                                 )
754                         )
755                 )/g
756         */
757         var whole_list = /^(([ ]{0,3}([*+-]|\d+[.])[ \t]+)[^\r]+?(~0|\n{2,}(?=\S)(?![ \t]*(?:[*+-]|\d+[.])[ \t]+)))/gm;
758
759         if (g_list_level) {
760                 text = text.replace(whole_list,function(wholeMatch,m1,m2) {
761                         var list = m1;
762                         var list_type = (m2.search(/[*+-]/g)>-1) ? "ul" : "ol";
763
764                         var result = _ProcessListItems(list, list_type);
765         
766                         // Trim any trailing whitespace, to put the closing `</$list_type>`
767                         // up on the preceding line, to get it past the current stupid
768                         // HTML block parser. This is a hack to work around the terrible
769                         // hack that is the HTML block parser.
770                         result = result.replace(/\s+$/,"");
771                         result = "<"+list_type+">" + result + "</"+list_type+">\n";
772                         return result;
773                 });
774         } else {
775                 whole_list = /(\n\n|^\n?)(([ ]{0,3}([*+-]|\d+[.])[ \t]+)[^\r]+?(~0|\n{2,}(?=\S)(?![ \t]*(?:[*+-]|\d+[.])[ \t]+)))/g;
776                 text = text.replace(whole_list,function(wholeMatch,m1,m2,m3) {
777                         var runup = m1;
778                         var list = m2;
779
780                         var list_type = (m3.search(/[*+-]/g)>-1) ? "ul" : "ol";
781                         var result = _ProcessListItems(list, list_type);
782                         result = runup + "<"+list_type+">\n" + result + "</"+list_type+">\n";   
783                         return result;
784                 });
785         }
786
787         // attacklab: strip sentinel
788         text = text.replace(/~0/,"");
789
790         return text;
791 }
792
// Regex source for the item marker of each list type.
var _listItemMarkers = { ol: "\\d+[.]", ul: "[*+-]" };

_ProcessListItems = function(list_str, list_type) {
//
//  Split the text of one ordered or unordered list into its items and
//  return them as a run of <li>...</li> lines.
//
//  list_type is either "ul" or "ol".
//
//  The g_list_level global tracks whether we are inside a list: it is
//  incremented on entry and decremented on exit, so zero means "not in a
//  list". This matters because outside a list we want text such as
//
//    I recommend upgrading to version
//    8. Oops, now this line is treated
//    as a sub-list.
//
//  to stay a single paragraph even though its second line starts with a
//  digit-period-space sequence, whereas inside a list (or sub-list) the same
//  line starts a sub-list. It is a kludge: this part of Markdown's syntax is
//  hard to parse perfectly without resorting to mind-reading.
//
//  In the original attacklab WMD, list_type was not passed in, and anything
//  matching /[*+-]|\d+[.]/ started the next <li>, causing this mismatch:
//
//  Markdown          rendered by WMD        rendered by MarkdownSharp
//  ------------------------------------------------------------------
//  1. first          1. first               1. first
//  2. second         2. second              2. second
//  - third           3. third                   * third
//
//  The item pattern is therefore built per list type so the behavior is
//  identical to MarkdownSharp. With {MARKER} being \d+[.] or [*+-]:
//
//	(^[ \t]*)                       // leading whitespace = $1
//	({MARKER}) [ \t]+               // list marker = $2
//	([^\r]+?                        // list item text   = $3
//	(\n+))
//	(?= (~0 | \1 ({MARKER}) [ \t]+))
//

	g_list_level++;

	var marker = _listItemMarkers[list_type];
	var itemPattern = new RegExp("(^[ \\t]*)(" + marker + ")[ \\t]+([^\\r]+?(\\n+))(?=(~0|\\1(" + marker + ")[ \\t]+))", "gm");

	// Trim trailing blank lines, then add a sentinel (attacklab: emulates \z).
	var prepared = list_str.replace(/\n{2,}$/,"\n") + "~0";

	// An item that follows one ending in a blank line is rendered as blocks too.
	var previousEndedBlank = false;

	var html = prepared.replace(itemPattern, function(wholeMatch, indent, bullet, body) {
		var endsBlank = /\n\n$/.test(body);
		var hasBlank = endsBlank || /\n{2,}/.test(body);
		var rendered;

		if (hasBlank || previousEndedBlank) {
			rendered = _RunBlockGamut(_Outdent(body), /* doNotUnhash = */ true);
		}
		else {
			// Recursion for sub-lists:
			rendered = _DoLists(_Outdent(body));
			rendered = rendered.replace(/\n$/,""); // chomp(item)
			rendered = _RunSpanGamut(rendered);
		}

		previousEndedBlank = endsBlank;
		return "<li>" + rendered + "</li>\n";
	});

	// attacklab: strip sentinel
	html = html.replace(/~0/g,"");

	g_list_level--;
	return html;
};
882
883
var _DoCodeBlocks = function(text) {
//
//  Convert indented Markdown code blocks into `<pre><code>` blocks.
//
//  Annotated form of the pattern used below:
//
//	(?:\n\n|^)
//	(                               // $1 = the code block -- one or more lines, starting with a space/tab
//		(?:
//			(?:[ ]{4}|\t)           // Lines must start with a tab or a tab-width of spaces - attacklab: g_tab_width
//			.*\n+
//		)+
//	)
//	(\n*[ ]{0,3}[^ \t\n]|(?=~0))    // $2 = what follows the block - attacklab: g_tab_width
//

	var blockPattern = /(?:\n\n|^)((?:(?:[ ]{4}|\t).*\n+)+)(\n*[ ]{0,3}[^ \t\n]|(?=~0))/g;

	// attacklab: sentinel workarounds for lack of \A and \Z, safari\khtml bug
	var withSentinel = text + "~0";

	var converted = withSentinel.replace(blockPattern, function(wholeMatch, rawBlock, following) {
		var code = _Detab(_EncodeCode(_Outdent(rawBlock)));

		code = code.replace(/^\n+/g,""); // trim leading newlines
		code = code.replace(/\n+$/g,""); // trim trailing newlines

		// Whatever the pattern consumed after the block is put back as-is.
		return "\n\n" + "<pre><code>" + code + "\n</code></pre>" + "\n\n" + following;
	});

	// attacklab: strip sentinel
	return converted.replace(/~0/,"");
};
926
// Store a block of text in g_html_blocks and return the placeholder
// ("~K<index>K", padded with blank lines) that stands in for it.
var hashBlock = function(text) {
	// Leading and trailing newlines are not part of the stored block.
	var stored = text.replace(/(^\n+|\n+$)/g,"");
	var index = g_html_blocks.push(stored) - 1;

	return "\n\n~K" + index + "K\n\n";
};
931
932
var _DoCodeSpans = function(text) {
//
//   *  Backtick quotes delimit <code></code> spans.
//
//   *  Several backticks can be used as the delimiter when the code span
//      itself has to contain literal backticks. So, this input:
//
//          Just type ``foo `bar` baz`` at the prompt.
//
//      translates to:
//
//          <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
//
//      There is no arbitrary limit on the number of backticks used as
//      delimiters. If you need three consecutive backticks in your code,
//      use four for delimiters, etc.
//
//   *  Spaces can be used to get literal backticks at the edges:
//
//          ... type `` `bar` `` ...
//
//      turns into:
//
//          ... type <code>`bar`</code> ...
//
//  Annotated form of the pattern used below:
//
//	(^|[^\\])           // $1 = character before opening ` can't be a backslash
//	(`+)                // $2 = opening run of `
//	(                   // $3 = the code block
//		[^\r]*?
//		[^`]            // attacklab: work around lack of lookbehind
//	)
//	\2                  // matching closer
//	(?!`)
//

	var spanPattern = /(^|[^\\])(`+)([^\r]*?[^`])\2(?!`)/gm;

	return text.replace(spanPattern, function(wholeMatch, before, ticks, code) {
		var body = code
			.replace(/^([ \t]*)/g,"")   // leading whitespace
			.replace(/[ \t]*$/g,"");    // trailing whitespace

		// The character consumed in front of the opening backticks is kept.
		return before + "<code>" + _EncodeCode(body) + "</code>";
	});
};
983
984
var _EncodeCode = function(text) {
//
// Escape the characters that must be literal inside Markdown code runs,
// so that they lose their special HTML and Markdown meanings.
//

	var encoded = text
		// Encode all ampersands; HTML entities are not
		// entities within a Markdown code span.
		.replace(/&/g,"&amp;")
		// Do the angle bracket song and dance:
		.replace(/</g,"&lt;")
		.replace(/>/g,"&gt;");

	// Now, escape characters that are magic in Markdown.
	//
	// Known issue (note from "jj"): this escaping breaks input like the
	// following, a code block with a special character inside a nested list:
	//
	//   * Item
	//
	//      1. Subitem
	//
	//               special char: *
	//
	return escapeCharacters(encoded,"\*_{}[]\\",false);
};
1014
1015
var _DoItalicsAndBold = function(text) {
//
// Convert **strong** / __strong__ and *emphasis* / _emphasis_ spans.
//

	// Applied in order: <strong> must go first, otherwise the single-character
	// <em> rule would consume the doubled delimiters.
	var rules = [
		[ /(\*\*|__)(?=\S)([^\r]*?\S[\*_]*)\1/g, "<strong>$2</strong>" ],
		[ /(\*|_)(?=\S)([^\r]*?\S)\1/g,          "<em>$2</em>" ]
	];

	var result = text;
	for (var i = 0; i < rules.length; i++) {
		result = result.replace(rules[i][0], rules[i][1]);
	}

	return result;
};
1027
1028
var _DoBlockQuotes = function(text) {
//
// Convert runs of '>'-quoted lines into hashed <blockquote> blocks.
//
// Annotated form of the pattern used below:
//
//	(                           // Wrap whole match in $1
//		(
//			^[ \t]*>[ \t]?      // '>' at the start of a line
//			.+\n                // rest of the first line
//			(.+\n)*             // subsequent consecutive lines
//			\n*                 // blanks
//		)+
//	)
//

	// Remove the two-space indent (added below) from the lines of a <pre>
	// block, so the preformatted content is not shifted.
	function unindentPre(wholeMatch, preBlock) {
		// attacklab: hack around Konqueror 3.5.4 bug:
		// "----------bug".replace(/^-/g,"") == "bug"
		return preBlock.replace(/^  /mg,"~0").replace(/~0/g,"");
	}

	return text.replace(/((^[ \t]*>[ \t]?.+\n(.+\n)*\n*)+)/gm, function(wholeMatch, quoted) {
		// Trim one level of quoting. The replacement goes through a "~0"
		// marker for the same Konqueror 3.5.4 bug as above.
		var inner = quoted.replace(/^[ \t]*>[ \t]?/gm,"~0").replace(/~0/g,"");

		inner = inner.replace(/^[ \t]+$/gm,"");   // trim whitespace-only lines
		inner = _RunBlockGamut(inner);            // recurse

		// Indent every line of the quote's HTML by two spaces...
		inner = inner.replace(/(^|\n)/g,"$1  ");
		// ...but those leading spaces screw with <pre> content, so undo it there:
		inner = inner.replace(/(\s*<pre>[^\r]+?<\/pre>)/gm, unindentPre);

		return hashBlock("<blockquote>\n" + inner + "\n</blockquote>");
	});
};
1075
1076
var _FormParagraphs = function(text, doNotUnhash) {
//
//  Wrap each paragraph of text in <p> tags and, unless told otherwise,
//  swap the hashed HTML block placeholders back in.
//
//  Params:
//    text        - string to process with html <p> tags
//    doNotUnhash - when truthy, "~K<n>K" placeholders are left in place
//                  instead of being replaced from g_html_blocks
//
//  Returns the resulting paragraphs/blocks joined by blank lines.
//

	// Strip leading and trailing lines:
	text = text.replace(/^\n+/g,"");
	text = text.replace(/\n+$/g,"");

	var grafs = text.split(/\n{2,}/g);
	var grafsOut = [];
	var i;

	//
	// Wrap <p> tags.
	//
	for (i = 0; i < grafs.length; i++) {
		var str = grafs[i];

		if (/~K\d+K/.test(str)) {
			// This is an HTML marker: copy it untouched.
			grafsOut.push(str);
		}
		else if (/\S/.test(str)) {
			str = _RunSpanGamut(str);
			// Leading whitespace of the paragraph is replaced by the open tag.
			str = str.replace(/^([ \t]*)/g,"<p>");
			grafsOut.push(str + "</p>");
		}
		// Whitespace-only chunks are dropped.
	}

	//
	// Unhashify HTML blocks
	//
	if (!doNotUnhash) {
		// Not global, so exec() always searches from the start of the string.
		var marker = /~K(\d+)K/;

		for (i = 0; i < grafsOut.length; i++) {
			var found;

			// Loop rather than a single pass: the text swapped in can itself
			// contain further markers.
			// The index is read from the match result, not from the
			// deprecated global RegExp.$1.
			while ((found = marker.exec(grafsOut[i])) !== null) {
				var blockText = g_html_blocks[found[1]];
				blockText = blockText.replace(/\$/g,"$$$$"); // Escape any dollar signs
				grafsOut[i] = grafsOut[i].replace(/~K\d+K/,blockText);
			}
		}
	}

	return grafsOut.join("\n\n");
};
1125
1126
var _EncodeAmpsAndAngles = function(text) {
//
// Smart processing for ampersands and angle brackets that need to be encoded:
// only those that are not already part of an entity or a tag are touched.
//

	// Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
	//   http://bumppo.net/projects/amputator/
	// An '&' is left alone when it starts something shaped like an entity.
	var nakedAmpersand = /&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/g;

	// A '<' is "naked" when it is not followed by a letter or one of / ? $ !
	var nakedLessThan = /<(?![a-z\/?\$!])/gi;

	return text
		.replace(nakedAmpersand,"&amp;")
		.replace(nakedLessThan,"&lt;");
};
1139
1140
var _EncodeBackslashEscapes = function(text) {
//
//   Parameter:  String.
//   Returns:    The string, after processing the following backslash
//               escape sequences:  \\  \`  \*  \_  \{  \}  \[  \]  \(  \)
//               \>  \#  \+  \-  \.  \!
//               Each sequence is replaced by the placeholder that
//               escapeCharacters_callback produces for the escaped character.
//

	// attacklab: The regexes are written out as literals here instead of
	// being built at run time, to sidestep the (slow) RegExp constructor
	// as an optimization for Firefox.  This function gets called a LOT.

	// Escaped backslashes go first, so that "\\*" is read as an escaped
	// backslash followed by a plain '*'.
	text = text.replace(/\\(\\)/g,escapeCharacters_callback);

	// The hyphen is escaped inside the character class on purpose: written
	// bare between '+' and '.', it would form the range '+'..'.' and make
	// "\," an escape sequence as well.
	text = text.replace(/\\([`*_{}\[\]()>#+\-.!])/g,escapeCharacters_callback);
	return text;
};
1161
1162
var _DoAutoLinks = function(text) {
//
// Turn <http://...> style URLs and <address@domain.foo> style email
// addresses into links.
//

	// URLs: the address is used as both the href and the link text.
	var linked = text.replace(/<((https?|ftp|dict):[^'">\s]+)>/gi,"<a href=\"$1\">$1</a>");

	// Email addresses: <address@domain.foo>
	//
	// Annotated form of the pattern:
	//
	//	<
	//	(?:mailto:)?
	//	(                                           // $1 = the address
	//		[-.\w]+
	//		\@
	//		[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
	//	)
	//	>
	//
	var emailPattern = /<(?:mailto:)?([-.\w]+\@[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+)>/gi;

	return linked.replace(emailPattern, function(wholeMatch, address) {
		return _EncodeEmailAddress(_UnescapeSpecialChars(address));
	});
};
1189
1190
var _EncodeEmailAddress = function(addr) {
//
//  Input: an email address, e.g. "foo@example.com"
//
//  Output: the email address as a mailto link, with each character
//      of the address encoded as either a decimal or hex entity, in
//      the hopes of foiling most address harvesting spam bots. E.g.:
//
//      <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
//         x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
//         &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
//
//  The choice of encoding is random, so the output differs between calls.
//
//  Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
//  mailing list: <http://tinyurl.com/yu7ue>
//

	var HEX_DIGITS = '0123456789ABCDEF';

	// The three ways a character can be written out, indexed 0..2.
	var encoders = [
		function asDecimalEntity(ch) {
			return "&#" + ch.charCodeAt(0) + ";";
		},
		function asHexEntity(ch) {
			// Two hex digits: high nibble, then low nibble.
			var code = ch.charCodeAt(0);
			return "&#x" + HEX_DIGITS.charAt(code>>4) + HEX_DIGITS.charAt(code&15) + ";";
		},
		function asIs(ch) {
			return ch;
		}
	];

	var encoded = ("mailto:" + addr).replace(/./g, function(ch) {
		// leave ':' alone (to spot mailto: later)
		if (ch == ":") {
			return ch;
		}

		// '@' *must* be encoded: pick decimal or hex, never raw.
		if (ch == "@") {
			return encoders[Math.floor(Math.random()*2)](ch);
		}

		// roughly 10% raw, 45% hex, 45% dec
		var roll = Math.random();
		if (roll > .9) {
			return encoders[2](ch);
		}
		if (roll > .45) {
			return encoders[1](ch);
		}
		return encoders[0](ch);
	});

	var link = "<a href=\"" + encoded + "\">" + encoded + "</a>";

	// strip the mailto: from the visible part
	return link.replace(/">.+:/g,"\">");
};
1244
1245
var _UnescapeSpecialChars = function(text) {
//
// Swap back in all the special characters we've hidden: every "~E<n>E"
// placeholder is replaced by the character whose code is the decimal
// number <n>.
//
	return text.replace(/~E(\d+)E/g,
		function(wholeMatch,m1) {
			// The radix is given explicitly so the digits are always read as
			// decimal, whatever the engine does with a leading zero.
			var charCodeToReplace = parseInt(m1, 10);
			return String.fromCharCode(charCodeToReplace);
		}
	);
};
1258
1259
var _Outdent = function(text) {
//
// Remove one level of indentation (one tab, or up to four spaces) from the
// start of every line.
//

	// attacklab: hack around Konqueror 3.5.4 bug:
	// "----------bug".replace(/^-/g,"") == "bug"
	// The indent is first turned into a "~0" marker, and the markers are then
	// removed in a second pass.
	var marked = text.replace(/^(\t|[ ]{1,4})/gm,"~0"); // attacklab: g_tab_width

	// attacklab: clean up hack
	return marked.replace(/~0/g,"");
};
1275
// Expand tabs to spaces, with tab stops every four columns.
var _Detab = function (text) {
	// Nothing to do for text without tabs.
	if (!/\t/.test(text)) {
		return text;
	}

	// Offset just past the most recent newline or tab. A tab always ends on a
	// tab stop, so measuring from there gives the column modulo four.
	var anchor = 0;

	return text.replace(/[\n\t]/g, function (ch, position) {
		var column = position - anchor;
		anchor = position + 1;

		if (ch === "\n") {
			return ch;
		}

		// Pad up to the next tab stop: 4, 3, 2 or 1 spaces.
		return "    ".slice(column % 4);
	});
};
1294
1295 //
1296 //  attacklab: Utility functions
1297 //
1298
1299
var escapeCharacters = function(text, charsToEscape, afterBackslash) {
//
// Replace every occurrence of the given characters with the placeholder
// produced by escapeCharacters_callback.
//
//   text           - string to process
//   charsToEscape  - string listing the characters to replace
//   afterBackslash - when truthy, a character is only replaced where it is
//                    preceded by a backslash (the backslash is consumed too)
//
// Returns the processed string.
//

	// First we have to escape the characters that are special inside a
	// character class, so that we can build one out of charsToEscape. Besides
	// brackets and backslash this covers '-' and '^': left bare, a '-' between
	// two characters would form a range and a leading '^' would negate the
	// whole class.
	var regexString = "([" + charsToEscape.replace(/([\[\]\\^-])/g,"\\$1") + "])";

	if (afterBackslash) {
		regexString = "\\\\" + regexString;
	}

	var regex = new RegExp(regexString,"g");
	text = text.replace(regex,escapeCharacters_callback);

	return text;
};
1314
1315
// Replacement callback for String.replace: returns the "~E<code>E"
// placeholder for the first character of the first captured group.
var escapeCharacters_callback = function(wholeMatch,m1) {
	return "~E" + m1.charCodeAt(0) + "E";
};
1320
1321 } // end of Attacklab.showdown.converter
1322
1323
// Backward compatibility: version 0.9 exposed this library as `Showdown`
// rather than `Attacklab.showdown`. The old name is deprecated, but it is
// still provided for now as an alias.
var Showdown = Attacklab.showdown;

// If a file-loaded hook has been installed on Attacklab, report that this
// file has finished loading.
if (Attacklab.fileLoaded) {
	Attacklab.fileLoaded("showdown.js");
}