Imports Mediacloth revisions r142-145
Sanitizes without second pass and uses html5's whitelist
| 1 | require 'strscan' | require 'strscan' | 1 |
|---|---|---|---|
| 2 | 2 | ||
require 'rubygems' | 3 | ||
require 'html5/sanitizer' | 4 | ||
| 5 | |||
| 3 | class String | class String | 6 |
| 4 | def is_empty_token? | def is_empty_token? | 7 |
| 5 | self.size == 0 or self == "\n" or self == "\r\n" | self.size == 0 or self == "\n" or self == "\r\n" | 8 |
| 144 more lines | |||
| 150 | @lexer_table.push(@default_lexer_table) | @lexer_table.push(@default_lexer_table) | 153 |
| 151 | end | end | 154 |
| 152 | 155 | ||
| 153 | WHITELIST = %w{del ins b i em u s strike font | ||
| 154 | big small sub sup cite code tt var strong | ||
| 155 | span h1 h2 h3 h4 h5 h6 div center | ||
| 156 | blockquote ol li ul table tr th td | ||
| 157 | ruby rb rp rt p br hr dl dt dd | ||
| 158 | pre nowiki math} | ||
| 159 | |||
| 160 | # Sanitizes the raw wiki input for dangerous HTML tags | ||
| 161 | def sanitize(input) | ||
| 162 | input.gsub(/<(\/?)([^\s>\/]+)([^>]*)>/) do | ||
| 163 | atts = clean_attributes($3) | ||
| 164 | WHITELIST.include?($2.downcase) ? "<#{$1}#{$2}#{atts}>" : | ||
| 165 | "&lt;#{$1}#{$2}#{$3}&gt;" | ||
| 166 | end | ||
| 167 | end | ||
| 168 | |||
| 169 | def clean_attributes(input) | ||
| 170 | input.gsub(/on[^=]*=(['|"])[^\1]*\1/, '') | ||
| 171 | end | ||
| 172 | 156 | ||
| 173 | def tokenize(input) | def tokenize(input) | 157 |
| 174 | @text = sanitize(input) | @text = input | 158 |
| 175 | # Current position in the input text | # Current position in the input text | 159 |
| 176 | @cursor = 0 | @cursor = 0 | 160 |
| 177 | # Tokens to be returned | # Tokens to be returned | 161 |
| 110 more lines | |||
| 288 | match_text | match_text | 272 |
| 289 | end | end | 273 |
| 290 | end | end | 274 |
| 291 | 275 | ||
ELEMENT_WHITELIST = HTML5::HTMLSanitizeModule::ALLOWED_ELEMENTS - | 276 | ||
%w{form input thead tbody label} + | 277 | ||
%w{nowiki ruby rp rb rt} | 278 | ||
| 279 | |||
ATTRIBUTE_WHITELIST = HTML5::HTMLSanitizeModule::ALLOWED_ATTRIBUTES + %w{face} | 280 | ||
| 281 | |||
| 292 | def match_left_angle | def match_left_angle | 282 |
| 293 | next_char = @text[@cursor + 1] | scanner = StringScanner.new(@text[@cursor .. -1]) | 283 |
| 294 | if next_char == 47 | if scanner.scan(%r{<(\/?)([^\s<>\/]+)([^>]*)>}) | 284 |
| 295 | # Might be an XHTML end tag | #XHTML start or end tag, or just something surrounded by angle brackets | 285 |
| 296 | if @text[@cursor .. -1] =~ %r{</([a-zA-Z][a-zA-Z0-9\-_]*)(\s*)>} and @context.include?(:TAG) | tag_close_char, tag_name, attrs_string = scanner[1], scanner[2], scanner[3] | 286 |
| 297 | # Found an XHTML end tag | if ELEMENT_WHITELIST.include?(tag_name.downcase) | 287 |
| 298 | tag_name = $1 | if tag_close_char == '/' and @context.include?(:TAG) | 288 |
| 299 | end_span(:TAG, $1) | # Found an XHTML end tag | 289 |
| 300 | @lexer_table.pop | end_span(:TAG, tag_name) | 290 |
| 301 | @cursor += $1.length + $2.length + 3 | @lexer_table.pop | 291 |
| 302 | else | elsif tag_close_char.empty? | 292 |
| 303 | match_text | ||
| 304 | end | ||
| 305 | elsif next_char > 64 and next_char < 123 | ||
| 306 | # Might be an XHTML open or empty tag | ||
| 307 | scanner = StringScanner.new(@text[@cursor .. -1]) | ||
| 308 | if scanner.scan(%r{<([a-zA-Z][a-zA-Z0-9\-_]*)}) | ||
| 309 | # Sequence begins with a valid tag name, so check for attributes | ||
| 310 | tag_name = scanner[1] | ||
| 311 | attrs = {} | ||
| 312 | while scanner.scan(%r{\s+([a-zA-Z][a-zA-Z0-9\-_]*)\s*=\s*('([^']+)'|"([^"]+)")}) do | ||
| 313 | attrs[scanner[1]] = scanner[3] ? scanner[3] : scanner[4] | ||
| 314 | end | ||
| 315 | scanner.scan(%r{\s*}) | ||
| 316 | if ((c = scanner.get_byte) == '>' or (c == '/' and scanner.get_byte == '>')) | ||
| 317 | # Found an XHTML start or empty tag | # Found an XHTML start or empty tag | 293 |
attrs = {} | 294 | ||
attrs_string.scan(/\s+([a-zA-Z][a-zA-Z0-9\-_]*)\s*=\s*(\'([^\']+)'|\"([^\"]+)\")/) do | 295 | ||
|name, value, sq_value, db_value| | 296 | ||
attrs[name] = sq_value || db_value if ATTRIBUTE_WHITELIST.include?(name.downcase) | 297 | ||
end | 298 | ||
| 299 | |||
empty_tag = attrs_string[-1] == ?/ | 300 | ||
| 301 | |||
| 318 | if tag_name == 'nowiki' | if tag_name == 'nowiki' | 302 |
| 319 | @lexer_table.push(@nowiki_lexer_table) unless c == '/' | @lexer_table.push(@nowiki_lexer_table) unless empty_tag | 303 |
| 320 | else | else | 304 |
| 321 | if tag_name == 'pre' | if tag_name == 'pre' | 305 |
| 322 | table = @pre_lexer_table | table = @pre_lexer_table | 306 |
| 7 more lines | |||
| 330 | append_to_tokens([:ATTR_NAME, name]) | append_to_tokens([:ATTR_NAME, name]) | 314 |
| 331 | append_to_tokens([:ATTR_VALUE, value]) if value | append_to_tokens([:ATTR_VALUE, value]) if value | 315 |
| 332 | end | end | 316 |
| 333 | if c == '/' | if empty_tag then | 317 |
| 334 | end_span(:TAG, tag_name) | end_span(:TAG, tag_name) | 318 |
| 335 | else | else | 319 |
| 336 | @lexer_table.push(table) | @lexer_table.push(table) | 320 |
| 337 | end | end | 321 |
| 338 | end | end | 322 |
| 339 | @cursor += scanner.pos | else | 323 |
| 340 | else | append_to_tokens([:CHAR_ENT, 'lt']) | 324 |
| 341 | match_text | append_to_tokens([:TEXT, tag_close_char + tag_name + attrs_string]) | 325 |
| 342 | end | append_to_tokens([:CHAR_ENT, 'gt']) | 326 |
end | 327 | ||
| 343 | else | else | 328 |
| 344 | match_text | append_to_tokens([:CHAR_ENT, 'lt']) | 329 |
append_to_tokens([:TEXT, tag_close_char + tag_name + attrs_string]) | 330 | ||
append_to_tokens([:CHAR_ENT, 'gt']) | 331 | ||
| 345 | end | end | 332 |
@cursor += 2 + tag_close_char.length + tag_name.length + attrs_string.length | 333 | ||
| 346 | else | else | 334 |
| 347 | match_text | match_text | 335 |
| 348 | end | end | 336 |
| 10 | s.summary = "A MediaWiki syntax parser and HTML generator." | s.summary = "A MediaWiki syntax parser and HTML generator." | 10 |
|---|---|---|---|
| 11 | 11 | ||
| 12 | s.add_dependency('builder', '>= 2.1.2') | s.add_dependency('builder', '>= 2.1.2') | 12 |
s.add_dependency('html5', '>= 0.10.0') | 13 | ||
| 13 | 14 | ||
| 14 | candidates = Dir.glob("{bin,docs,lib,test}/**/*") | candidates = Dir.glob("{bin,docs,lib,test}/**/*") | 15 |
| 15 | s.files = candidates.delete_if do |item| | s.files = candidates.delete_if do |item| | 16 |
| 6 | require 'test/unit' | require 'test/unit' | 6 |
|---|---|---|---|
| 7 | require 'testhelper' | require 'testhelper' | 7 |
| 8 | 8 | ||
| 9 | require 'hpricot' | ||
| 10 | |||
| 11 | class HTMLGenerator_Test < Test::Unit::TestCase | class HTMLGenerator_Test < Test::Unit::TestCase | 9 |
| 12 | 10 | ||
| 13 | include TestHelper | include TestHelper | 11 |
| 47 more lines | |||
| 61 | CategoryDirectiveHandler.new | CategoryDirectiveHandler.new | 59 |
| 62 | end | end | 60 |
| 63 | 61 | ||
| 64 | private | ||
| 65 | |||
| 66 | def assert_generates(result, input, link_handler=nil, message=nil) | ||
| 67 | parser = MediaWikiParser.new | ||
| 68 | parser.lexer = MediaWikiLexer.new | ||
| 69 | ast = parser.parse(input) | ||
| 70 | MediaWikiParams.instance.time = Time.utc(2000, 1, 1, 1, 1, 1, 1) | ||
| 71 | generator = MediaWikiHTMLGenerator.new | ||
| 72 | generator.link_handler = link_handler if link_handler | ||
| 73 | generator.parse(ast) | ||
| 74 | assert_same_html(result, generator.html, message) | ||
| 75 | end | ||
| 76 | |||
| 77 | def assert_same_html(expected, result, message) | ||
| 78 | assert_equal(Hpricot(expected).to_s, Hpricot(result).to_s, message) | ||
| 79 | end | ||
| 80 | end | end | 62 |
| 81 | 63 | ||
| 82 | class LinkAttributeHandler < MediaWikiLinkHandler | class LinkAttributeHandler < MediaWikiLinkHandler | 64 |
| 391 | lex("<tt/>")) | lex("<tt/>")) | 391 |
|---|---|---|---|
| 392 | assert_equal([[:PARA_START, ""], [:TAG_START, "tt"], [:TAG_END, "tt"], [:PARA_END, ""], [false, false]], | assert_equal([[:PARA_START, ""], [:TAG_START, "tt"], [:TAG_END, "tt"], [:PARA_END, ""], [false, false]], | 392 |
| 393 | lex("<tt />")) | lex("<tt />")) | 393 |
| 394 | assert_equal([[:PARA_START, ""], [:CHAR_ENT, "lt"], [:TEXT, "123"], [:CHAR_ENT, "gt"], [:PARA_END, ""], [false, false]], | ||
| 395 | lex("<123>")) | ||
| 396 | assert_equal([[:PARA_START, ""], [:CHAR_ENT, "lt"], [:TEXT, "xx xx"], [:CHAR_ENT, "gt"], [:PARA_END, ""], [false, false]], | ||
| 397 | lex("<xx xx>")) | ||
| 398 | assert_equal([[:PARA_START, ""], [:TEXT, "</xxx "], [:PARA_END, ""], [false, false]], | assert_equal([[:PARA_START, ""], [:TEXT, "</xxx "], [:PARA_END, ""], [false, false]], | 394 |
| 399 | lex("</xxx ")) | lex("</xxx ")) | 395 |
| 400 | assert_equal([[:PARA_START, ""], [:CHAR_ENT, "lt"], [:TEXT, "xx </xx"], [:CHAR_ENT, "gt"], [:PARA_END, ""], [false, false]], | ||
| 401 | lex("<xx </xx>")) | ||
| 402 | assert_equal([[:PARA_START, ""], [:CHAR_ENT, "lt"], [:TEXT, "xx a='b' c"], [:CHAR_ENT, "gt"], [:PARA_END, ""], [false, false]], | ||
| 403 | lex("<xx a='b' c>")) | ||
| 404 | assert_equal([[:PARA_START, ""], [:TEXT, "<>"], [:PARA_END, ""], [false, false]], | assert_equal([[:PARA_START, ""], [:TEXT, "<>"], [:PARA_END, ""], [false, false]], | 396 |
| 405 | lex("<>")) | lex("<>")) | 397 |
| 406 | assert_equal([[:PARA_START, ""], [:TAG_START, "tt"], [:ATTR_NAME, 'class'], [:ATTR_VALUE, 'tt'], | assert_equal([[:PARA_START, ""], [:TAG_START, "tt"], [:ATTR_NAME, 'class'], [:ATTR_VALUE, 'tt'], | 398 |
| 160 more lines | |||
| 567 | lex(";a:[[resource:text]]\n")) | lex(";a:[[resource:text]]\n")) | 559 |
| 568 | end | end | 560 |
| 569 | 561 | ||
def test_invalid_tags | 562 | ||
assert_equal([[:PARA_START, ""], [:CHAR_ENT, "lt"], [:TEXT, "123"], [:CHAR_ENT, "gt"], [:PARA_END, ""], [false, false]], | 563 | ||
lex("<123>")) | 564 | ||
assert_equal([[:PARA_START, ""], [:CHAR_ENT, "lt"], [:TEXT, "xx xx"], [:CHAR_ENT, "gt"], [:PARA_END, ""], [false, false]], | 565 | ||
lex("<xx xx>")) | 566 | ||
assert_equal([[:PARA_START, ""], [:CHAR_ENT, "lt"], [:TEXT, "xx </xx"], [:CHAR_ENT, "gt"], [:PARA_END, ""], [false, false]], | 567 | ||
lex("<xx </xx>")) | 568 | ||
assert_equal([[:PARA_START, ""], [:CHAR_ENT, "lt"], [:TEXT, "xx a='b' c"], [:CHAR_ENT, "gt"], [:PARA_END, ""], [false, false]], | 569 | ||
lex("<xx a='b' c>")) | 570 | ||
assert_equal([[:PARA_START, ""], [:CHAR_ENT, "lt"], [:TEXT, "invalid /"], [:CHAR_ENT, "gt"], [:PARA_END, ""], [false, false]], | 571 | ||
lex("<invalid />")) | 572 | ||
end | 573 | ||
| 570 | 574 | ||
| 571 | private | private | 575 |
| 572 | 576 | ||
| 5 | 5 | ||
|---|---|---|---|
| 6 | class SanitizationTest < Test::Unit::TestCase | class SanitizationTest < Test::Unit::TestCase | 6 |
| 7 | 7 | ||
include TestHelper | 8 | ||
| 9 | |||
| 8 | def setup | def setup | 10 |
| 9 | @@lexer ||= MediaWikiLexer.new | @@lexer ||= MediaWikiLexer.new | 11 |
| 10 | end | end | 12 |
| 12 more lines | |||
| 23 | end | end | 25 |
| 24 | 26 | ||
| 25 | def test_keeps_underline_and_strikethrough_tags | def test_keeps_underline_and_strikethrough_tags | 27 |
| 26 | assert_no_sanitization "This is very <u>important</u>, but that | assert_no_sanitization "This is very <u>important</u>, but that " + | 28 |
| 27 | can <s>safely</s> be <strike>ignored</strike>" | "can <s>safely</s> be <strike>ignored</strike>" | 29 |
| 28 | end | end | 30 |
| 29 | 31 | ||
| 30 | def test_keeps_font_tags | def test_keeps_font_tags | 32 |
| 9 more lines | |||
| 40 | end | end | 42 |
| 41 | 43 | ||
| 42 | def test_keeps_citation_tags | def test_keeps_citation_tags | 44 |
| 43 | assert_no_sanitization %{<cite>"Perfection is achieved, not when there is | assert_no_sanitization "<cite>Perfection is achieved, not when there is" + | 45 |
| 44 | nothing left to add, but when there is | " nothing left to add, but when there is" + | 46 |
| 45 | nothing left to remove."</cite> | " nothing left to remove.</cite>" + | 47 |
| 46 | -- Antoine de Saint-Exupery} | " -- Antoine de Saint-Exupery" | 48 |
| 47 | end | end | 49 |
| 48 | 50 | ||
| 49 | def test_keeps_code_and_teletype | def test_keeps_code_and_teletype | 51 |
| 50 | assert_no_sanitization "Text inside <code>code</code> and <tt>teletype</tt> | assert_no_sanitization "Text inside <code>code</code> and <tt>teletype</tt> " + | 52 |
| 51 | usually get rendered with a fixed width font" | "usually get rendered with a fixed width font" | 53 |
| 52 | end | end | 54 |
| 53 | 55 | ||
| 54 | def test_keeps_variable_tags | def test_keeps_variable_tags | 56 |
| 5 more lines | |||
| 60 | end | end | 62 |
| 61 | 63 | ||
| 62 | def test_keeps_spans | def test_keeps_spans | 64 |
| 63 | assert_no_sanitization %{Most environments will render | assert_no_sanitization "Most environments will render " + | 65 |
| 64 | <span style="color: red">this text</span> with | "<span style=\"color: red\">this text</span> with " + | 66 |
| 65 | different colours} | "different colours" | 67 |
| 66 | end | end | 68 |
| 67 | 69 | ||
| 68 | def test_keeps_headings | def test_keeps_headings | 70 |
| 69 | assert_no_sanitization "<h1>Heading 1</h1> | assert_no_sanitization "<h1>Heading 1</h1>" + | 71 |
| 70 | <h2>Heading 2</h2> | "<h2>Heading 2</h2>" + | 72 |
| 71 | <h3>Heading 3</h3> | "<h3>Heading 3</h3>" + | 73 |
| 72 | <h4>Heading 4</h4> | "<h4>Heading 4</h4>" + | 74 |
| 73 | <h5>Heading 5</h5> | "<h5>Heading 5</h5>" + | 75 |
| 74 | <h6>Heading 6</h6>" | "<h6>Heading 6</h6>" | 76 |
| 75 | end | end | 77 |
| 76 | 78 | ||
| 77 | def test_keeps_divs | def test_keeps_divs | 79 |
| 9 more lines | |||
| 87 | end | end | 89 |
| 88 | 90 | ||
| 89 | def test_keeps_ordered_and_unordered_lists | def test_keeps_ordered_and_unordered_lists | 91 |
| 90 | assert_no_sanitization "<ol> | assert_no_sanitization "<ol>" + | 92 |
| 91 | <li>Ordered</li> | "<li>Ordered</li>" + | 93 |
| 92 | <li>List</li> | "<li>List</li>" + | 94 |
| 93 | <li>(And list items)</li> | "<li>(And list items)</li>" + | 95 |
| 94 | </ol> | "</ol>" + | 96 |
| 95 | "<ul>" + | 97 | |
| 96 | <ul> | "<li>Unordered</li>" + | 98 |
| 97 | <li>Unordered</li> | "<li>List</li>" + | 99 |
| 98 | <li>List</li> | "<li>(And list items)</li>" + | 100 |
| 99 | <li>(And list items)</li> | "</ul>" | 101 |
| 100 | </ul>" | ||
| 101 | end | end | 102 |
| 102 | 103 | ||
| 103 | def test_keeps_table_and_main_components | def test_keeps_table_and_main_components | 104 |
| 104 | assert_no_sanitization "<table> | assert_no_sanitization "<table>" + | 105 |
| 105 | <tr><th>Table</th> <th>tag</th> <th /></tr> | "<tr><th>Table</th> <th>tag</th> <th /></tr>" + | 106 |
| 106 | <tr><td>and</td> <td>its</td> <td>components</td></tr> | "<tr><td>and</td> <td>its</td> <td>components</td></tr>" + | 107 |
| 107 | <tr><td>including</td><td>header</td><td>tags</td></tr> | "<tr><td>including</td><td>header</td><td>tags</td></tr>" + | 108 |
| 108 | </table>" | "</table>" | 109 |
| 109 | end | end | 110 |
| 110 | 111 | ||
| 111 | def test_keeps_ruby_tag_and_components | def test_keeps_ruby_tag_and_components | 112 |
| 112 | assert_no_sanitization "<ruby> | assert_no_sanitization "<ruby>" + | 113 |
| 113 | <rb>Ruby base</rb> | "<rb>Ruby base</rb>" + | 114 |
| 114 | <rp>(</rp> | "<rp>(</rp>" + | 115 |
| 115 | <rt>Ruby text</rt> | "<rt>Ruby text</rt>" + | 116 |
| 116 | <rp>)</rp> | "<rp>)</rp>" + | 117 |
| 117 | </ruby>" | "</ruby>" | 118 |
| 118 | end | end | 119 |
| 119 | 120 | ||
| 120 | def test_keeps_paragraph_tags | def test_keeps_paragraph_tags | 121 |
| 1 more lines | |||
| 122 | end | end | 123 |
| 123 | 124 | ||
| 124 | def test_keeps_linebreaks | def test_keeps_linebreaks | 125 |
| 125 | assert_no_sanitization "Break lines with an empty element<br /><br/> | assert_no_sanitization "Break lines with an empty element<br /><br/>" | 126 |
| 126 | Or using the opening tag only <br>" | ||
| 127 | end | end | 127 |
| 128 | 128 | ||
| 129 | def test_keeps_horizontal_rules | def test_keeps_horizontal_rules | 129 |
| 1 more lines | |||
| 131 | end | end | 131 |
| 132 | 132 | ||
| 133 | def test_keeps_definition_lists | def test_keeps_definition_lists | 133 |
| 134 | assert_no_sanitization "<dl> | assert_no_sanitization "<dl>" + | 134 |
| 135 | <dt>Definition terms</dt> | "<dt>Definition terms</dt>" + | 135 |
| 136 | <dd>And descriptions</dt> | "<dd>And descriptions</dd>" + | 136 |
| 137 | </dl>" | "</dl>" | 137 |
| 138 | end | end | 138 |
| 139 | 139 | ||
| 140 | def test_keeps_preformatted_text | def test_keeps_preformatted_text | 140 |
| 141 | assert_no_sanitization "<pre>Preformatted\ntext</pre>" | assert_no_sanitization "<pre>Preformatted\ntext</pre>" | 141 |
| 142 | end | end | 142 |
| 143 | 143 | ||
| 144 | def test_keeps_nowiki_tags | def test_keeps_nowiki_tags_and_sanitizes_inside | 144 |
| 145 | assert_no_sanitization "<nowiki>No wiki tag</nowiki>" | assert_sanitizes_to "No &lt;yy&gt;wiki&lt;/yy&gt; ''tag''", | 145 |
"<nowiki>No <yy>wiki</yy> ''tag''</nowiki>" | 146 | ||
| 146 | end | end | 147 |
| 147 | 148 | ||
| 148 | def test_keeps_math_tags | def test_keeps_math_tags | 149 |
| 6 more lines | |||
| 155 | end | end | 156 |
| 156 | 157 | ||
| 157 | def test_sanitizes_form_label_and_input_tags | def test_sanitizes_form_label_and_input_tags | 158 |
| 158 | assert_sanitizes_to %{&lt;form action="/send" method="post"&gt; | assert_sanitizes_to "&lt;form action=&quot;/send&quot; method=&quot;post&quot;&gt;" + | 159 |
| 159 | &lt;label for="username"&gt;Username&lt;/label&gt; | "&lt;label for=&quot;username&quot;&gt;Username&lt;/label&gt;" + | 160 |
| 160 | &lt;input name="login" id="username" /&gt; | "&lt;input name=&quot;login&quot; id=&quot;username&quot; /&gt;" + | 161 |
| 161 | &lt;/form&gt;}, | "&lt;/form&gt;", | 162 |
| 162 | %{<form action="/send" method="post"> | "<form action=\"/send\" method=\"post\">" + | 163 |
| 163 | <label for="username">Username</label> | "<label for=\"username\">Username</label>" + | 164 |
| 164 | <input name="login" id="username" /> | "<input name=\"login\" id=\"username\" />" + | 165 |
| 165 | </form>} | "</form>" | 166 |
| 166 | end | end | 167 |
| 167 | 168 | ||
| 168 | def test_keeps_and_sanitizes_with_spaces_before_the_closing_bracket | def test_keeps_and_sanitizes_with_spaces_before_the_closing_bracket | 169 |
| 169 | assert_sanitizes_to %{Here is some <b >bold</b> and <em>emphasized</em > | assert_sanitizes_to "Here is some <b >bold</b> and <em>emphasized</em >" + | 170 |
| 170 | text. But &lt;script type="text/javascript" &gt;alert('scripts')&lt;/script &gt; | " text. But &lt;script type=&quot;text/javascript&quot; &gt;" + | 171 |
| 171 | get sanitized}, | "alert('scripts')&lt;/script &gt; get sanitized", | 172 |
| 172 | %{Here is some <b >bold</b> and <em>emphasized</em > | "Here is some <b >bold</b> and <em>emphasized</em >" + | 173 |
| 173 | text. But <script type="text/javascript" >alert('scripts')</script > | " text. But <script type=\"text/javascript\" >" + | 174 |
| 174 | get sanitized} | "alert('scripts')</script > get sanitized" | 175 |
| 175 | end | end | 176 |
| 176 | 177 | ||
| 177 | def test_ignores_case_for_whitelisted_tags | def test_ignores_case_for_whitelisted_tags | 178 |
| 178 | assert_no_sanitization "<SUP>Superscript</sup> and <CODE>code</CODE>" | assert_no_sanitization "<SUP>Superscript</SUP> and <CODE>code</CODE>" | 179 |
| 179 | end | end | 180 |
| 180 | 181 | ||
| 181 | def test_removes_on_attributes_even_from_legal_tags | def test_removes_on_attributes_even_from_legal_tags | 182 |
| 4 more lines | |||
| 186 | private | private | 187 |
| 187 | 188 | ||
| 188 | def assert_sanitizes_to(expected, actual) | def assert_sanitizes_to(expected, actual) | 189 |
| 189 | assert_equal expected, @@lexer.sanitize(actual) | assert_generates("<p>#{expected}</p>", actual) | 190 |
| 190 | end | end | 191 |
| 191 | 192 | ||
| 192 | def assert_no_sanitization(expected) | def assert_no_sanitization(expected) | 193 |
require 'hpricot' | 1 | ||
|---|---|---|---|
| 2 | |||
| 1 | module TestHelper | module TestHelper | 3 |
| 2 | 4 | ||
| 3 | #Helper method for file-based comparison tests. | #Helper method for file-based comparison tests. | 5 |
| 20 more lines | |||
| 24 | end | end | 26 |
| 25 | end | end | 27 |
| 26 | end | end | 28 |
| 29 | |||
def assert_generates(result, input, link_handler=nil, message=nil) | 30 | ||
parser = MediaWikiParser.new | 31 | ||
parser.lexer = MediaWikiLexer.new | 32 | ||
ast = parser.parse(input) | 33 | ||
MediaWikiParams.instance.time = Time.utc(2000, 1, 1, 1, 1, 1, 1) | 34 | ||
generator = MediaWikiHTMLGenerator.new | 35 | ||
generator.link_handler = link_handler if link_handler | 36 | ||
generator.parse(ast) | 37 | ||
assert_same_html(result, generator.html, message) | 38 | ||
end | 39 | ||
| 40 | |||
def assert_same_html(expected, result, message) | 41 | ||
assert_equal(Hpricot(expected).to_s, Hpricot(result).to_s, message) | 42 | ||
end | 43 | ||
| 44 | |||
| 27 | end | end | 45 |