You are here: Home > Latest news from Darcs > Imports Mediacloth's revision r146

Revision 20080724215655-9043f-2cc22a...

Imports Mediacloth's revision r146

Sanitizes ill-formed tags

vendor/mediacloth-trunk/lib/mediacloth/mediawikilexer.rb
vendor/mediacloth-trunk/test/lexer.rb
vendor/mediacloth-trunk/test/sanitization.rb

Changes to mediawikilexer.rb

281
281
282
  def match_left_angle
  def match_left_angle
282
283
    scanner = StringScanner.new(@text[@cursor .. -1])
    scanner = StringScanner.new(@text[@cursor .. -1])
283
284
    if scanner.scan(%r{<(\/?)([^\s<>\/]+)([^>]*)>})
    if scanner.scan(%r{<(\/?)([^\s<>\/]+)([^>]*)>}) and !scanner[3].include?(?<)
284
285
      #XHTML start or end tag, or just something surrounded by angle brackets
      #XHTML start or end tag, or just something surrounded by angle brackets
285
286
      tag_close_char, tag_name, attrs_string = scanner[1], scanner[2], scanner[3]
      tag_close_char, tag_name, attrs_string = scanner[1], scanner[2], scanner[3]
286
287
      if ELEMENT_WHITELIST.include?(tag_name.downcase)
      if ELEMENT_WHITELIST.include?(tag_name.downcase)
287

Changes to lexer.rb

564
      lex("<123>"))
      lex("<123>"))
564
565
    assert_equal([[:PARA_START, ""], [:CHAR_ENT, "lt"], [:TEXT, "xx xx"], [:CHAR_ENT, "gt"], [:PARA_END, ""], [false, false]],
    assert_equal([[:PARA_START, ""], [:CHAR_ENT, "lt"], [:TEXT, "xx xx"], [:CHAR_ENT, "gt"], [:PARA_END, ""], [false, false]],
565
566
      lex("<xx xx>"))
      lex("<xx xx>"))
566
567
    assert_equal([[:PARA_START, ""], [:CHAR_ENT, "lt"], [:TEXT, "xx </xx"], [:CHAR_ENT, "gt"], [:PARA_END, ""], [false, false]],
    assert_equal([[:PARA_START, ""], [:TEXT, "<xx "], [:CHAR_ENT, "lt"], [:TEXT, "/xx"], [:CHAR_ENT, "gt"], [:PARA_END, ""], [false, false]],
567
568
      lex("<xx </xx>"))
      lex("<xx </xx>"))
568
569
    assert_equal([[:PARA_START, ""], [:CHAR_ENT, "lt"], [:TEXT, "xx a='b' c"], [:CHAR_ENT, "gt"], [:PARA_END, ""], [false, false]],
    assert_equal([[:PARA_START, ""], [:CHAR_ENT, "lt"], [:TEXT, "xx a='b' c"], [:CHAR_ENT, "gt"], [:PARA_END, ""], [false, false]],
569
570
      lex("<xx a='b' c>"))
      lex("<xx a='b' c>"))
570

Changes to sanitization.rb

180
  end
  end
180
181
181
182
  def test_removes_on_attributes_even_from_legal_tags
  def test_removes_on_attributes_even_from_legal_tags
182
183
    assert_sanitizes_to %{Here is some <b >bold</b> text},
    assert_sanitizes_to "Here is some <b >bold</b> text",
183
184
                        %{Here is some <b onMouseOver="alert('Cuidado!')">bold</b> text}
                        "Here is some <b onMouseOver=\"alert('Watch out!')\">bold</b> text"
184
 
  end
185
 
186
 
  def test_sanitizes_broken_tags
187
 
    assert_sanitizes_to "&lt;strike &lt;invalid&gt;Striked?&lt;/strike&gt;",
188
 
                        "<strike <invalid>Striked?</strike>"
189
 
  end
190
 
191
 
  def test_escapes_lt_and_gt_characters
192
 
    assert_sanitizes_to "3 &gt; 0 and 1 &lt; 2",
193
 
                        "3 > 0 and 1 < 2"
194
185
  end
  end
195
186
196
187
private
private
197