| | | module HTML5 | 1 |
|---|
| | | | 2 |
|---|
# Exception class named EOF (presumably raised to signal end of input --
# confirm against callers).
#
# It derives directly from Exception rather than StandardError, so a bare
# `rescue` clause does NOT catch it; callers have to `rescue EOF` explicitly.
class EOF < Exception; end
| | | | 4 |
|---|
# Returns +str+ unchanged.
#
# NOTE(review): looks like a gettext-style hook for wrapping user-visible
# messages -- confirm against callers before relying on that.
def self._(str)
  str
end
| | | | 6 |
|---|
# Content-model flag names, as symbols, in declaration order.
CONTENT_MODEL_FLAGS = [
  :PCDATA,
  :RCDATA,
  :CDATA,
  :PLAINTEXT
]
| | | | 13 |
|---|
# Tag names (lowercase strings) classified as "scoping" elements.
SCOPING_ELEMENTS = %w[
  button caption html marquee
  object table td th
]
| | | | 24 |
|---|
# Tag names (lowercase strings) classified as "formatting" elements.
FORMATTING_ELEMENTS = %w[
  a b big em font i nobr
  s small strike strong tt u
]
| | | | 40 |
|---|
# Tag names (lowercase strings) classified as "special" elements,
# kept in alphabetical order.
SPECIAL_ELEMENTS = %w[
  address area base basefont bgsound blockquote body br
  center col colgroup
  dd dir div dl dt
  embed
  fieldset form frame frameset
  h1 h2 h3 h4 h5 h6 head hr
  iframe image img input isindex
  li link listing
  menu meta
  noembed noframes noscript
  ol optgroup option
  p param plaintext pre
  script select spacer style
  tbody textarea tfoot thead title tr
  ul
  wbr
]
| | | | 104 |
|---|
# Single-character strings treated as whitespace: tab, line feed,
# vertical tab (0x0B), form feed (0x0C), space (0x20) and carriage return.
# Spelled as explicit double-quoted strings so each escape is unambiguous.
SPACE_CHARACTERS = [
  "\t",
  "\n",
  "\x0B",
  "\x0C",
  "\x20",
  "\r"
]
| | | | 113 |
|---|
# Table-structure tag names grouped under the "table insert mode" label.
TABLE_INSERT_MODE_ELEMENTS = %w[table tbody tfoot thead tr]
| | | | 121 |
|---|
# Character classes. Note the mixed container types, which callers may
# depend on: the letter constants are Strings, DIGITS is a Range of
# one-character strings, and HEX_DIGITS is an Array.
ASCII_LOWERCASE = ('a'..'z').to_a.join('')
ASCII_UPPERCASE = ('A'..'Z').to_a.join('')
ASCII_LETTERS = ASCII_LOWERCASE + ASCII_UPPERCASE
DIGITS = '0'..'9'
# Decimal digits first, then lowercase a-f, then uppercase A-F.
HEX_DIGITS = DIGITS.to_a + ('a'..'f').to_a + ('A'..'F').to_a
| | | | 127 |
|---|
# Heading tag names. The order (h1 through h6) is significant and must be
# preserved.
HEADING_ELEMENTS = %w[h1 h2 h3 h4 h5 h6]
| | | | 137 |
|---|
# Tag names of void elements.
# XXX What about event-source and command? (open question carried over
# from the original source; neither is listed here)
VOID_ELEMENTS = %w[
  base link meta hr br img
  embed param area col input
]
| | | | 152 |
|---|
# Tag names grouped by the content-model constant they are listed under.
#
# NOTE(review): the HTML5 specification treats title/textarea as RCDATA and
# style/script/xmp/... as CDATA (raw text), so these two names look swapped
# relative to the spec. The values are left exactly as they were -- verify
# how callers use each constant before renaming anything.
CDATA_ELEMENTS = %w[title textarea]

RCDATA_ELEMENTS = %w[
  style script xmp iframe
  noembed noframes noscript
]
| | | | 164 |
|---|
# Boolean attribute names, keyed by tag name (String). The Symbol key
# :global holds the attribute names listed independently of any one tag.
BOOLEAN_ATTRIBUTES = {
  :global    => %w[irrelevant],
  'style'    => %w[scoped],
  'img'      => %w[ismap],
  'audio'    => %w[autoplay controls],
  'video'    => %w[autoplay controls],
  'script'   => %w[defer async],
  'details'  => %w[open],
  'datagrid' => %w[multiple disabled],
  'command'  => %w[hidden disabled checked default],
  'menu'     => %w[autosubmit],
  'fieldset' => %w[disabled readonly],
  'option'   => %w[disabled readonly selected],
  'optgroup' => %w[disabled readonly],
  'button'   => %w[disabled autofocus],
  'input'    => %w[disabled readonly required autofocus checked ismap],
  'select'   => %w[disabled readonly autofocus multiple],
  'output'   => %w[disabled readonly]
}
| | | | 185 |
|---|
# Unicode code points for the Windows-1252 bytes 0x80..0x9F.
#
# The array has to stay _ordered_ because it is accessed by index: entry i
# belongs to byte 0x80 + i. Bytes with no Windows-1252 assignment map to
# 65533 (U+FFFD).
ENTITIES_WINDOWS1252 = [
  8364,  # 0x80  0x20AC  EURO SIGN
  65533, # 0x81          UNDEFINED
  8218,  # 0x82  0x201A  SINGLE LOW-9 QUOTATION MARK
  402,   # 0x83  0x0192  LATIN SMALL LETTER F WITH HOOK
  8222,  # 0x84  0x201E  DOUBLE LOW-9 QUOTATION MARK
  8230,  # 0x85  0x2026  HORIZONTAL ELLIPSIS
  8224,  # 0x86  0x2020  DAGGER
  8225,  # 0x87  0x2021  DOUBLE DAGGER
  710,   # 0x88  0x02C6  MODIFIER LETTER CIRCUMFLEX ACCENT
  8240,  # 0x89  0x2030  PER MILLE SIGN
  352,   # 0x8A  0x0160  LATIN CAPITAL LETTER S WITH CARON
  8249,  # 0x8B  0x2039  SINGLE LEFT-POINTING ANGLE QUOTATION MARK
  338,   # 0x8C  0x0152  LATIN CAPITAL LIGATURE OE
  65533, # 0x8D          UNDEFINED
  381,   # 0x8E  0x017D  LATIN CAPITAL LETTER Z WITH CARON
  65533, # 0x8F          UNDEFINED
  65533, # 0x90          UNDEFINED
  8216,  # 0x91  0x2018  LEFT SINGLE QUOTATION MARK
  8217,  # 0x92  0x2019  RIGHT SINGLE QUOTATION MARK
  8220,  # 0x93  0x201C  LEFT DOUBLE QUOTATION MARK
  8221,  # 0x94  0x201D  RIGHT DOUBLE QUOTATION MARK
  8226,  # 0x95  0x2022  BULLET
  8211,  # 0x96  0x2013  EN DASH
  8212,  # 0x97  0x2014  EM DASH
  732,   # 0x98  0x02DC  SMALL TILDE
  8482,  # 0x99  0x2122  TRADE MARK SIGN
  353,   # 0x9A  0x0161  LATIN SMALL LETTER S WITH CARON
  8250,  # 0x9B  0x203A  SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
  339,   # 0x9C  0x0153  LATIN SMALL LIGATURE OE
  65533, # 0x9D          UNDEFINED
  382,   # 0x9E  0x017E  LATIN SMALL LETTER Z WITH CARON
  376    # 0x9F  0x0178  LATIN CAPITAL LETTER Y WITH DIAERESIS
]
| | | | 221 |
|---|
# Named character references mapped to their replacement text, written as
# UTF-8 byte escapes. Keys with and without a trailing ';' are separate
# entries; names that appear only with the ';' have no semicolon-less form
# in this table.
#
# ENTITIES was generated from Python using the following code:
#
#   import constants
#   entities = constants.entities.items()
#   entities.sort()
#   list = [ ' '.join([repr(entity), '=>', ord(value)<128 and
#     repr(str(value)) or repr(value.encode('utf-8')).replace("'",'"')])
#     for entity, value in entities]
#   print ' ENTITIES = {\n ' + ',\n '.join(list) + '\n }'

ENTITIES = {
  'AElig' => "\xc3\x86",
  'AElig;' => "\xc3\x86",
  'AMP' => '&',
  'AMP;' => '&',
  'Aacute' => "\xc3\x81",
  'Aacute;' => "\xc3\x81",
  'Acirc' => "\xc3\x82",
  'Acirc;' => "\xc3\x82",
  'Agrave' => "\xc3\x80",
  'Agrave;' => "\xc3\x80",
  'Alpha;' => "\xce\x91",
  'Aring' => "\xc3\x85",
  'Aring;' => "\xc3\x85",
  'Atilde' => "\xc3\x83",
  'Atilde;' => "\xc3\x83",
  'Auml' => "\xc3\x84",
  'Auml;' => "\xc3\x84",
  'Beta;' => "\xce\x92",
  'COPY' => "\xc2\xa9",
  'COPY;' => "\xc2\xa9",
  'Ccedil' => "\xc3\x87",
  'Ccedil;' => "\xc3\x87",
  'Chi;' => "\xce\xa7",
  'Dagger;' => "\xe2\x80\xa1",
  'Delta;' => "\xce\x94",
  'ETH' => "\xc3\x90",
  'ETH;' => "\xc3\x90",
  'Eacute' => "\xc3\x89",
  'Eacute;' => "\xc3\x89",
  'Ecirc' => "\xc3\x8a",
  'Ecirc;' => "\xc3\x8a",
  'Egrave' => "\xc3\x88",
  'Egrave;' => "\xc3\x88",
  'Epsilon;' => "\xce\x95",
  'Eta;' => "\xce\x97",
  'Euml' => "\xc3\x8b",
  'Euml;' => "\xc3\x8b",
  'GT' => '>',
  'GT;' => '>',
  'Gamma;' => "\xce\x93",
  'Iacute' => "\xc3\x8d",
  'Iacute;' => "\xc3\x8d",
  'Icirc' => "\xc3\x8e",
  'Icirc;' => "\xc3\x8e",
  'Igrave' => "\xc3\x8c",
  'Igrave;' => "\xc3\x8c",
  'Iota;' => "\xce\x99",
  'Iuml' => "\xc3\x8f",
  'Iuml;' => "\xc3\x8f",
  'Kappa;' => "\xce\x9a",
  'LT' => '<',
  'LT;' => '<',
  'Lambda;' => "\xce\x9b",
  'Mu;' => "\xce\x9c",
  'Ntilde' => "\xc3\x91",
  'Ntilde;' => "\xc3\x91",
  'Nu;' => "\xce\x9d",
  'OElig;' => "\xc5\x92",
  'Oacute' => "\xc3\x93",
  'Oacute;' => "\xc3\x93",
  'Ocirc' => "\xc3\x94",
  'Ocirc;' => "\xc3\x94",
  'Ograve' => "\xc3\x92",
  'Ograve;' => "\xc3\x92",
  'Omega;' => "\xce\xa9",
  'Omicron;' => "\xce\x9f",
  'Oslash' => "\xc3\x98",
  'Oslash;' => "\xc3\x98",
  'Otilde' => "\xc3\x95",
  'Otilde;' => "\xc3\x95",
  'Ouml' => "\xc3\x96",
  'Ouml;' => "\xc3\x96",
  'Phi;' => "\xce\xa6",
  'Pi;' => "\xce\xa0",
  'Prime;' => "\xe2\x80\xb3",
  'Psi;' => "\xce\xa8",
  'QUOT' => '"',
  'QUOT;' => '"',
  'REG' => "\xc2\xae",
  'REG;' => "\xc2\xae",
  'Rho;' => "\xce\xa1",
  'Scaron;' => "\xc5\xa0",
  'Sigma;' => "\xce\xa3",
  'THORN' => "\xc3\x9e",
  'THORN;' => "\xc3\x9e",
  'TRADE;' => "\xe2\x84\xa2",
  'Tau;' => "\xce\xa4",
  'Theta;' => "\xce\x98",
  'Uacute' => "\xc3\x9a",
  'Uacute;' => "\xc3\x9a",
  'Ucirc' => "\xc3\x9b",
  'Ucirc;' => "\xc3\x9b",
  'Ugrave' => "\xc3\x99",
  'Ugrave;' => "\xc3\x99",
  'Upsilon;' => "\xce\xa5",
  'Uuml' => "\xc3\x9c",
  'Uuml;' => "\xc3\x9c",
  'Xi;' => "\xce\x9e",
  'Yacute' => "\xc3\x9d",
  'Yacute;' => "\xc3\x9d",
  'Yuml;' => "\xc5\xb8",
  'Zeta;' => "\xce\x96",
  'aacute' => "\xc3\xa1",
  'aacute;' => "\xc3\xa1",
  'acirc' => "\xc3\xa2",
  'acirc;' => "\xc3\xa2",
  'acute' => "\xc2\xb4",
  'acute;' => "\xc2\xb4",
  'aelig' => "\xc3\xa6",
  'aelig;' => "\xc3\xa6",
  'agrave' => "\xc3\xa0",
  'agrave;' => "\xc3\xa0",
  'alefsym;' => "\xe2\x84\xb5",
  'alpha;' => "\xce\xb1",
  'amp' => '&',
  'amp;' => '&',
  'and;' => "\xe2\x88\xa7",
  'ang;' => "\xe2\x88\xa0",
  'apos;' => "'",
  'aring' => "\xc3\xa5",
  'aring;' => "\xc3\xa5",
  'asymp;' => "\xe2\x89\x88",
  'atilde' => "\xc3\xa3",
  'atilde;' => "\xc3\xa3",
  'auml' => "\xc3\xa4",
  'auml;' => "\xc3\xa4",
  'bdquo;' => "\xe2\x80\x9e",
  'beta;' => "\xce\xb2",
  'brvbar' => "\xc2\xa6",
  'brvbar;' => "\xc2\xa6",
  'bull;' => "\xe2\x80\xa2",
  'cap;' => "\xe2\x88\xa9",
  'ccedil' => "\xc3\xa7",
  'ccedil;' => "\xc3\xa7",
  'cedil' => "\xc2\xb8",
  'cedil;' => "\xc2\xb8",
  'cent' => "\xc2\xa2",
  'cent;' => "\xc2\xa2",
  'chi;' => "\xcf\x87",
  'circ;' => "\xcb\x86",
  'clubs;' => "\xe2\x99\xa3",
  'cong;' => "\xe2\x89\x85",
  'copy' => "\xc2\xa9",
  'copy;' => "\xc2\xa9",
  'crarr;' => "\xe2\x86\xb5",
  'cup;' => "\xe2\x88\xaa",
  'curren' => "\xc2\xa4",
  'curren;' => "\xc2\xa4",
  'dArr;' => "\xe2\x87\x93",
  'dagger;' => "\xe2\x80\xa0",
  'darr;' => "\xe2\x86\x93",
  'deg' => "\xc2\xb0",
  'deg;' => "\xc2\xb0",
  'delta;' => "\xce\xb4",
  'diams;' => "\xe2\x99\xa6",
  'divide' => "\xc3\xb7",
  'divide;' => "\xc3\xb7",
  'eacute' => "\xc3\xa9",
  'eacute;' => "\xc3\xa9",
  'ecirc' => "\xc3\xaa",
  'ecirc;' => "\xc3\xaa",
  'egrave' => "\xc3\xa8",
  'egrave;' => "\xc3\xa8",
  'empty;' => "\xe2\x88\x85",
  'emsp;' => "\xe2\x80\x83",
  'ensp;' => "\xe2\x80\x82",
  'epsilon;' => "\xce\xb5",
  'equiv;' => "\xe2\x89\xa1",
  'eta;' => "\xce\xb7",
  'eth' => "\xc3\xb0",
  'eth;' => "\xc3\xb0",
  'euml' => "\xc3\xab",
  'euml;' => "\xc3\xab",
  'euro;' => "\xe2\x82\xac",
  'exist;' => "\xe2\x88\x83",
  'fnof;' => "\xc6\x92",
  'forall;' => "\xe2\x88\x80",
  'frac12' => "\xc2\xbd",
  'frac12;' => "\xc2\xbd",
  'frac14' => "\xc2\xbc",
  'frac14;' => "\xc2\xbc",
  'frac34' => "\xc2\xbe",
  'frac34;' => "\xc2\xbe",
  'frasl;' => "\xe2\x81\x84",
  'gamma;' => "\xce\xb3",
  'ge;' => "\xe2\x89\xa5",
  'gt' => '>',
  'gt;' => '>',
  'hArr;' => "\xe2\x87\x94",
  'harr;' => "\xe2\x86\x94",
  'hearts;' => "\xe2\x99\xa5",
  'hellip;' => "\xe2\x80\xa6",
  'iacute' => "\xc3\xad",
  'iacute;' => "\xc3\xad",
  'icirc' => "\xc3\xae",
  'icirc;' => "\xc3\xae",
  'iexcl' => "\xc2\xa1",
  'iexcl;' => "\xc2\xa1",
  'igrave' => "\xc3\xac",
  'igrave;' => "\xc3\xac",
  'image;' => "\xe2\x84\x91",
  'infin;' => "\xe2\x88\x9e",
  'int;' => "\xe2\x88\xab",
  'iota;' => "\xce\xb9",
  'iquest' => "\xc2\xbf",
  'iquest;' => "\xc2\xbf",
  'isin;' => "\xe2\x88\x88",
  'iuml' => "\xc3\xaf",
  'iuml;' => "\xc3\xaf",
  'kappa;' => "\xce\xba",
  'lArr;' => "\xe2\x87\x90",
  'lambda;' => "\xce\xbb",
  'lang;' => "\xe3\x80\x88",
  'laquo' => "\xc2\xab",
  'laquo;' => "\xc2\xab",
  'larr;' => "\xe2\x86\x90",
  'lceil;' => "\xe2\x8c\x88",
  'ldquo;' => "\xe2\x80\x9c",
  'le;' => "\xe2\x89\xa4",
  'lfloor;' => "\xe2\x8c\x8a",
  'lowast;' => "\xe2\x88\x97",
  'loz;' => "\xe2\x97\x8a",
  'lrm;' => "\xe2\x80\x8e",
  'lsaquo;' => "\xe2\x80\xb9",
  'lsquo;' => "\xe2\x80\x98",
  'lt' => '<',
  'lt;' => '<',
  'macr' => "\xc2\xaf",
  'macr;' => "\xc2\xaf",
  'mdash;' => "\xe2\x80\x94",
  'micro' => "\xc2\xb5",
  'micro;' => "\xc2\xb5",
  'middot' => "\xc2\xb7",
  'middot;' => "\xc2\xb7",
  'minus;' => "\xe2\x88\x92",
  'mu;' => "\xce\xbc",
  'nabla;' => "\xe2\x88\x87",
  'nbsp' => "\xc2\xa0",
  'nbsp;' => "\xc2\xa0",
  'ndash;' => "\xe2\x80\x93",
  'ne;' => "\xe2\x89\xa0",
  'ni;' => "\xe2\x88\x8b",
  'not' => "\xc2\xac",
  'not;' => "\xc2\xac",
  'notin;' => "\xe2\x88\x89",
  'nsub;' => "\xe2\x8a\x84",
  'ntilde' => "\xc3\xb1",
  'ntilde;' => "\xc3\xb1",
  'nu;' => "\xce\xbd",
  'oacute' => "\xc3\xb3",
  'oacute;' => "\xc3\xb3",
  'ocirc' => "\xc3\xb4",
  'ocirc;' => "\xc3\xb4",
  'oelig;' => "\xc5\x93",
  'ograve' => "\xc3\xb2",
  'ograve;' => "\xc3\xb2",
  'oline;' => "\xe2\x80\xbe",
  'omega;' => "\xcf\x89",
  'omicron;' => "\xce\xbf",
  'oplus;' => "\xe2\x8a\x95",
  'or;' => "\xe2\x88\xa8",
  'ordf' => "\xc2\xaa",
  'ordf;' => "\xc2\xaa",
  'ordm' => "\xc2\xba",
  'ordm;' => "\xc2\xba",
  'oslash' => "\xc3\xb8",
  'oslash;' => "\xc3\xb8",
  'otilde' => "\xc3\xb5",
  'otilde;' => "\xc3\xb5",
  'otimes;' => "\xe2\x8a\x97",
  'ouml' => "\xc3\xb6",
  'ouml;' => "\xc3\xb6",
  'para' => "\xc2\xb6",
  'para;' => "\xc2\xb6",
  'part;' => "\xe2\x88\x82",
  'permil;' => "\xe2\x80\xb0",
  'perp;' => "\xe2\x8a\xa5",
  'phi;' => "\xcf\x86",
  'pi;' => "\xcf\x80",
  'piv;' => "\xcf\x96",
  'plusmn' => "\xc2\xb1",
  'plusmn;' => "\xc2\xb1",
  'pound' => "\xc2\xa3",
  'pound;' => "\xc2\xa3",
  'prime;' => "\xe2\x80\xb2",
  'prod;' => "\xe2\x88\x8f",
  'prop;' => "\xe2\x88\x9d",
  'psi;' => "\xcf\x88",
  'quot' => '"',
  'quot;' => '"',
  'rArr;' => "\xe2\x87\x92",
  'radic;' => "\xe2\x88\x9a",
  'rang;' => "\xe3\x80\x89",
  'raquo' => "\xc2\xbb",
  'raquo;' => "\xc2\xbb",
  'rarr;' => "\xe2\x86\x92",
  'rceil;' => "\xe2\x8c\x89",
  'rdquo;' => "\xe2\x80\x9d",
  'real;' => "\xe2\x84\x9c",
  'reg' => "\xc2\xae",
  'reg;' => "\xc2\xae",
  'rfloor;' => "\xe2\x8c\x8b",
  'rho;' => "\xcf\x81",
  'rlm;' => "\xe2\x80\x8f",
  'rsaquo;' => "\xe2\x80\xba",
  'rsquo;' => "\xe2\x80\x99",
  'sbquo;' => "\xe2\x80\x9a",
  'scaron;' => "\xc5\xa1",
  'sdot;' => "\xe2\x8b\x85",
  'sect' => "\xc2\xa7",
  'sect;' => "\xc2\xa7",
  'shy' => "\xc2\xad",
  'shy;' => "\xc2\xad",
  'sigma;' => "\xcf\x83",
  'sigmaf;' => "\xcf\x82",
  'sim;' => "\xe2\x88\xbc",
  'spades;' => "\xe2\x99\xa0",
  'sub;' => "\xe2\x8a\x82",
  'sube;' => "\xe2\x8a\x86",
  'sum;' => "\xe2\x88\x91",
  'sup1' => "\xc2\xb9",
  'sup1;' => "\xc2\xb9",
  'sup2' => "\xc2\xb2",
  'sup2;' => "\xc2\xb2",
  'sup3' => "\xc2\xb3",
  'sup3;' => "\xc2\xb3",
  'sup;' => "\xe2\x8a\x83",
  'supe;' => "\xe2\x8a\x87",
  'szlig' => "\xc3\x9f",
  'szlig;' => "\xc3\x9f",
  'tau;' => "\xcf\x84",
  'there4;' => "\xe2\x88\xb4",
  'theta;' => "\xce\xb8",
  'thetasym;' => "\xcf\x91",
  'thinsp;' => "\xe2\x80\x89",
  'thorn' => "\xc3\xbe",
  'thorn;' => "\xc3\xbe",
  'tilde;' => "\xcb\x9c",
  'times' => "\xc3\x97",
  'times;' => "\xc3\x97",
  'trade;' => "\xe2\x84\xa2",
  'uArr;' => "\xe2\x87\x91",
  'uacute' => "\xc3\xba",
  'uacute;' => "\xc3\xba",
  'uarr;' => "\xe2\x86\x91",
  'ucirc' => "\xc3\xbb",
  'ucirc;' => "\xc3\xbb",
  'ugrave' => "\xc3\xb9",
  'ugrave;' => "\xc3\xb9",
  'uml' => "\xc2\xa8",
  'uml;' => "\xc2\xa8",
  'upsih;' => "\xcf\x92",
  'upsilon;' => "\xcf\x85",
  'uuml' => "\xc3\xbc",
  'uuml;' => "\xc3\xbc",
  'weierp;' => "\xe2\x84\x98",
  'xi;' => "\xce\xbe",
  'yacute' => "\xc3\xbd",
  'yacute;' => "\xc3\xbd",
  'yen' => "\xc2\xa5",
  'yen;' => "\xc2\xa5",
  'yuml' => "\xc3\xbf",
  'yuml;' => "\xc3\xbf",
  'zeta;' => "\xce\xb6",
  'zwj;' => "\xe2\x80\x8d",
  'zwnj;' => "\xe2\x80\x8c"
}
| | | | 600 |
|---|
| | | ENCODINGS = %w[ | 601 |
|---|
| | | ansi_x3.4-1968 | 602 |
|---|
| | | iso-ir-6 | 603 |
|---|
| | | ansi_x3.4-1986 | 604 |
|---|
| | | iso_646.irv:1991 | 605 |
|---|
| | | ascii | 606 |
|---|
| | | iso646-us | 607 |
|---|
| | | us-ascii | 608 |
|---|
| | | us | 609 |
|---|
| | | ibm367 | 610 |
|---|
| | | cp367 | 611 |
|---|
| | | csascii | 612 |
|---|
| | | ks_c_5601-1987 | 613 |
|---|
| | | korean | 614 |
|---|
| | | iso-2022-kr | 615 |
|---|
| | | csiso2022kr | 616 |
|---|
| | | euc-kr | 617 |
|---|
| | | iso-2022-jp | 618 |
|---|
| | | csiso2022jp | 619 |
|---|
| | | iso-2022-jp-2 | 620 |
|---|
| | | iso-ir-58 | 621 |
|---|
| | | chinese | 622 |
|---|
| | | csiso58gb231280 | 623 |
|---|
| | | iso_8859-1:1987 | 624 |
|---|
| | | iso-ir-100 | 625 |
|---|
| | | iso_8859-1 | 626 |
|---|
| | | iso-8859-1 | 627 |
|---|
| | | latin1 | 628 |
|---|
| | | l1 | 629 |
|---|
| | | ibm819 | 630 |
|---|
| | | cp819 | 631 |
|---|
| | | csisolatin1 | 632 |
|---|
| | | iso_8859-2:1987 | 633 |
|---|
| | | iso-ir-101 | 634 |
|---|
| | | iso_8859-2 | 635 |
|---|
| | | iso-8859-2 | 636 |
|---|
| | | latin2 | 637 |
|---|
| | | l2 | 638 |
|---|
| | | csisolatin2 | 639 |
|---|
| | | iso_8859-3:1988 | 640 |
|---|
| | | iso-ir-109 | 641 |
|---|
| | | iso_8859-3 | 642 |
|---|
| | | iso-8859-3 | 643 |
|---|
| | | latin3 | 644 |
|---|
| | < |
|---|