Lines Matching +full:utf +full:- +full:8
103 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"')
111 …doctype = 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/x…
127 markup = b"""<?xml version="1.0" encoding="utf-8"?>
128 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
135 soup.encode("utf-8").replace(b"\n", b""),
152 """A <p> tag is never designated as an empty-element tag.
154 Even if the markup shows it as an empty-element tag, it
164 This applies to all tags except empty-element tags.
172 """A <br> tag is designated as an empty-element tag.
175 two tags, but it should always be an empty-element tag.
203 markup = "<p>foo<!--foobar-->baz</p>"
368 …markup = '<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE}…
424 self.assertEqual(soup.a.string, "\N{NO-BREAK SPACE}" * 2)
428 … expected = "<p><<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></p>".encode("utf-8")
430 self.assertEqual(soup.p.encode("utf-8"), expected)
434 # easy-to-understand document.
436 # Here it is in Unicode. Note that it claims to be in ISO-Latin-1.
437 …ode_html = '<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/>…
439 # That's because we're going to encode it into ISO-Latin-1, and use
441 iso_latin_html = unicode_html.encode("iso-8859-1")
443 # Parse the ISO-Latin-1 HTML.
445 # Encode it to UTF-8.
446 result = soup.encode("utf-8")
450 # UTF-8 instead of ISO-Latin-1.
451 expected = unicode_html.replace("ISO-Latin-1", "utf-8")
453 # And, of course, it would be in UTF-8, not Unicode.
454 expected = expected.encode("utf-8")
456 # Ta-da!
461 # Shift-JIS encoding, without choking.
464 b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
468 unicode_html = shift_jis_html.decode("shift-jis")
473 self.assertEqual(soup.encode("utf-8"), unicode_html.encode("utf-8"))
477 # A real-world test to make sure we can convert ISO-8859-8 (a
478 # Hebrew encoding) to UTF-8.
479 …t = b'<html><head><title>Hebrew (ISO 8859-8) in Visual Directionality</title></head><body><h1>Hebr…
481 hebrew_document, from_encoding="iso8859-8")
482 self.assertEqual(soup.original_encoding, 'iso8859-8')
484 soup.encode('utf-8'),
485 hebrew_document.decode("iso8859-8").encode("utf-8"))
489 # encoded in Shift-JIS.
490 meta_tag = ('<meta content="text/html; charset=x-sjis" '
491 'http-equiv="Content-type"/>')
496 '<meta http-equiv="Content-language" content="ja"/>'
497 '</head><body>Shift-JIS markup goes here.') % meta_tag
501 parsed_meta = soup.find('meta', {'http-equiv': 'Content-type'})
503 self.assertEqual('text/html; charset=x-sjis', content)
517 # encoded in Shift-JIS.
518 meta_tag = ('<meta id="encoding" charset="x-sjis" />')
523 '<meta http-equiv="Content-language" content="ja"/>'
524 '</head><body>Shift-JIS markup goes here.') % meta_tag
530 self.assertEqual('x-sjis', charset)
558 soup.encode(), b'<?xml version="1.0" encoding="utf-8"?>\n<root/>')
567 markup = b"""<?xml version="1.0" encoding="utf-8"?>
568 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
575 soup.encode("utf-8"), markup)
582 soup = BeautifulSoup(doc, "lxml-xml")
590 …markup = '<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu…
595 …markup = '<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc…
608 markup = (b'<?xml version="1.0" encoding="utf-8"?>\n<root>'
612 self.assertEqual(soup.encode("utf-8"), markup)
632 …markup = '<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><bar xsi:schemaLocation="http…
670 markup = '<?xml version="1.0" encoding="utf-8"?><html></html>'
673 self.assertEqual(soup.contents[0], '?xml version="1.0" encoding="utf-8"?')