"""Tests to ensure that the lxml tree builder generates good trees."""

import warnings

# lxml is an optional dependency. Record whether it is importable (and which
# version) so the test classes below can be skipped instead of erroring out.
try:
    import lxml.etree
    LXML_PRESENT = True
    LXML_VERSION = lxml.etree.LXML_VERSION
except ImportError:
    LXML_PRESENT = False
    LXML_VERSION = (0,)

if LXML_PRESENT:
    from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML

from bs4 import BeautifulStoneSoup
from bs4.testing import (
    HTMLTreeBuilderSmokeTest,
    XMLTreeBuilderSmokeTest,
    SoupTest,
    skipIf,
)


@skipIf(
    not LXML_PRESENT,
    "lxml seems not to be present, not testing its tree builder.")
class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
    """Run the HTML smoke tests against lxml's HTML tree builder.

    See ``HTMLTreeBuilderSmokeTest``.
    """

    @property
    def default_builder(self):
        """Return a fresh ``LXMLTreeBuilder`` for the test to parse with."""
        return LXMLTreeBuilder()

    def test_out_of_range_entity(self):
        """Numeric character references beyond the Unicode range are dropped."""
        # Decimal and hexadecimal references far too large to be code points.
        self.assertSoupEquals(
            "<p>foo&#10000000000000;bar</p>", "<p>foobar</p>")
        self.assertSoupEquals(
            "<p>foo&#x10000000000000;bar</p>", "<p>foobar</p>")
        self.assertSoupEquals(
            "<p>foo&#1000000000;bar</p>", "<p>foobar</p>")

    # In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
    # test if an old version of lxml is installed.

    @skipIf(
        not LXML_PRESENT or LXML_VERSION < (2, 3, 5, 0),
        "Skipping doctype test for old version of lxml to avoid segfault.")
    def test_empty_doctype(self):
        """An empty ``<!DOCTYPE>`` parses to a doctype node with no content."""
        soup = self.soup("<!DOCTYPE>")
        doctype = soup.contents[0]
        self.assertEqual("", doctype.strip())

    def test_beautifulstonesoup_is_xml_parser(self):
        """The deprecated BeautifulStoneSoup class parses as XML and warns."""
        # Make sure that the deprecated BSS class uses an xml builder
        # if one is installed.
        with warnings.catch_warnings(record=True) as w:
            # Record every warning, even if an identical one was already
            # emitted (and would otherwise be suppressed) earlier in the run.
            warnings.simplefilter("always")
            soup = BeautifulStoneSoup("<b />")
        self.assertEqual("<b/>", str(soup.b))
        # Look through everything recorded rather than assuming the
        # deprecation notice is the first warning in the list.
        self.assertTrue(
            any("BeautifulStoneSoup class is deprecated" in str(item.message)
                for item in w))


@skipIf(
    not LXML_PRESENT,
    "lxml seems not to be present, not testing its XML tree builder.")
class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
    """Run the XML smoke tests against lxml's XML tree builder.

    See ``XMLTreeBuilderSmokeTest``.
    """

    @property
    def default_builder(self):
        """Return a fresh ``LXMLTreeBuilderForXML`` for the test to parse with."""
        return LXMLTreeBuilderForXML()