"""Tests of the builder registry."""

import unittest
import warnings

from bs4 import BeautifulSoup
from bs4.builder import (
    builder_registry as registry,
    HTMLParserTreeBuilder,
    TreeBuilderRegistry,
)

# html5lib and lxml are optional dependencies. Record whether their tree
# builders can be imported so the tests below can adapt their expectations.
try:
    from bs4.builder import HTML5TreeBuilder
    HTML5LIB_PRESENT = True
except ImportError:
    HTML5LIB_PRESENT = False

try:
    from bs4.builder import (
        LXMLTreeBuilderForXML,
        LXMLTreeBuilder,
    )
    LXML_PRESENT = True
except ImportError:
    LXML_PRESENT = False


class BuiltInRegistryTest(unittest.TestCase):
    """Test the built-in registry with the default builders registered."""

    def test_combination(self):
        # Looking up a combination of features returns the builder that
        # advertises all of them.
        if LXML_PRESENT:
            self.assertEqual(registry.lookup('fast', 'html'),
                             LXMLTreeBuilder)
            self.assertEqual(registry.lookup('permissive', 'xml'),
                             LXMLTreeBuilderForXML)
        self.assertEqual(registry.lookup('strict', 'html'),
                         HTMLParserTreeBuilder)
        if HTML5LIB_PRESENT:
            self.assertEqual(registry.lookup('html5lib', 'html'),
                             HTML5TreeBuilder)

    def test_lookup_by_markup_type(self):
        # Which builder is returned for a markup type depends on which
        # optional parser libraries could be imported.
        if LXML_PRESENT:
            self.assertEqual(registry.lookup('html'), LXMLTreeBuilder)
            self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML)
        else:
            self.assertIsNone(registry.lookup('xml'))
            if HTML5LIB_PRESENT:
                self.assertEqual(registry.lookup('html'), HTML5TreeBuilder)
            else:
                self.assertEqual(registry.lookup('html'),
                                 HTMLParserTreeBuilder)

    def test_named_library(self):
        # A parser library's own name works as a lookup feature.
        if LXML_PRESENT:
            self.assertEqual(registry.lookup('lxml', 'xml'),
                             LXMLTreeBuilderForXML)
            self.assertEqual(registry.lookup('lxml', 'html'),
                             LXMLTreeBuilder)
        if HTML5LIB_PRESENT:
            self.assertEqual(registry.lookup('html5lib'),
                             HTML5TreeBuilder)

        self.assertEqual(registry.lookup('html.parser'),
                         HTMLParserTreeBuilder)

    def test_beautifulsoup_constructor_does_lookup(self):
        with warnings.catch_warnings(record=True):
            # This will create a warning about not explicitly
            # specifying a parser, but we'll ignore it.

            # You can pass in a string.
            BeautifulSoup("", features="html")
            # Or a list of strings.
            BeautifulSoup("", features=["html", "fast"])

        # You'll get an exception if BS can't find an appropriate
        # builder.
        with self.assertRaises(ValueError):
            BeautifulSoup("", features="no-such-feature")


class RegistryTest(unittest.TestCase):
    """Test the TreeBuilderRegistry class in general."""

    def setUp(self):
        # Every test starts from an empty registry of its own.
        self.registry = TreeBuilderRegistry()

    def builder_for_features(self, *feature_list):
        """Create a stub builder class, register it, and return it.

        The class is named after the given features and exposes them as
        its ``features`` attribute (a tuple of strings).
        """
        cls = type('Builder_' + '_'.join(feature_list),
                   (object,), {'features': feature_list})

        self.registry.register(cls)
        return cls

    def test_register_with_no_features(self):
        builder = self.builder_for_features()

        # Since the builder advertises no features, you can't find it
        # by looking up features.
        self.assertIsNone(self.registry.lookup('foo'))

        # But you can find it by doing a lookup with no features, if
        # this happens to be the only registered builder.
        self.assertEqual(self.registry.lookup(), builder)

    def test_register_with_features_makes_lookup_succeed(self):
        builder = self.builder_for_features('foo', 'bar')
        self.assertEqual(self.registry.lookup('foo'), builder)
        self.assertEqual(self.registry.lookup('bar'), builder)

    def test_lookup_fails_when_no_builder_implements_feature(self):
        self.builder_for_features('foo', 'bar')
        self.assertIsNone(self.registry.lookup('baz'))

    def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
        self.builder_for_features('foo')
        most_recent = self.builder_for_features('bar')
        self.assertEqual(self.registry.lookup(), most_recent)

    def test_lookup_fails_when_no_tree_builders_registered(self):
        self.assertIsNone(self.registry.lookup())

    def test_lookup_gets_most_recent_builder_supporting_all_features(self):
        # Registration order matters here, so keep these calls in sequence.
        # Builders that have only one of the two features are registered
        # both before and after the ones that have both.
        self.builder_for_features('foo')
        self.builder_for_features('bar')
        has_both_early = self.builder_for_features('foo', 'bar', 'baz')
        has_both_late = self.builder_for_features('foo', 'bar', 'quux')
        self.builder_for_features('bar')
        self.builder_for_features('foo')

        # There are two builders featuring 'foo' and 'bar', but
        # the one that also features 'quux' was registered later.
        self.assertEqual(self.registry.lookup('foo', 'bar'),
                         has_both_late)

        # There is only one builder featuring 'foo', 'bar', and 'baz'.
        self.assertEqual(self.registry.lookup('foo', 'bar', 'baz'),
                         has_both_early)

    def test_lookup_fails_when_cannot_reconcile_requested_features(self):
        # No single builder offers both 'bar' and 'baz'.
        self.builder_for_features('foo', 'bar')
        self.builder_for_features('foo', 'baz')
        self.assertIsNone(self.registry.lookup('bar', 'baz'))