xref: /OK3568_Linux_fs/yocto/bitbake/lib/bs4/tests/test_builder_registry.py (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun"""Tests of the builder registry."""
2*4882a593Smuzhiyun
3*4882a593Smuzhiyunimport unittest
4*4882a593Smuzhiyunimport warnings
5*4882a593Smuzhiyun
6*4882a593Smuzhiyunfrom bs4 import BeautifulSoup
7*4882a593Smuzhiyunfrom bs4.builder import (
8*4882a593Smuzhiyun    builder_registry as registry,
9*4882a593Smuzhiyun    HTMLParserTreeBuilder,
10*4882a593Smuzhiyun    TreeBuilderRegistry,
11*4882a593Smuzhiyun)
12*4882a593Smuzhiyun
# Probe for the optional builders. Each flag records whether the
# corresponding import from bs4.builder succeeded; the tests below
# consult the flags before referring to those builder classes.
try:
    from bs4.builder import HTML5TreeBuilder
except ImportError:
    HTML5LIB_PRESENT = False
else:
    HTML5LIB_PRESENT = True

try:
    from bs4.builder import LXMLTreeBuilderForXML, LXMLTreeBuilder
except ImportError:
    LXML_PRESENT = False
else:
    LXML_PRESENT = True
27*4882a593Smuzhiyun
28*4882a593Smuzhiyun
class BuiltInRegistryTest(unittest.TestCase):
    """Exercise the built-in registry holding the default builders."""

    def test_combination(self):
        # Look up builders by several features at once. The expected
        # results are collected first (optional builders only when
        # they were importable) and then checked in order.
        cases = []
        if LXML_PRESENT:
            cases.append((('fast', 'html'), LXMLTreeBuilder))
            cases.append((('permissive', 'xml'), LXMLTreeBuilderForXML))
        cases.append((('strict', 'html'), HTMLParserTreeBuilder))
        if HTML5LIB_PRESENT:
            cases.append((('html5lib', 'html'), HTML5TreeBuilder))

        for wanted, expected_builder in cases:
            self.assertEqual(registry.lookup(*wanted), expected_builder)

    def test_lookup_by_markup_type(self):
        # With lxml importable, both markup types resolve to its
        # builders.
        if LXML_PRESENT:
            self.assertEqual(registry.lookup('html'), LXMLTreeBuilder)
            self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML)
            return

        # Without lxml the 'xml' lookup yields nothing, and 'html'
        # resolves to html5lib's builder when that was importable,
        # otherwise to the html.parser builder.
        self.assertEqual(registry.lookup('xml'), None)
        if HTML5LIB_PRESENT:
            html_builder = HTML5TreeBuilder
        else:
            html_builder = HTMLParserTreeBuilder
        self.assertEqual(registry.lookup('html'), html_builder)

    def test_named_library(self):
        # Look up builders by library name, optionally combined with
        # a markup type.
        cases = []
        if LXML_PRESENT:
            cases.append((('lxml', 'xml'), LXMLTreeBuilderForXML))
            cases.append((('lxml', 'html'), LXMLTreeBuilder))
        if HTML5LIB_PRESENT:
            cases.append((('html5lib',), HTML5TreeBuilder))
        cases.append((('html.parser',), HTMLParserTreeBuilder))

        for wanted, expected_builder in cases:
            self.assertEqual(registry.lookup(*wanted), expected_builder)

    def test_beautifulsoup_constructor_does_lookup(self):
        # These calls will create a warning about not explicitly
        # specifying a parser; it is captured here and ignored.
        with warnings.catch_warnings(record=True):
            # The features may be given as a single string or as a
            # list of strings.
            for requested in ("html", ["html", "fast"]):
                BeautifulSoup("", features=requested)

        # An exception is raised when no appropriate builder can be
        # found.
        with self.assertRaises(ValueError):
            BeautifulSoup("", features="no-such-feature")
85*4882a593Smuzhiyun
86*4882a593Smuzhiyunclass RegistryTest(unittest.TestCase):
87*4882a593Smuzhiyun    """Test the TreeBuilderRegistry class in general."""
88*4882a593Smuzhiyun
89*4882a593Smuzhiyun    def setUp(self):
90*4882a593Smuzhiyun        self.registry = TreeBuilderRegistry()
91*4882a593Smuzhiyun
92*4882a593Smuzhiyun    def builder_for_features(self, *feature_list):
93*4882a593Smuzhiyun        cls = type('Builder_' + '_'.join(feature_list),
94*4882a593Smuzhiyun                   (object,), {'features' : feature_list})
95*4882a593Smuzhiyun
96*4882a593Smuzhiyun        self.registry.register(cls)
97*4882a593Smuzhiyun        return cls
98*4882a593Smuzhiyun
99*4882a593Smuzhiyun    def test_register_with_no_features(self):
100*4882a593Smuzhiyun        builder = self.builder_for_features()
101*4882a593Smuzhiyun
102*4882a593Smuzhiyun        # Since the builder advertises no features, you can't find it
103*4882a593Smuzhiyun        # by looking up features.
104*4882a593Smuzhiyun        self.assertEqual(self.registry.lookup('foo'), None)
105*4882a593Smuzhiyun
106*4882a593Smuzhiyun        # But you can find it by doing a lookup with no features, if
107*4882a593Smuzhiyun        # this happens to be the only registered builder.
108*4882a593Smuzhiyun        self.assertEqual(self.registry.lookup(), builder)
109*4882a593Smuzhiyun
110*4882a593Smuzhiyun    def test_register_with_features_makes_lookup_succeed(self):
111*4882a593Smuzhiyun        builder = self.builder_for_features('foo', 'bar')
112*4882a593Smuzhiyun        self.assertEqual(self.registry.lookup('foo'), builder)
113*4882a593Smuzhiyun        self.assertEqual(self.registry.lookup('bar'), builder)
114*4882a593Smuzhiyun
115*4882a593Smuzhiyun    def test_lookup_fails_when_no_builder_implements_feature(self):
116*4882a593Smuzhiyun        builder = self.builder_for_features('foo', 'bar')
117*4882a593Smuzhiyun        self.assertEqual(self.registry.lookup('baz'), None)
118*4882a593Smuzhiyun
119*4882a593Smuzhiyun    def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
120*4882a593Smuzhiyun        builder1 = self.builder_for_features('foo')
121*4882a593Smuzhiyun        builder2 = self.builder_for_features('bar')
122*4882a593Smuzhiyun        self.assertEqual(self.registry.lookup(), builder2)
123*4882a593Smuzhiyun
124*4882a593Smuzhiyun    def test_lookup_fails_when_no_tree_builders_registered(self):
125*4882a593Smuzhiyun        self.assertEqual(self.registry.lookup(), None)
126*4882a593Smuzhiyun
127*4882a593Smuzhiyun    def test_lookup_gets_most_recent_builder_supporting_all_features(self):
128*4882a593Smuzhiyun        has_one = self.builder_for_features('foo')
129*4882a593Smuzhiyun        has_the_other = self.builder_for_features('bar')
130*4882a593Smuzhiyun        has_both_early = self.builder_for_features('foo', 'bar', 'baz')
131*4882a593Smuzhiyun        has_both_late = self.builder_for_features('foo', 'bar', 'quux')
132*4882a593Smuzhiyun        lacks_one = self.builder_for_features('bar')
133*4882a593Smuzhiyun        has_the_other = self.builder_for_features('foo')
134*4882a593Smuzhiyun
135*4882a593Smuzhiyun        # There are two builders featuring 'foo' and 'bar', but
136*4882a593Smuzhiyun        # the one that also features 'quux' was registered later.
137*4882a593Smuzhiyun        self.assertEqual(self.registry.lookup('foo', 'bar'),
138*4882a593Smuzhiyun                          has_both_late)
139*4882a593Smuzhiyun
140*4882a593Smuzhiyun        # There is only one builder featuring 'foo', 'bar', and 'baz'.
141*4882a593Smuzhiyun        self.assertEqual(self.registry.lookup('foo', 'bar', 'baz'),
142*4882a593Smuzhiyun                          has_both_early)
143*4882a593Smuzhiyun
144*4882a593Smuzhiyun    def test_lookup_fails_when_cannot_reconcile_requested_features(self):
145*4882a593Smuzhiyun        builder1 = self.builder_for_features('foo', 'bar')
146*4882a593Smuzhiyun        builder2 = self.builder_for_features('foo', 'baz')
147*4882a593Smuzhiyun        self.assertEqual(self.registry.lookup('bar', 'baz'), None)
148