xref: /OK3568_Linux_fs/yocto/poky/bitbake/lib/bs4/tests/test_tree.py (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun# -*- coding: utf-8 -*-
2*4882a593Smuzhiyun"""Tests for Beautiful Soup's tree traversal methods.
3*4882a593Smuzhiyun
4*4882a593SmuzhiyunThe tree traversal methods are the main advantage of using Beautiful
5*4882a593SmuzhiyunSoup over just using a parser.
6*4882a593Smuzhiyun
7*4882a593SmuzhiyunDifferent parsers will build different Beautiful Soup trees given the
8*4882a593Smuzhiyunsame markup, but all Beautiful Soup trees can be traversed with the
9*4882a593Smuzhiyunmethods tested here.
10*4882a593Smuzhiyun"""
11*4882a593Smuzhiyun
12*4882a593Smuzhiyunimport copy
13*4882a593Smuzhiyunimport pickle
14*4882a593Smuzhiyunimport re
15*4882a593Smuzhiyunimport warnings
16*4882a593Smuzhiyunfrom bs4 import BeautifulSoup
17*4882a593Smuzhiyunfrom bs4.builder import builder_registry
18*4882a593Smuzhiyunfrom bs4.element import (
19*4882a593Smuzhiyun    PY3K,
20*4882a593Smuzhiyun    CData,
21*4882a593Smuzhiyun    Comment,
22*4882a593Smuzhiyun    Declaration,
23*4882a593Smuzhiyun    Doctype,
24*4882a593Smuzhiyun    NavigableString,
25*4882a593Smuzhiyun    SoupStrainer,
26*4882a593Smuzhiyun    Tag,
27*4882a593Smuzhiyun)
28*4882a593Smuzhiyunfrom bs4.testing import SoupTest
29*4882a593Smuzhiyun
30*4882a593SmuzhiyunXML_BUILDER_PRESENT = (builder_registry.lookup("xml") is not None)
31*4882a593SmuzhiyunLXML_PRESENT = (builder_registry.lookup("lxml") is not None)
32*4882a593Smuzhiyun
class TreeTest(SoupTest):
    """Shared assertion helpers for the tree-traversal test cases."""

    def assertSelects(self, tags, should_match):
        """Check that the ``.string`` of each tag equals the expected list.

        Meant for tests whose markup defines a handful of tags, each
        holding a single string, and which then select some of those
        tags by one mechanism or another. Order matters.
        """
        found_strings = [element.string for element in tags]
        self.assertEqual(found_strings, should_match)

    def assertSelectsIDs(self, tags, should_match):
        """Check that the ``id`` attribute of each tag equals the expected list.

        Meant for tests whose markup gives each tag a distinct ``id``
        and which then select some of those tags. Order matters.
        """
        found_ids = [element['id'] for element in tags]
        self.assertEqual(found_ids, should_match)
52*4882a593Smuzhiyun
53*4882a593Smuzhiyun
class TestFind(TreeTest):
    """Basic tests of the find() method.

    find() just calls find_all() with limit=1, so it's not tested all
    that thoroughly here.
    """

    def test_find_tag(self):
        """find() returns the first tag with the requested name."""
        markup = "<a>1</a><b>2</b><a>3</a><b>4</b>"
        first_b = self.soup(markup).find("b")
        self.assertEqual(first_b.string, "2")

    def test_unicode_text_find(self):
        """A non-ASCII string can be located by its text."""
        soup = self.soup('<h1>Räksmörgås</h1>')
        match = soup.find(string='Räksmörgås')
        self.assertEqual(match, 'Räksmörgås')

    def test_unicode_attribute_find(self):
        """A tag can be located by a non-ASCII attribute value."""
        soup = self.soup('<h1 id="Räksmörgås">here it is</h1>')
        # The rendered text is discarded; presumably this only checks
        # that rendering the document does not raise.
        str(soup)
        heading = soup.find(id='Räksmörgås')
        self.assertEqual("here it is", heading.text)

    def test_find_everything(self):
        """Test an optimization that finds all tags."""
        every_tag = self.soup("<a>foo</a><b>bar</b>").find_all()
        self.assertEqual(2, len(every_tag))

    def test_find_everything_with_name(self):
        """Test an optimization that finds all tags with a given name."""
        anchors = self.soup("<a>foo</a><b>bar</b><a>baz</a>").find_all('a')
        self.assertEqual(2, len(anchors))
84*4882a593Smuzhiyun
class TestFindAll(TreeTest):
    """Basic tests of the find_all() method."""

    def test_find_all_text_nodes(self):
        """You can search the tree for text nodes."""
        soup = self.soup("<html>Foo<b>bar</b>\xbb</html>")
        every_string = ["Foo", "bar", '\xbb']

        # Exact match, using either spelling of the keyword argument.
        self.assertEqual(soup.find_all(string="bar"), ["bar"])
        self.assertEqual(soup.find_all(text="bar"), ["bar"])

        # A list matches any one of its members.
        candidates = ["Foo", "bar"]
        self.assertEqual(soup.find_all(text=candidates), ["Foo", "bar"])

        # A compiled regular expression is applied to each string.
        match_anything = re.compile('.*')
        self.assertEqual(soup.find_all(text=match_anything), every_string)

        # True matches every string.
        self.assertEqual(soup.find_all(text=True), every_string)

    def test_find_all_limit(self):
        """You can limit the number of items returned by find_all."""
        soup = self.soup("<a>1</a><a>2</a><a>3</a><a>4</a><a>5</a>")
        all_five = ["1", "2", "3", "4", "5"]

        self.assertSelects(soup.find_all('a', limit=3), ["1", "2", "3"])
        self.assertSelects(soup.find_all('a', limit=1), ["1"])
        # A limit larger than the number of matches returns them all.
        self.assertSelects(soup.find_all('a', limit=10), all_five)

        # A limit of 0 means no limit.
        self.assertSelects(soup.find_all('a', limit=0), all_five)

    def test_calling_a_tag_is_calling_findall(self):
        """Calling the soup or a tag behaves like calling find_all() on it."""
        soup = self.soup("<a>1</a><b>2<a id='foo'>3</a></b>")
        first_anchor = soup('a', limit=1)
        self.assertSelects(first_anchor, ["1"])
        inside_b = soup.b(id="foo")
        self.assertSelects(inside_b, ["3"])

    def test_find_all_with_self_referential_data_structure_does_not_cause_infinite_recursion(self):
        soup = self.soup("<a></a>")
        # Build a list whose only member is the list itself.
        cyclic = []
        cyclic.append(cyclic)

        # Without special code in _normalize_search_value, this would
        # cause infinite recursion.
        matches = soup.find_all(cyclic)
        self.assertEqual([], matches)

    def test_find_all_resultset(self):
        """All find_all calls return a ResultSet"""
        soup = self.soup("<a></a>")

        # The test recognises a ResultSet by its `source` attribute.
        by_name = soup.find_all("a")
        self.assertTrue(hasattr(by_name, "source"))

        by_true = soup.find_all(True)
        self.assertTrue(hasattr(by_true, "source"))

        by_text = soup.find_all(text="foo")
        self.assertTrue(hasattr(by_text, "source"))
142*4882a593Smuzhiyun
143*4882a593Smuzhiyun
class TestFindAllBasicNamespaces(TreeTest):
    """Searching by prefixed tag names and prefixed attribute names."""

    def test_find_by_namespaced_name(self):
        """A 'prefix:name' string works as a tag name and as an attribute key."""
        soup = self.soup('<mathml:msqrt>4</mathml:msqrt><a svg:fill="red">')

        msqrt = soup.find("mathml:msqrt")
        self.assertEqual("4", msqrt.string)

        red_filled = soup.find(attrs={"svg:fill": "red"})
        self.assertEqual("a", red_filled.name)
150*4882a593Smuzhiyun
151*4882a593Smuzhiyun
class TestFindAllByName(TreeTest):
    """Test ways of finding tags by tag name."""

    def setUp(self):
        """Parse a small document with two top-level <a>/<b> tags and a nested <a>."""
        # Name this class (not TreeTest) in super() so that a setUp()
        # defined on TreeTest would not be skipped.
        super(TestFindAllByName, self).setUp()
        self.tree =  self.soup("""<a>First tag.</a>
                                  <b>Second tag.</b>
                                  <c>Third <a>Nested tag.</a> tag.</c>""")

    def test_find_all_by_tag_name(self):
        # Find all the <a> tags.
        self.assertSelects(
            self.tree.find_all('a'), ['First tag.', 'Nested tag.'])

    def test_find_all_by_name_and_text(self):
        # A literal string must equal the tag's string.
        self.assertSelects(
            self.tree.find_all('a', text='First tag.'), ['First tag.'])

        # True accepts any <a> tag that has a string.
        self.assertSelects(
            self.tree.find_all('a', text=True), ['First tag.', 'Nested tag.'])

        # A regular expression is searched for within the tag's string.
        self.assertSelects(
            self.tree.find_all('a', text=re.compile("tag")),
            ['First tag.', 'Nested tag.'])

    def test_find_all_on_non_root_element(self):
        # You can call find_all on any node, not just the root.
        self.assertSelects(self.tree.c.find_all('a'), ['Nested tag.'])

    def test_calling_element_invokes_find_all(self):
        self.assertSelects(self.tree('a'), ['First tag.', 'Nested tag.'])

    def test_find_all_by_tag_strainer(self):
        # A SoupStrainer can be passed in place of a tag name.
        self.assertSelects(
            self.tree.find_all(SoupStrainer('a')),
            ['First tag.', 'Nested tag.'])

    def test_find_all_by_tag_names(self):
        # A list of names matches a tag with any of those names.
        self.assertSelects(
            self.tree.find_all(['a', 'b']),
            ['First tag.', 'Second tag.', 'Nested tag.'])

    def test_find_all_by_tag_dict(self):
        # A dictionary keyed by tag name selects the same tags here.
        self.assertSelects(
            self.tree.find_all({'a' : True, 'b' : True}),
            ['First tag.', 'Second tag.', 'Nested tag.'])

    def test_find_all_by_tag_re(self):
        # A regular expression is matched against each tag's name.
        self.assertSelects(
            self.tree.find_all(re.compile('^[ab]$')),
            ['First tag.', 'Second tag.', 'Nested tag.'])

    def test_find_all_with_tags_matching_method(self):
        # You can define an oracle method that determines whether
        # a tag matches the search.
        def id_matches_name(tag):
            return tag.name == tag.get('id')

        tree = self.soup("""<a id="a">Match 1.</a>
                            <a id="1">Does not match.</a>
                            <b id="b">Match 2.</a>""")

        self.assertSelects(
            tree.find_all(id_matches_name), ["Match 1.", "Match 2."])
217*4882a593Smuzhiyun
218*4882a593Smuzhiyun
class TestFindAllByAttribute(TreeTest):
    """Test ways of finding tags by their attribute values."""

    def test_find_all_by_attribute_name(self):
        # You can pass in keyword arguments to find_all to search by
        # attribute.
        tree = self.soup("""
                         <a id="first">Matching a.</a>
                         <a id="second">
                          Non-matching <b id="first">Matching b.</b>a.
                         </a>""")
        self.assertSelects(tree.find_all(id='first'),
                           ["Matching a.", "Matching b."])

    def test_find_all_by_utf8_attribute_value(self):
        # The search value may be UTF-8 bytes, the decoded string, or a
        # list containing the bytes.
        peace = "םולש".encode("utf8")
        data = '<a title="םולש"></a>'.encode("utf8")
        soup = self.soup(data)
        self.assertEqual([soup.a], soup.find_all(title=peace))
        self.assertEqual([soup.a], soup.find_all(title=peace.decode("utf8")))
        self.assertEqual([soup.a], soup.find_all(title=[peace, "something else"]))

    def test_find_all_by_attribute_dict(self):
        # You can pass in a dictionary as the argument 'attrs'. This
        # lets you search for attributes like 'name' (a fixed argument
        # to find_all) and 'class' (a reserved word in Python.)
        tree = self.soup("""
                         <a name="name1" class="class1">Name match.</a>
                         <a name="name2" class="class2">Class match.</a>
                         <a name="name3" class="class3">Non-match.</a>
                         <name1>A tag called 'name1'.</name1>
                         """)

        # This doesn't do what you want.
        self.assertSelects(tree.find_all(name='name1'),
                           ["A tag called 'name1'."])
        # This does what you want.
        self.assertSelects(tree.find_all(attrs={'name' : 'name1'}),
                           ["Name match."])

        self.assertSelects(tree.find_all(attrs={'class' : 'class2'}),
                           ["Class match."])

    def test_find_all_by_class(self):
        tree = self.soup("""
                         <a class="1">Class 1.</a>
                         <a class="2">Class 2.</a>
                         <b class="1">Class 1.</b>
                         <c class="3 4">Class 3 and 4.</c>
                         """)

        # Passing in the class_ keyword argument will search against
        # the 'class' attribute.
        self.assertSelects(tree.find_all('a', class_='1'), ['Class 1.'])
        self.assertSelects(tree.find_all('c', class_='3'), ['Class 3 and 4.'])
        self.assertSelects(tree.find_all('c', class_='4'), ['Class 3 and 4.'])

        # Passing in a string to 'attrs' will also search the CSS class.
        self.assertSelects(tree.find_all('a', '1'), ['Class 1.'])
        self.assertSelects(tree.find_all(attrs='1'), ['Class 1.', 'Class 1.'])
        self.assertSelects(tree.find_all('c', '3'), ['Class 3 and 4.'])
        self.assertSelects(tree.find_all('c', '4'), ['Class 3 and 4.'])

    def test_find_by_class_when_multiple_classes_present(self):
        tree = self.soup("<gar class='foo bar'>Found it</gar>")

        f = tree.find_all("gar", class_=re.compile("o"))
        self.assertSelects(f, ["Found it"])

        f = tree.find_all("gar", class_=re.compile("a"))
        self.assertSelects(f, ["Found it"])

        # Since the class is not the string "foo bar", but the two
        # strings "foo" and "bar", this will not find anything.
        f = tree.find_all("gar", class_=re.compile("o b"))
        self.assertSelects(f, [])

    def test_find_all_with_non_dictionary_for_attrs_finds_by_class(self):
        soup = self.soup("<a class='bar'>Found it</a>")

        # A regular expression passed as 'attrs' is matched against the class.
        self.assertSelects(soup.find_all("a", re.compile("ba")), ["Found it"])

        # So is a function: it is called with the class value ("bar").
        def big_attribute_value(value):
            return len(value) > 3

        self.assertSelects(soup.find_all("a", big_attribute_value), [])

        def small_attribute_value(value):
            return len(value) <= 3

        self.assertSelects(
            soup.find_all("a", small_attribute_value), ["Found it"])

    def test_find_all_with_string_for_attrs_finds_multiple_classes(self):
        soup = self.soup('<a class="foo bar"></a><a class="foo"></a>')
        a, a2 = soup.find_all("a")
        self.assertEqual([a, a2], soup.find_all("a", "foo"))
        self.assertEqual([a], soup.find_all("a", "bar"))

        # If you specify the class as a string that contains a
        # space, only that specific value will be found.
        self.assertEqual([a], soup.find_all("a", class_="foo bar"))
        self.assertEqual([a], soup.find_all("a", "foo bar"))
        self.assertEqual([], soup.find_all("a", "bar foo"))

    def test_find_all_by_attribute_soupstrainer(self):
        tree = self.soup("""
                         <a id="first">Match.</a>
                         <a id="second">Non-match.</a>""")

        strainer = SoupStrainer(attrs={'id' : 'first'})
        self.assertSelects(tree.find_all(strainer), ['Match.'])

    def test_find_all_with_missing_atribute(self):
        # You can pass in None as the value of an attribute to find_all.
        # This will match tags that do not have that attribute set.
        tree = self.soup("""<a id="1">ID present.</a>
                            <a>No ID present.</a>
                            <a id="">ID is empty.</a>""")
        self.assertSelects(tree.find_all('a', id=None), ["No ID present."])

    def test_find_all_with_defined_attribute(self):
        # You can pass in True as the value of an attribute to find_all.
        # This will match tags that have that attribute set to any value,
        # including the empty string.
        tree = self.soup("""<a id="1">ID present.</a>
                            <a>No ID present.</a>
                            <a id="">ID is empty.</a>""")
        self.assertSelects(
            tree.find_all(id=True), ["ID present.", "ID is empty."])

    def test_find_all_with_numeric_attribute(self):
        # If you search for a number, it's treated as a string.
        tree = self.soup("""<a id=1>Unquoted attribute.</a>
                            <a id="1">Quoted attribute.</a>""")

        expected = ["Unquoted attribute.", "Quoted attribute."]
        self.assertSelects(tree.find_all(id=1), expected)
        self.assertSelects(tree.find_all(id="1"), expected)

    def test_find_all_with_list_attribute_values(self):
        # You can pass a list of attribute values instead of just one,
        # and you'll get tags that match any of the values.
        tree = self.soup("""<a id="1">1</a>
                            <a id="2">2</a>
                            <a id="3">3</a>
                            <a>No ID.</a>""")
        self.assertSelects(tree.find_all(id=["1", "3", "4"]),
                           ["1", "3"])

    def test_find_all_with_regular_expression_attribute_value(self):
        # You can pass a regular expression as an attribute value, and
        # you'll get tags whose values for that attribute match the
        # regular expression.
        tree = self.soup("""<a id="a">One a.</a>
                            <a id="aa">Two as.</a>
                            <a id="ab">Mixed as and bs.</a>
                            <a id="b">One b.</a>
                            <a>No ID.</a>""")

        self.assertSelects(tree.find_all(id=re.compile("^a+$")),
                           ["One a.", "Two as."])

    def test_find_by_name_and_containing_string(self):
        # Both the tag name and the contained string must match.
        soup = self.soup("<b>foo</b><b>bar</b><a>foo</a>")
        a = soup.a

        self.assertEqual([a], soup.find_all("a", text="foo"))
        # "bar" only occurs inside a <b> tag, so no <a> tag matches.
        self.assertEqual([], soup.find_all("a", text="bar"))

    def test_find_by_name_and_containing_string_when_string_is_buried(self):
        # The second <a> holds "foo" only through nested tags, yet both
        # <a> tags are expected to match.
        soup = self.soup("<a>foo</a><a><b><c>foo</c></b></a>")
        self.assertEqual(soup.find_all("a"), soup.find_all("a", text="foo"))

    def test_find_by_attribute_and_containing_string(self):
        # Both the attribute value and the contained string must match.
        soup = self.soup('<b id="1">foo</b><a id="2">foo</a>')
        a = soup.a

        self.assertEqual([a], soup.find_all(id=2, text="foo"))
        self.assertEqual([], soup.find_all(id=1, text="bar"))
398*4882a593Smuzhiyun
399*4882a593Smuzhiyun
400*4882a593Smuzhiyun
401*4882a593Smuzhiyun
class TestIndex(TreeTest):
    """Test Tag.index"""

    def test_index(self):
        """index() returns each child's own position, even among look-alike siblings."""
        tree = self.soup("""<div>
                            <a>Identical</a>
                            <b>Not identical</b>
                            <a>Identical</a>

                            <c><d>Identical with child</d></c>
                            <b>Also not identical</b>
                            <c><d>Identical with child</d></c>
                            </div>""")
        container = tree.div
        position = 0
        for child in container.contents:
            self.assertEqual(position, container.index(child))
            position += 1

        # Something that is not a child of the tree cannot be indexed.
        with self.assertRaises(ValueError):
            tree.index(1)
418*4882a593Smuzhiyun
419*4882a593Smuzhiyun
class TestParentOperations(TreeTest):
    """Test navigation and searching through an element's parents."""

    def setUp(self):
        """Parse nested <ul> tags and remember the <b> tag the tests start from."""
        super(TestParentOperations, self).setUp()
        markup = '''<ul id="empty"></ul>
                                 <ul id="top">
                                  <ul id="middle">
                                   <ul id="bottom">
                                    <b>Start here</b>
                                   </ul>
                                  </ul>'''
        self.tree = self.soup(markup)
        self.start = self.tree.b

    def test_parent(self):
        """.parent climbs one level at a time."""
        bottom = self.start.parent
        self.assertEqual(bottom['id'], 'bottom')
        middle = self.start.parent.parent
        self.assertEqual(middle['id'], 'middle')
        top = self.start.parent.parent.parent
        self.assertEqual(top['id'], 'top')

    def test_parent_of_top_tag_is_soup_object(self):
        first_child = self.tree.contents[0]
        self.assertEqual(first_child.parent, self.tree)

    def test_soup_object_has_no_parent(self):
        root_parent = self.tree.parent
        self.assertEqual(None, root_parent)

    def test_find_parents(self):
        """find_parents() lists matching ancestors, nearest first."""
        every_ul = self.start.find_parents('ul')
        self.assertSelectsIDs(every_ul, ['bottom', 'middle', 'top'])
        only_middle = self.start.find_parents('ul', id="middle")
        self.assertSelectsIDs(only_middle, ['middle'])

    def test_find_parent(self):
        """find_parent() returns the nearest matching ancestor."""
        nearest_ul = self.start.find_parent('ul')
        self.assertEqual(nearest_ul['id'], 'bottom')
        top_ul = self.start.find_parent('ul', id='top')
        self.assertEqual(top_ul['id'], 'top')

    def test_parent_of_text_element(self):
        text_node = self.tree.find(text="Start here")
        self.assertEqual(text_node.parent.name, 'b')

    def test_text_element_find_parent(self):
        text_node = self.tree.find(text="Start here")
        nearest_ul = text_node.find_parent('ul')
        self.assertEqual(nearest_ul['id'], 'bottom')

    def test_parent_generator(self):
        """.parents yields the ancestors from nearest to farthest."""
        ancestor_ids = []
        for ancestor in self.start.parents:
            # Skip anything without an 'id' attribute.
            if ancestor is None or 'id' not in ancestor.attrs:
                continue
            ancestor_ids.append(ancestor['id'])
        self.assertEqual(ancestor_ids, ['bottom', 'middle', 'top'])
469*4882a593Smuzhiyun
470*4882a593Smuzhiyun
class ProximityTest(TreeTest):
    """Base class for the next/previous tests; provides a small shared tree."""

    def setUp(self):
        """Parse a document whose <body> holds three <b> tags with ids 1 to 3."""
        # Name this class (not TreeTest) in super() so that a setUp()
        # defined on TreeTest would not be skipped.
        super(ProximityTest, self).setUp()
        self.tree = self.soup(
            '<html id="start"><head></head><body><b id="1">One</b><b id="2">Two</b><b id="3">Three</b></body></html>')
477*4882a593Smuzhiyun
478*4882a593Smuzhiyun
class TestNextOperations(ProximityTest):
    """Test navigation and searching through the elements after a node."""

    def setUp(self):
        """Start every test from the first <b> tag of the shared tree."""
        super(TestNextOperations, self).setUp()
        self.start = self.tree.b

    def test_next(self):
        # The element after a tag is its own contents; after that comes
        # the next tag.
        self.assertEqual(self.start.next_element, "One")
        self.assertEqual(self.start.next_element.next_element['id'], "2")

    def test_next_of_last_item_is_none(self):
        last = self.tree.find(text="Three")
        self.assertEqual(last.next_element, None)

    def test_next_of_root_is_none(self):
        # The document root is outside the next/previous chain.
        self.assertEqual(self.tree.next_element, None)

    def test_find_all_next(self):
        # The starting tag itself is not part of the results.
        self.assertSelects(self.start.find_all_next('b'), ["Two", "Three"])
        self.assertSelects(self.start.find_all_next(id=3), ["Three"])

    def test_find_next(self):
        self.assertEqual(self.start.find_next('b')['id'], '2')
        self.assertEqual(self.start.find_next(text="Three"), "Three")

    def test_find_next_for_text_element(self):
        # The same searches work when starting from a text node.
        text = self.tree.find(text="One")
        self.assertEqual(text.find_next("b").string, "Two")
        self.assertSelects(text.find_all_next("b"), ["Two", "Three"])

    def test_next_generator(self):
        start = self.tree.find(text="Two")
        successors = list(start.next_elements)
        # There are two successors: the final <b> tag and its text contents.
        tag, contents = successors
        self.assertEqual(tag['id'], '3')
        self.assertEqual(contents, "Three")
518*4882a593Smuzhiyun
class TestPreviousOperations(ProximityTest):

    def setUp(self):
        super(TestPreviousOperations, self).setUp()
        # Most tests below walk backwards from the "Three" text node.
        self.end = self.tree.find(text="Three")

    def test_previous(self):
        self.assertEqual(self.end.previous_element['id'], "3")
        self.assertEqual(self.end.previous_element.previous_element, "Two")

    def test_previous_of_first_item_is_none(self):
        first = self.tree.find('html')
        self.assertIsNone(first.previous_element)

    def test_previous_of_root_is_none(self):
        # The document root is outside the next/previous chain.
        # XXX This is broken!
        #self.assertEqual(self.tree.previous_element, None)
        pass

    def test_find_all_previous(self):
        # The <b> tag containing the "Three" node is the predecessor
        # of the "Three" node itself, which is why "Three" shows up
        # here.
        self.assertSelects(
            self.end.find_all_previous('b'), ["Three", "Two", "One"])
        self.assertSelects(self.end.find_all_previous(id=1), ["One"])

    def test_find_previous(self):
        self.assertEqual(self.end.find_previous('b')['id'], '3')
        self.assertEqual(self.end.find_previous(text="One"), "One")

    def test_find_previous_for_text_element(self):
        text = self.tree.find(text="Three")
        self.assertEqual(text.find_previous("b").string, "Three")
        self.assertSelects(
            text.find_all_previous("b"), ["Three", "Two", "One"])

    def test_previous_generator(self):
        start = self.tree.find(text="One")
        # Materialize the generator; the unpacking below fails unless it
        # yielded exactly four nodes.
        predecessors = list(start.previous_elements)

        # There are four predecessors: the <b> tag containing "One",
        # the <body> tag, the <head> tag, and the <html> tag.
        b, body, head, html = predecessors
        self.assertEqual(b['id'], '1')
        self.assertEqual(body.name, "body")
        self.assertEqual(head.name, "head")
        self.assertEqual(html.name, "html")
568*4882a593Smuzhiyun
569*4882a593Smuzhiyun
class SiblingTest(TreeTest):

    def setUp(self):
        super(SiblingTest, self).setUp()
        markup = '''<html>
                    <span id="1">
                     <span id="1.1"></span>
                    </span>
                    <span id="2">
                     <span id="2.1"></span>
                    </span>
                    <span id="3">
                     <span id="3.1"></span>
                    </span>
                    <span id="4"></span>
                    </html>'''
        # All that whitespace looks good but makes the tests more
        # difficult. Get rid of it. The pattern is a raw string so that
        # "\s" reaches the regex engine instead of being treated as an
        # (invalid) string escape.
        markup = re.sub(r"\n\s*", "", markup)
        self.tree = self.soup(markup)
590*4882a593Smuzhiyun
591*4882a593Smuzhiyun
class TestNextSibling(SiblingTest):

    def setUp(self):
        super(TestNextSibling, self).setUp()
        # Most tests below start from the element with id="1".
        self.start = self.tree.find(id="1")

    def test_next_sibling_of_root_is_none(self):
        self.assertIsNone(self.tree.next_sibling)

    def test_next_sibling(self):
        self.assertEqual(self.start.next_sibling['id'], '2')
        self.assertEqual(self.start.next_sibling.next_sibling['id'], '3')

        # Note the difference between next_sibling and next_element.
        self.assertEqual(self.start.next_element['id'], '1.1')

    def test_next_sibling_may_not_exist(self):
        self.assertIsNone(self.tree.html.next_sibling)

        nested_span = self.tree.find(id="1.1")
        self.assertIsNone(nested_span.next_sibling)

        last_span = self.tree.find(id="4")
        self.assertIsNone(last_span.next_sibling)

    def test_find_next_sibling(self):
        self.assertEqual(self.start.find_next_sibling('span')['id'], '2')

    def test_next_siblings(self):
        self.assertSelectsIDs(self.start.find_next_siblings("span"),
                              ['2', '3', '4'])

        self.assertSelectsIDs(self.start.find_next_siblings(id='3'), ['3'])

    def test_next_sibling_for_text_element(self):
        soup = self.soup("Foo<b>bar</b>baz")
        start = soup.find(text="Foo")
        self.assertEqual(start.next_sibling.name, 'b')
        self.assertEqual(start.next_sibling.next_sibling, 'baz')

        self.assertSelects(start.find_next_siblings('b'), ['bar'])
        self.assertEqual(start.find_next_sibling(text="baz"), "baz")
        self.assertIsNone(start.find_next_sibling(text="nonesuch"))
635*4882a593Smuzhiyun
636*4882a593Smuzhiyun
class TestPreviousSibling(SiblingTest):

    def setUp(self):
        super(TestPreviousSibling, self).setUp()
        # Most tests below start from the element with id="4".
        self.end = self.tree.find(id="4")

    def test_previous_sibling_of_root_is_none(self):
        self.assertIsNone(self.tree.previous_sibling)

    def test_previous_sibling(self):
        self.assertEqual(self.end.previous_sibling['id'], '3')
        self.assertEqual(self.end.previous_sibling.previous_sibling['id'], '2')

        # Note the difference between previous_sibling and previous_element.
        self.assertEqual(self.end.previous_element['id'], '3.1')

    def test_previous_sibling_may_not_exist(self):
        self.assertIsNone(self.tree.html.previous_sibling)

        nested_span = self.tree.find(id="1.1")
        self.assertIsNone(nested_span.previous_sibling)

        first_span = self.tree.find(id="1")
        self.assertIsNone(first_span.previous_sibling)

    def test_find_previous_sibling(self):
        self.assertEqual(self.end.find_previous_sibling('span')['id'], '3')

    def test_previous_siblings(self):
        self.assertSelectsIDs(self.end.find_previous_siblings("span"),
                              ['3', '2', '1'])

        self.assertSelectsIDs(self.end.find_previous_siblings(id='1'), ['1'])

    def test_previous_sibling_for_text_element(self):
        soup = self.soup("Foo<b>bar</b>baz")
        start = soup.find(text="baz")
        self.assertEqual(start.previous_sibling.name, 'b')
        self.assertEqual(start.previous_sibling.previous_sibling, 'Foo')

        self.assertSelects(start.find_previous_siblings('b'), ['bar'])
        self.assertEqual(start.find_previous_sibling(text="Foo"), "Foo")
        self.assertIsNone(start.find_previous_sibling(text="nonesuch"))
680*4882a593Smuzhiyun
681*4882a593Smuzhiyun
class TestTagCreation(SoupTest):
    """Test the ability to create new tags."""
    def test_new_tag(self):
        soup = self.soup("")
        new_tag = soup.new_tag("foo", bar="baz")
        self.assertIsInstance(new_tag, Tag)
        self.assertEqual("foo", new_tag.name)
        self.assertEqual(dict(bar="baz"), new_tag.attrs)
        self.assertIsNone(new_tag.parent)

    def test_tag_inherits_self_closing_rules_from_builder(self):
        # The XML half of this test only runs when an XML builder is
        # registered.
        if XML_BUILDER_PRESENT:
            xml_soup = BeautifulSoup("", "lxml-xml")
            xml_br = xml_soup.new_tag("br")
            xml_p = xml_soup.new_tag("p")

            # Both the <br> and <p> tag are empty-element, just because
            # they have no contents.
            self.assertEqual(b"<br/>", xml_br.encode())
            self.assertEqual(b"<p/>", xml_p.encode())

        html_soup = BeautifulSoup("", "html.parser")
        html_br = html_soup.new_tag("br")
        html_p = html_soup.new_tag("p")

        # The HTML builder uses HTML's rules about which tags are
        # empty-element tags, and the new tags reflect these rules.
        self.assertEqual(b"<br/>", html_br.encode())
        self.assertEqual(b"<p></p>", html_p.encode())

    def test_new_string_creates_navigablestring(self):
        soup = self.soup("")
        s = soup.new_string("foo")
        self.assertEqual("foo", s)
        self.assertIsInstance(s, NavigableString)

    def test_new_string_can_create_navigablestring_subclass(self):
        soup = self.soup("")
        s = soup.new_string("foo", Comment)
        self.assertEqual("foo", s)
        self.assertIsInstance(s, Comment)
723*4882a593Smuzhiyun
724*4882a593Smuzhiyunclass TestTreeModification(SoupTest):
725*4882a593Smuzhiyun
726*4882a593Smuzhiyun    def test_attribute_modification(self):
727*4882a593Smuzhiyun        soup = self.soup('<a id="1"></a>')
728*4882a593Smuzhiyun        soup.a['id'] = 2
729*4882a593Smuzhiyun        self.assertEqual(soup.decode(), self.document_for('<a id="2"></a>'))
730*4882a593Smuzhiyun        del(soup.a['id'])
731*4882a593Smuzhiyun        self.assertEqual(soup.decode(), self.document_for('<a></a>'))
732*4882a593Smuzhiyun        soup.a['id2'] = 'foo'
733*4882a593Smuzhiyun        self.assertEqual(soup.decode(), self.document_for('<a id2="foo"></a>'))
734*4882a593Smuzhiyun
735*4882a593Smuzhiyun    def test_new_tag_creation(self):
736*4882a593Smuzhiyun        builder = builder_registry.lookup('html')()
737*4882a593Smuzhiyun        soup = self.soup("<body></body>", builder=builder)
738*4882a593Smuzhiyun        a = Tag(soup, builder, 'a')
739*4882a593Smuzhiyun        ol = Tag(soup, builder, 'ol')
740*4882a593Smuzhiyun        a['href'] = 'http://foo.com/'
741*4882a593Smuzhiyun        soup.body.insert(0, a)
742*4882a593Smuzhiyun        soup.body.insert(1, ol)
743*4882a593Smuzhiyun        self.assertEqual(
744*4882a593Smuzhiyun            soup.body.encode(),
745*4882a593Smuzhiyun            b'<body><a href="http://foo.com/"></a><ol></ol></body>')
746*4882a593Smuzhiyun
747*4882a593Smuzhiyun    def test_append_to_contents_moves_tag(self):
748*4882a593Smuzhiyun        doc = """<p id="1">Don't leave me <b>here</b>.</p>
749*4882a593Smuzhiyun                <p id="2">Don\'t leave!</p>"""
750*4882a593Smuzhiyun        soup = self.soup(doc)
751*4882a593Smuzhiyun        second_para = soup.find(id='2')
752*4882a593Smuzhiyun        bold = soup.b
753*4882a593Smuzhiyun
754*4882a593Smuzhiyun        # Move the <b> tag to the end of the second paragraph.
755*4882a593Smuzhiyun        soup.find(id='2').append(soup.b)
756*4882a593Smuzhiyun
757*4882a593Smuzhiyun        # The <b> tag is now a child of the second paragraph.
758*4882a593Smuzhiyun        self.assertEqual(bold.parent, second_para)
759*4882a593Smuzhiyun
760*4882a593Smuzhiyun        self.assertEqual(
761*4882a593Smuzhiyun            soup.decode(), self.document_for(
762*4882a593Smuzhiyun                '<p id="1">Don\'t leave me .</p>\n'
763*4882a593Smuzhiyun                '<p id="2">Don\'t leave!<b>here</b></p>'))
764*4882a593Smuzhiyun
765*4882a593Smuzhiyun    def test_replace_with_returns_thing_that_was_replaced(self):
766*4882a593Smuzhiyun        text = "<a></a><b><c></c></b>"
767*4882a593Smuzhiyun        soup = self.soup(text)
768*4882a593Smuzhiyun        a = soup.a
769*4882a593Smuzhiyun        new_a = a.replace_with(soup.c)
770*4882a593Smuzhiyun        self.assertEqual(a, new_a)
771*4882a593Smuzhiyun
772*4882a593Smuzhiyun    def test_unwrap_returns_thing_that_was_replaced(self):
773*4882a593Smuzhiyun        text = "<a><b></b><c></c></a>"
774*4882a593Smuzhiyun        soup = self.soup(text)
775*4882a593Smuzhiyun        a = soup.a
776*4882a593Smuzhiyun        new_a = a.unwrap()
777*4882a593Smuzhiyun        self.assertEqual(a, new_a)
778*4882a593Smuzhiyun
779*4882a593Smuzhiyun    def test_replace_with_and_unwrap_give_useful_exception_when_tag_has_no_parent(self):
780*4882a593Smuzhiyun        soup = self.soup("<a><b>Foo</b></a><c>Bar</c>")
781*4882a593Smuzhiyun        a = soup.a
782*4882a593Smuzhiyun        a.extract()
783*4882a593Smuzhiyun        self.assertEqual(None, a.parent)
784*4882a593Smuzhiyun        self.assertRaises(ValueError, a.unwrap)
785*4882a593Smuzhiyun        self.assertRaises(ValueError, a.replace_with, soup.c)
786*4882a593Smuzhiyun
787*4882a593Smuzhiyun    def test_replace_tag_with_itself(self):
788*4882a593Smuzhiyun        text = "<a><b></b><c>Foo<d></d></c></a><a><e></e></a>"
789*4882a593Smuzhiyun        soup = self.soup(text)
790*4882a593Smuzhiyun        c = soup.c
791*4882a593Smuzhiyun        soup.c.replace_with(c)
792*4882a593Smuzhiyun        self.assertEqual(soup.decode(), self.document_for(text))
793*4882a593Smuzhiyun
794*4882a593Smuzhiyun    def test_replace_tag_with_its_parent_raises_exception(self):
795*4882a593Smuzhiyun        text = "<a><b></b></a>"
796*4882a593Smuzhiyun        soup = self.soup(text)
797*4882a593Smuzhiyun        self.assertRaises(ValueError, soup.b.replace_with, soup.a)
798*4882a593Smuzhiyun
799*4882a593Smuzhiyun    def test_insert_tag_into_itself_raises_exception(self):
800*4882a593Smuzhiyun        text = "<a><b></b></a>"
801*4882a593Smuzhiyun        soup = self.soup(text)
802*4882a593Smuzhiyun        self.assertRaises(ValueError, soup.a.insert, 0, soup.a)
803*4882a593Smuzhiyun
804*4882a593Smuzhiyun    def test_replace_with_maintains_next_element_throughout(self):
805*4882a593Smuzhiyun        soup = self.soup('<p><a>one</a><b>three</b></p>')
806*4882a593Smuzhiyun        a = soup.a
807*4882a593Smuzhiyun        b = a.contents[0]
808*4882a593Smuzhiyun        # Make it so the <a> tag has two text children.
809*4882a593Smuzhiyun        a.insert(1, "two")
810*4882a593Smuzhiyun
811*4882a593Smuzhiyun        # Now replace each one with the empty string.
812*4882a593Smuzhiyun        left, right = a.contents
813*4882a593Smuzhiyun        left.replaceWith('')
814*4882a593Smuzhiyun        right.replaceWith('')
815*4882a593Smuzhiyun
816*4882a593Smuzhiyun        # The <b> tag is still connected to the tree.
817*4882a593Smuzhiyun        self.assertEqual("three", soup.b.string)
818*4882a593Smuzhiyun
819*4882a593Smuzhiyun    def test_replace_final_node(self):
820*4882a593Smuzhiyun        soup = self.soup("<b>Argh!</b>")
821*4882a593Smuzhiyun        soup.find(text="Argh!").replace_with("Hooray!")
822*4882a593Smuzhiyun        new_text = soup.find(text="Hooray!")
823*4882a593Smuzhiyun        b = soup.b
824*4882a593Smuzhiyun        self.assertEqual(new_text.previous_element, b)
825*4882a593Smuzhiyun        self.assertEqual(new_text.parent, b)
826*4882a593Smuzhiyun        self.assertEqual(new_text.previous_element.next_element, new_text)
827*4882a593Smuzhiyun        self.assertEqual(new_text.next_element, None)
828*4882a593Smuzhiyun
829*4882a593Smuzhiyun    def test_consecutive_text_nodes(self):
830*4882a593Smuzhiyun        # A builder should never create two consecutive text nodes,
831*4882a593Smuzhiyun        # but if you insert one next to another, Beautiful Soup will
832*4882a593Smuzhiyun        # handle it correctly.
833*4882a593Smuzhiyun        soup = self.soup("<a><b>Argh!</b><c></c></a>")
834*4882a593Smuzhiyun        soup.b.insert(1, "Hooray!")
835*4882a593Smuzhiyun
836*4882a593Smuzhiyun        self.assertEqual(
837*4882a593Smuzhiyun            soup.decode(), self.document_for(
838*4882a593Smuzhiyun                "<a><b>Argh!Hooray!</b><c></c></a>"))
839*4882a593Smuzhiyun
840*4882a593Smuzhiyun        new_text = soup.find(text="Hooray!")
841*4882a593Smuzhiyun        self.assertEqual(new_text.previous_element, "Argh!")
842*4882a593Smuzhiyun        self.assertEqual(new_text.previous_element.next_element, new_text)
843*4882a593Smuzhiyun
844*4882a593Smuzhiyun        self.assertEqual(new_text.previous_sibling, "Argh!")
845*4882a593Smuzhiyun        self.assertEqual(new_text.previous_sibling.next_sibling, new_text)
846*4882a593Smuzhiyun
847*4882a593Smuzhiyun        self.assertEqual(new_text.next_sibling, None)
848*4882a593Smuzhiyun        self.assertEqual(new_text.next_element, soup.c)
849*4882a593Smuzhiyun
850*4882a593Smuzhiyun    def test_insert_string(self):
851*4882a593Smuzhiyun        soup = self.soup("<a></a>")
852*4882a593Smuzhiyun        soup.a.insert(0, "bar")
853*4882a593Smuzhiyun        soup.a.insert(0, "foo")
854*4882a593Smuzhiyun        # The string were added to the tag.
855*4882a593Smuzhiyun        self.assertEqual(["foo", "bar"], soup.a.contents)
856*4882a593Smuzhiyun        # And they were converted to NavigableStrings.
857*4882a593Smuzhiyun        self.assertEqual(soup.a.contents[0].next_element, "bar")
858*4882a593Smuzhiyun
859*4882a593Smuzhiyun    def test_insert_tag(self):
860*4882a593Smuzhiyun        builder = self.default_builder
861*4882a593Smuzhiyun        soup = self.soup(
862*4882a593Smuzhiyun            "<a><b>Find</b><c>lady!</c><d></d></a>", builder=builder)
863*4882a593Smuzhiyun        magic_tag = Tag(soup, builder, 'magictag')
864*4882a593Smuzhiyun        magic_tag.insert(0, "the")
865*4882a593Smuzhiyun        soup.a.insert(1, magic_tag)
866*4882a593Smuzhiyun
867*4882a593Smuzhiyun        self.assertEqual(
868*4882a593Smuzhiyun            soup.decode(), self.document_for(
869*4882a593Smuzhiyun                "<a><b>Find</b><magictag>the</magictag><c>lady!</c><d></d></a>"))
870*4882a593Smuzhiyun
871*4882a593Smuzhiyun        # Make sure all the relationships are hooked up correctly.
872*4882a593Smuzhiyun        b_tag = soup.b
873*4882a593Smuzhiyun        self.assertEqual(b_tag.next_sibling, magic_tag)
874*4882a593Smuzhiyun        self.assertEqual(magic_tag.previous_sibling, b_tag)
875*4882a593Smuzhiyun
876*4882a593Smuzhiyun        find = b_tag.find(text="Find")
877*4882a593Smuzhiyun        self.assertEqual(find.next_element, magic_tag)
878*4882a593Smuzhiyun        self.assertEqual(magic_tag.previous_element, find)
879*4882a593Smuzhiyun
880*4882a593Smuzhiyun        c_tag = soup.c
881*4882a593Smuzhiyun        self.assertEqual(magic_tag.next_sibling, c_tag)
882*4882a593Smuzhiyun        self.assertEqual(c_tag.previous_sibling, magic_tag)
883*4882a593Smuzhiyun
884*4882a593Smuzhiyun        the = magic_tag.find(text="the")
885*4882a593Smuzhiyun        self.assertEqual(the.parent, magic_tag)
886*4882a593Smuzhiyun        self.assertEqual(the.next_element, c_tag)
887*4882a593Smuzhiyun        self.assertEqual(c_tag.previous_element, the)
888*4882a593Smuzhiyun
889*4882a593Smuzhiyun    def test_append_child_thats_already_at_the_end(self):
890*4882a593Smuzhiyun        data = "<a><b></b></a>"
891*4882a593Smuzhiyun        soup = self.soup(data)
892*4882a593Smuzhiyun        soup.a.append(soup.b)
893*4882a593Smuzhiyun        self.assertEqual(data, soup.decode())
894*4882a593Smuzhiyun
895*4882a593Smuzhiyun    def test_move_tag_to_beginning_of_parent(self):
896*4882a593Smuzhiyun        data = "<a><b></b><c></c><d></d></a>"
897*4882a593Smuzhiyun        soup = self.soup(data)
898*4882a593Smuzhiyun        soup.a.insert(0, soup.d)
899*4882a593Smuzhiyun        self.assertEqual("<a><d></d><b></b><c></c></a>", soup.decode())
900*4882a593Smuzhiyun
901*4882a593Smuzhiyun    def test_insert_works_on_empty_element_tag(self):
902*4882a593Smuzhiyun        # This is a little strange, since most HTML parsers don't allow
903*4882a593Smuzhiyun        # markup like this to come through. But in general, we don't
904*4882a593Smuzhiyun        # know what the parser would or wouldn't have allowed, so
905*4882a593Smuzhiyun        # I'm letting this succeed for now.
906*4882a593Smuzhiyun        soup = self.soup("<br/>")
907*4882a593Smuzhiyun        soup.br.insert(1, "Contents")
908*4882a593Smuzhiyun        self.assertEqual(str(soup.br), "<br>Contents</br>")
909*4882a593Smuzhiyun
910*4882a593Smuzhiyun    def test_insert_before(self):
911*4882a593Smuzhiyun        soup = self.soup("<a>foo</a><b>bar</b>")
912*4882a593Smuzhiyun        soup.b.insert_before("BAZ")
913*4882a593Smuzhiyun        soup.a.insert_before("QUUX")
914*4882a593Smuzhiyun        self.assertEqual(
915*4882a593Smuzhiyun            soup.decode(), self.document_for("QUUX<a>foo</a>BAZ<b>bar</b>"))
916*4882a593Smuzhiyun
917*4882a593Smuzhiyun        soup.a.insert_before(soup.b)
918*4882a593Smuzhiyun        self.assertEqual(
919*4882a593Smuzhiyun            soup.decode(), self.document_for("QUUX<b>bar</b><a>foo</a>BAZ"))
920*4882a593Smuzhiyun
921*4882a593Smuzhiyun    def test_insert_after(self):
922*4882a593Smuzhiyun        soup = self.soup("<a>foo</a><b>bar</b>")
923*4882a593Smuzhiyun        soup.b.insert_after("BAZ")
924*4882a593Smuzhiyun        soup.a.insert_after("QUUX")
925*4882a593Smuzhiyun        self.assertEqual(
926*4882a593Smuzhiyun            soup.decode(), self.document_for("<a>foo</a>QUUX<b>bar</b>BAZ"))
927*4882a593Smuzhiyun        soup.b.insert_after(soup.a)
928*4882a593Smuzhiyun        self.assertEqual(
929*4882a593Smuzhiyun            soup.decode(), self.document_for("QUUX<b>bar</b><a>foo</a>BAZ"))
930*4882a593Smuzhiyun
931*4882a593Smuzhiyun    def test_insert_after_raises_exception_if_after_has_no_meaning(self):
932*4882a593Smuzhiyun        soup = self.soup("")
933*4882a593Smuzhiyun        tag = soup.new_tag("a")
934*4882a593Smuzhiyun        string = soup.new_string("")
935*4882a593Smuzhiyun        self.assertRaises(ValueError, string.insert_after, tag)
936*4882a593Smuzhiyun        self.assertRaises(NotImplementedError, soup.insert_after, tag)
937*4882a593Smuzhiyun        self.assertRaises(ValueError, tag.insert_after, tag)
938*4882a593Smuzhiyun
939*4882a593Smuzhiyun    def test_insert_before_raises_notimplementederror_if_before_has_no_meaning(self):
940*4882a593Smuzhiyun        soup = self.soup("")
941*4882a593Smuzhiyun        tag = soup.new_tag("a")
942*4882a593Smuzhiyun        string = soup.new_string("")
943*4882a593Smuzhiyun        self.assertRaises(ValueError, string.insert_before, tag)
944*4882a593Smuzhiyun        self.assertRaises(NotImplementedError, soup.insert_before, tag)
945*4882a593Smuzhiyun        self.assertRaises(ValueError, tag.insert_before, tag)
946*4882a593Smuzhiyun
947*4882a593Smuzhiyun    def test_replace_with(self):
948*4882a593Smuzhiyun        soup = self.soup(
949*4882a593Smuzhiyun                "<p>There's <b>no</b> business like <b>show</b> business</p>")
950*4882a593Smuzhiyun        no, show = soup.find_all('b')
951*4882a593Smuzhiyun        show.replace_with(no)
952*4882a593Smuzhiyun        self.assertEqual(
953*4882a593Smuzhiyun            soup.decode(),
954*4882a593Smuzhiyun            self.document_for(
955*4882a593Smuzhiyun                "<p>There's  business like <b>no</b> business</p>"))
956*4882a593Smuzhiyun
957*4882a593Smuzhiyun        self.assertEqual(show.parent, None)
958*4882a593Smuzhiyun        self.assertEqual(no.parent, soup.p)
959*4882a593Smuzhiyun        self.assertEqual(no.next_element, "no")
960*4882a593Smuzhiyun        self.assertEqual(no.next_sibling, " business")
961*4882a593Smuzhiyun
962*4882a593Smuzhiyun    def test_replace_first_child(self):
963*4882a593Smuzhiyun        data = "<a><b></b><c></c></a>"
964*4882a593Smuzhiyun        soup = self.soup(data)
965*4882a593Smuzhiyun        soup.b.replace_with(soup.c)
966*4882a593Smuzhiyun        self.assertEqual("<a><c></c></a>", soup.decode())
967*4882a593Smuzhiyun
968*4882a593Smuzhiyun    def test_replace_last_child(self):
969*4882a593Smuzhiyun        data = "<a><b></b><c></c></a>"
970*4882a593Smuzhiyun        soup = self.soup(data)
971*4882a593Smuzhiyun        soup.c.replace_with(soup.b)
972*4882a593Smuzhiyun        self.assertEqual("<a><b></b></a>", soup.decode())
973*4882a593Smuzhiyun
974*4882a593Smuzhiyun    def test_nested_tag_replace_with(self):
975*4882a593Smuzhiyun        soup = self.soup(
976*4882a593Smuzhiyun            """<a>We<b>reserve<c>the</c><d>right</d></b></a><e>to<f>refuse</f><g>service</g></e>""")
977*4882a593Smuzhiyun
978*4882a593Smuzhiyun        # Replace the entire <b> tag and its contents ("reserve the
979*4882a593Smuzhiyun        # right") with the <f> tag ("refuse").
980*4882a593Smuzhiyun        remove_tag = soup.b
981*4882a593Smuzhiyun        move_tag = soup.f
982*4882a593Smuzhiyun        remove_tag.replace_with(move_tag)
983*4882a593Smuzhiyun
984*4882a593Smuzhiyun        self.assertEqual(
985*4882a593Smuzhiyun            soup.decode(), self.document_for(
986*4882a593Smuzhiyun                "<a>We<f>refuse</f></a><e>to<g>service</g></e>"))
987*4882a593Smuzhiyun
988*4882a593Smuzhiyun        # The <b> tag is now an orphan.
989*4882a593Smuzhiyun        self.assertEqual(remove_tag.parent, None)
990*4882a593Smuzhiyun        self.assertEqual(remove_tag.find(text="right").next_element, None)
991*4882a593Smuzhiyun        self.assertEqual(remove_tag.previous_element, None)
992*4882a593Smuzhiyun        self.assertEqual(remove_tag.next_sibling, None)
993*4882a593Smuzhiyun        self.assertEqual(remove_tag.previous_sibling, None)
994*4882a593Smuzhiyun
995*4882a593Smuzhiyun        # The <f> tag is now connected to the <a> tag.
996*4882a593Smuzhiyun        self.assertEqual(move_tag.parent, soup.a)
997*4882a593Smuzhiyun        self.assertEqual(move_tag.previous_element, "We")
998*4882a593Smuzhiyun        self.assertEqual(move_tag.next_element.next_element, soup.e)
999*4882a593Smuzhiyun        self.assertEqual(move_tag.next_sibling, None)
1000*4882a593Smuzhiyun
1001*4882a593Smuzhiyun        # The gap where the <f> tag used to be has been mended, and
1002*4882a593Smuzhiyun        # the word "to" is now connected to the <g> tag.
1003*4882a593Smuzhiyun        to_text = soup.find(text="to")
1004*4882a593Smuzhiyun        g_tag = soup.g
1005*4882a593Smuzhiyun        self.assertEqual(to_text.next_element, g_tag)
1006*4882a593Smuzhiyun        self.assertEqual(to_text.next_sibling, g_tag)
1007*4882a593Smuzhiyun        self.assertEqual(g_tag.previous_element, to_text)
1008*4882a593Smuzhiyun        self.assertEqual(g_tag.previous_sibling, to_text)
1009*4882a593Smuzhiyun
1010*4882a593Smuzhiyun    def test_unwrap(self):
1011*4882a593Smuzhiyun        tree = self.soup("""
1012*4882a593Smuzhiyun            <p>Unneeded <em>formatting</em> is unneeded</p>
1013*4882a593Smuzhiyun            """)
1014*4882a593Smuzhiyun        tree.em.unwrap()
1015*4882a593Smuzhiyun        self.assertEqual(tree.em, None)
1016*4882a593Smuzhiyun        self.assertEqual(tree.p.text, "Unneeded formatting is unneeded")
1017*4882a593Smuzhiyun
1018*4882a593Smuzhiyun    def test_wrap(self):
1019*4882a593Smuzhiyun        soup = self.soup("I wish I was bold.")
1020*4882a593Smuzhiyun        value = soup.string.wrap(soup.new_tag("b"))
1021*4882a593Smuzhiyun        self.assertEqual(value.decode(), "<b>I wish I was bold.</b>")
1022*4882a593Smuzhiyun        self.assertEqual(
1023*4882a593Smuzhiyun            soup.decode(), self.document_for("<b>I wish I was bold.</b>"))
1024*4882a593Smuzhiyun
1025*4882a593Smuzhiyun    def test_wrap_extracts_tag_from_elsewhere(self):
1026*4882a593Smuzhiyun        soup = self.soup("<b></b>I wish I was bold.")
1027*4882a593Smuzhiyun        soup.b.next_sibling.wrap(soup.b)
1028*4882a593Smuzhiyun        self.assertEqual(
1029*4882a593Smuzhiyun            soup.decode(), self.document_for("<b>I wish I was bold.</b>"))
1030*4882a593Smuzhiyun
1031*4882a593Smuzhiyun    def test_wrap_puts_new_contents_at_the_end(self):
1032*4882a593Smuzhiyun        soup = self.soup("<b>I like being bold.</b>I wish I was bold.")
1033*4882a593Smuzhiyun        soup.b.next_sibling.wrap(soup.b)
1034*4882a593Smuzhiyun        self.assertEqual(2, len(soup.b.contents))
1035*4882a593Smuzhiyun        self.assertEqual(
1036*4882a593Smuzhiyun            soup.decode(), self.document_for(
1037*4882a593Smuzhiyun                "<b>I like being bold.I wish I was bold.</b>"))
1038*4882a593Smuzhiyun
1039*4882a593Smuzhiyun    def test_extract(self):
1040*4882a593Smuzhiyun        soup = self.soup(
1041*4882a593Smuzhiyun            '<html><body>Some content. <div id="nav">Nav crap</div> More content.</body></html>')
1042*4882a593Smuzhiyun
1043*4882a593Smuzhiyun        self.assertEqual(len(soup.body.contents), 3)
1044*4882a593Smuzhiyun        extracted = soup.find(id="nav").extract()
1045*4882a593Smuzhiyun
1046*4882a593Smuzhiyun        self.assertEqual(
1047*4882a593Smuzhiyun            soup.decode(), "<html><body>Some content.  More content.</body></html>")
1048*4882a593Smuzhiyun        self.assertEqual(extracted.decode(), '<div id="nav">Nav crap</div>')
1049*4882a593Smuzhiyun
1050*4882a593Smuzhiyun        # The extracted tag is now an orphan.
1051*4882a593Smuzhiyun        self.assertEqual(len(soup.body.contents), 2)
1052*4882a593Smuzhiyun        self.assertEqual(extracted.parent, None)
1053*4882a593Smuzhiyun        self.assertEqual(extracted.previous_element, None)
1054*4882a593Smuzhiyun        self.assertEqual(extracted.next_element.next_element, None)
1055*4882a593Smuzhiyun
1056*4882a593Smuzhiyun        # The gap where the extracted tag used to be has been mended.
1057*4882a593Smuzhiyun        content_1 = soup.find(text="Some content. ")
1058*4882a593Smuzhiyun        content_2 = soup.find(text=" More content.")
1059*4882a593Smuzhiyun        self.assertEqual(content_1.next_element, content_2)
1060*4882a593Smuzhiyun        self.assertEqual(content_1.next_sibling, content_2)
1061*4882a593Smuzhiyun        self.assertEqual(content_2.previous_element, content_1)
1062*4882a593Smuzhiyun        self.assertEqual(content_2.previous_sibling, content_1)
1063*4882a593Smuzhiyun
1064*4882a593Smuzhiyun    def test_extract_distinguishes_between_identical_strings(self):
1065*4882a593Smuzhiyun        soup = self.soup("<a>foo</a><b>bar</b>")
1066*4882a593Smuzhiyun        foo_1 = soup.a.string
1067*4882a593Smuzhiyun        bar_1 = soup.b.string
1068*4882a593Smuzhiyun        foo_2 = soup.new_string("foo")
1069*4882a593Smuzhiyun        bar_2 = soup.new_string("bar")
1070*4882a593Smuzhiyun        soup.a.append(foo_2)
1071*4882a593Smuzhiyun        soup.b.append(bar_2)
1072*4882a593Smuzhiyun
1073*4882a593Smuzhiyun        # Now there are two identical strings in the <a> tag, and two
1074*4882a593Smuzhiyun        # in the <b> tag. Let's remove the first "foo" and the second
1075*4882a593Smuzhiyun        # "bar".
1076*4882a593Smuzhiyun        foo_1.extract()
1077*4882a593Smuzhiyun        bar_2.extract()
1078*4882a593Smuzhiyun        self.assertEqual(foo_2, soup.a.string)
1079*4882a593Smuzhiyun        self.assertEqual(bar_2, soup.b.string)
1080*4882a593Smuzhiyun
1081*4882a593Smuzhiyun    def test_extract_multiples_of_same_tag(self):
1082*4882a593Smuzhiyun        soup = self.soup("""
1083*4882a593Smuzhiyun<html>
1084*4882a593Smuzhiyun<head>
1085*4882a593Smuzhiyun<script>foo</script>
1086*4882a593Smuzhiyun</head>
1087*4882a593Smuzhiyun<body>
1088*4882a593Smuzhiyun <script>bar</script>
1089*4882a593Smuzhiyun <a></a>
1090*4882a593Smuzhiyun</body>
1091*4882a593Smuzhiyun<script>baz</script>
1092*4882a593Smuzhiyun</html>""")
1093*4882a593Smuzhiyun        [soup.script.extract() for i in soup.find_all("script")]
1094*4882a593Smuzhiyun        self.assertEqual("<body>\n\n<a></a>\n</body>", str(soup.body))
1095*4882a593Smuzhiyun
1096*4882a593Smuzhiyun
1097*4882a593Smuzhiyun    def test_extract_works_when_element_is_surrounded_by_identical_strings(self):
1098*4882a593Smuzhiyun        soup = self.soup(
1099*4882a593Smuzhiyun '<html>\n'
1100*4882a593Smuzhiyun '<body>hi</body>\n'
1101*4882a593Smuzhiyun '</html>')
1102*4882a593Smuzhiyun        soup.find('body').extract()
1103*4882a593Smuzhiyun        self.assertEqual(None, soup.find('body'))
1104*4882a593Smuzhiyun
1105*4882a593Smuzhiyun
1106*4882a593Smuzhiyun    def test_clear(self):
1107*4882a593Smuzhiyun        """Tag.clear()"""
1108*4882a593Smuzhiyun        soup = self.soup("<p><a>String <em>Italicized</em></a> and another</p>")
1109*4882a593Smuzhiyun        # clear using extract()
1110*4882a593Smuzhiyun        a = soup.a
1111*4882a593Smuzhiyun        soup.p.clear()
1112*4882a593Smuzhiyun        self.assertEqual(len(soup.p.contents), 0)
1113*4882a593Smuzhiyun        self.assertTrue(hasattr(a, "contents"))
1114*4882a593Smuzhiyun
1115*4882a593Smuzhiyun        # clear using decompose()
1116*4882a593Smuzhiyun        em = a.em
1117*4882a593Smuzhiyun        a.clear(decompose=True)
1118*4882a593Smuzhiyun        self.assertEqual(0, len(em.contents))
1119*4882a593Smuzhiyun
1120*4882a593Smuzhiyun    def test_string_set(self):
1121*4882a593Smuzhiyun        """Tag.string = 'string'"""
1122*4882a593Smuzhiyun        soup = self.soup("<a></a> <b><c></c></b>")
1123*4882a593Smuzhiyun        soup.a.string = "foo"
1124*4882a593Smuzhiyun        self.assertEqual(soup.a.contents, ["foo"])
1125*4882a593Smuzhiyun        soup.b.string = "bar"
1126*4882a593Smuzhiyun        self.assertEqual(soup.b.contents, ["bar"])
1127*4882a593Smuzhiyun
1128*4882a593Smuzhiyun    def test_string_set_does_not_affect_original_string(self):
1129*4882a593Smuzhiyun        soup = self.soup("<a><b>foo</b><c>bar</c>")
1130*4882a593Smuzhiyun        soup.b.string = soup.c.string
1131*4882a593Smuzhiyun        self.assertEqual(soup.a.encode(), b"<a><b>bar</b><c>bar</c></a>")
1132*4882a593Smuzhiyun
1133*4882a593Smuzhiyun    def test_set_string_preserves_class_of_string(self):
1134*4882a593Smuzhiyun        soup = self.soup("<a></a>")
1135*4882a593Smuzhiyun        cdata = CData("foo")
1136*4882a593Smuzhiyun        soup.a.string = cdata
1137*4882a593Smuzhiyun        self.assertTrue(isinstance(soup.a.string, CData))
1138*4882a593Smuzhiyun
class TestElementObjects(SoupTest):
    """Test various features of element objects."""

    def test_len(self):
        """The length of an element is its number of children."""
        soup = self.soup("<top>1<b>2</b>3</top>")

        # The BeautifulSoup object itself contains one element: the
        # <top> tag.
        self.assertEqual(len(soup.contents), 1)
        self.assertEqual(len(soup), 1)

        # The <top> tag contains three elements: the text node "1", the
        # <b> tag, and the text node "3".
        self.assertEqual(len(soup.top), 3)
        self.assertEqual(len(soup.top.contents), 3)

    def test_member_access_invokes_find(self):
        """Accessing a Python member .foo invokes find('foo')"""
        soup = self.soup('<b><i></i></b>')
        self.assertEqual(soup.b, soup.find('b'))
        self.assertEqual(soup.b.i, soup.find('b').find('i'))
        # A tag name that is not in the document yields None.
        self.assertEqual(soup.a, None)

    def test_deprecated_member_access(self):
        """The legacy .fooTag spelling still works but emits a warning."""
        soup = self.soup('<b><i></i></b>')
        with warnings.catch_warnings(record=True) as w:
            # Record the warning no matter which filters are active
            # (for example -W ignore); otherwise w may be empty and
            # w[0] below would fail with IndexError instead of a
            # meaningful assertion.
            warnings.simplefilter("always")
            tag = soup.bTag
        self.assertEqual(soup.b, tag)
        self.assertEqual(
            '.bTag is deprecated, use .find("b") instead.',
            str(w[0].message))

    def test_has_attr(self):
        """has_attr() checks for the presence of an attribute.

        Please note: has_attr() is different from the ``in`` operator
        (__contains__). has_attr() checks the tag's attributes and
        ``in`` checks the tag's children.
        """
        soup = self.soup("<foo attr='bar'>")
        self.assertTrue(soup.foo.has_attr('attr'))
        self.assertFalse(soup.foo.has_attr('attr2'))

    def test_attributes_come_out_in_alphabetical_order(self):
        """Attributes are written out sorted by name, not in source order."""
        markup = '<b a="1" z="5" m="3" f="2" y="4"></b>'
        self.assertSoupEquals(markup, '<b a="1" f="2" m="3" y="4" z="5"></b>')

    def test_string(self):
        # A tag that contains only a text node makes that node
        # available as .string.
        soup = self.soup("<b>foo</b>")
        self.assertEqual(soup.b.string, 'foo')

    def test_empty_tag_has_no_string(self):
        # A tag with no children has no .string.
        soup = self.soup("<b></b>")
        self.assertEqual(soup.b.string, None)

    def test_tag_with_multiple_children_has_no_string(self):
        # A tag with more than one child has no .string.
        # NOTE(review): the first two assertions look at the first <b>,
        # which is empty, rather than at the multi-child <a>; only the
        # last case checks <a> itself. Confirm whether that is intended.
        soup = self.soup("<a>foo<b></b><b></b></b>")
        self.assertEqual(soup.b.string, None)

        soup = self.soup("<a>foo<b></b>bar</b>")
        self.assertEqual(soup.b.string, None)

        # Even if all the children are strings, due to trickery,
        # it won't work--but this would be a good optimization.
        soup = self.soup("<a>foo</b>")
        soup.a.insert(1, "bar")
        self.assertEqual(soup.a.string, None)

    def test_tag_with_recursive_string_has_string(self):
        # A tag with a single child which has a .string inherits that
        # .string.
        soup = self.soup("<a><b>foo</b></a>")
        self.assertEqual(soup.a.string, "foo")
        self.assertEqual(soup.string, "foo")

    def test_lack_of_string(self):
        """Only a tag containing a single text node has a .string."""
        soup = self.soup("<b>f<i>e</i>o</b>")
        self.assertFalse(soup.b.string)

        soup = self.soup("<b></b>")
        self.assertFalse(soup.b.string)

    def test_all_text(self):
        """Tag.text and Tag.get_text(sep="") -> all child text, concatenated"""
        soup = self.soup("<a>a<b>r</b>   <r> t </r></a>")
        self.assertEqual(soup.a.text, "ar  t ")
        self.assertEqual(soup.a.get_text(strip=True), "art")
        self.assertEqual(soup.a.get_text(","), "a,r, , t ")
        self.assertEqual(soup.a.get_text(",", strip=True), "a,r,t")

    def test_get_text_ignores_comments(self):
        """get_text() skips comments unless they are asked for via types=."""
        soup = self.soup("foo<!--IGNORE-->bar")
        self.assertEqual(soup.get_text(), "foobar")

        # Comments are included when Comment is listed explicitly, or
        # when the type filter is switched off with types=None.
        self.assertEqual(
            soup.get_text(types=(NavigableString, Comment)), "fooIGNOREbar")
        self.assertEqual(
            soup.get_text(types=None), "fooIGNOREbar")

    def test_all_strings_ignores_comments(self):
        """The .strings generator does not yield comments."""
        soup = self.soup("foo<!--IGNORE-->bar")
        self.assertEqual(['foo', 'bar'], list(soup.strings))
1248*4882a593Smuzhiyun
class TestCDAtaListAttributes(SoupTest):
    """Checks on cdata-list (multi-valued) attributes such as 'class'."""

    def test_single_value_becomes_list(self):
        """A lone class name is exposed as a one-element list."""
        tag = self.soup("<a class='foo'>").a
        self.assertEqual(["foo"], tag['class'])

    def test_multiple_values_becomes_list(self):
        """Space-separated class names are split into a list."""
        tag = self.soup("<a class='foo bar'>").a
        self.assertEqual(["foo", "bar"], tag['class'])

    def test_multiple_values_separated_by_weird_whitespace(self):
        """Tabs and newlines separate values just as spaces do."""
        tag = self.soup("<a class='foo\tbar\nbaz'>").a
        self.assertEqual(["foo", "bar", "baz"], tag['class'])

    def test_attributes_joined_into_string_on_output(self):
        """On output the values are joined back with single spaces."""
        tag = self.soup("<a class='foo\tbar'>").a
        self.assertEqual(b'<a class="foo bar"></a>', tag.encode())

    def test_accept_charset(self):
        """accept-charset on <form> is treated as a cdata-list attribute."""
        form = self.soup('<form accept-charset="ISO-8859-1 UTF-8">').form
        self.assertEqual(['ISO-8859-1', 'UTF-8'], form['accept-charset'])

    def test_cdata_attribute_applying_only_to_one_tag(self):
        """accept-charset stays a plain string on a tag other than <form>."""
        markup = '<a accept-charset="ISO-8859-1 UTF-8"></a>'
        tag = self.soup(markup).a
        # On <form> this attribute is split into a list (checked by
        # test_accept_charset); on <a> the value is left as one string.
        self.assertEqual('ISO-8859-1 UTF-8', tag['accept-charset'])

    def test_string_has_immutable_name_property(self):
        """A string's .name is None and cannot be assigned."""
        string = self.soup("s").string
        self.assertEqual(None, string.name)
        with self.assertRaises(AttributeError):
            string.name = 'foo'
1287*4882a593Smuzhiyun
class TestPersistence(SoupTest):
    """Checks on pickling and copying of trees, tags and strings."""

    def setUp(self):
        """Parse a small but complete HTML page into self.tree."""
        super(TestPersistence, self).setUp()
        self.page = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
"http://www.w3.org/TR/REC-html40/transitional.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<title>Beautiful Soup: We called him Tortoise because he taught us.</title>
<link rev="made" href="mailto:leonardr@segfault.org">
<meta name="Description" content="Beautiful Soup: an HTML parser optimized for screen-scraping.">
<meta name="generator" content="Markov Approximation 1.4 (module: leonardr)">
<meta name="author" content="Leonard Richardson">
</head>
<body>
<a href="foo">foo</a>
<a href="foo"><b>bar</b></a>
</body>
</html>"""
        self.tree = self.soup(self.page)

    def test_pickle_and_unpickle_identity(self):
        """A pickle round trip (protocol 2) reproduces the tree."""
        restored = pickle.loads(pickle.dumps(self.tree, 2))
        self.assertEqual(restored.__class__, BeautifulSoup)
        self.assertEqual(restored.decode(), self.tree.decode())

    def test_deepcopy_identity(self):
        """copy.deepcopy() of a tree renders the same markup as the original."""
        duplicate = copy.deepcopy(self.tree)
        self.assertEqual(duplicate.decode(), self.tree.decode())

    def test_unicode_pickle(self):
        """A tree holding a non-ASCII character survives pickling."""
        markup = "<b>\N{SNOWMAN}</b>"
        soup = self.soup(markup)
        payload = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL)
        restored = pickle.loads(payload)
        self.assertEqual(restored.decode(), soup.decode())

    def test_copy_navigablestring_is_not_attached_to_tree(self):
        """A copied string compares equal but has no links into the tree."""
        markup = "<b>Foo<a></a></b><b>Bar</b>"
        soup = self.soup(markup)
        original = soup.find(string="Foo")
        duplicate = copy.copy(original)
        self.assertEqual(original, duplicate)
        self.assertEqual(None, duplicate.parent)
        self.assertEqual(None, duplicate.next_element)
        # The original has a following sibling; the copy does not.
        self.assertNotEqual(None, original.next_sibling)
        self.assertEqual(None, duplicate.next_sibling)
        self.assertEqual(None, duplicate.previous_element)

    def test_copy_navigablestring_subclass_has_same_type(self):
        """Copying a Comment yields another Comment."""
        markup = "<b><!--Foo--></b>"
        soup = self.soup(markup)
        original = soup.string
        duplicate = copy.copy(original)
        self.assertEqual(original, duplicate)
        self.assertIsInstance(duplicate, Comment)

    def test_copy_entire_soup(self):
        """A copied BeautifulSoup object compares equal to its source."""
        markup = "<div><b>Foo<a></a></b><b>Bar</b></div>end"
        soup = self.soup(markup)
        duplicate = copy.copy(soup)
        self.assertEqual(soup, duplicate)

    def test_copy_tag_copies_contents(self):
        """A copied tag brings its contents along but is detached from the tree."""
        markup = "<div><b>Foo<a></a></b><b>Bar</b></div>end"
        soup = self.soup(markup)
        original = soup.div
        duplicate = copy.copy(original)

        # Same rendering and equal by comparison ...
        self.assertEqual(str(original), str(duplicate))
        self.assertEqual(original, duplicate)

        # ... yet two distinct objects.
        self.assertIsNot(original, duplicate)

        # The copy has no parent and nothing before it, and its last
        # string has nothing after it. In the original tree the same
        # string is followed by another element.
        self.assertEqual(None, duplicate.parent)
        self.assertEqual(None, duplicate.previous_element)
        self.assertEqual(None, duplicate.find(string='Bar').next_element)
        self.assertNotEqual(None, original.find(string='Bar').next_element)
1377*4882a593Smuzhiyun
class TestSubstitutions(SoupTest):
    """Test entity/encoding substitution performed when a tree is output."""

    def test_default_formatter_is_minimal(self):
        """decode() with no formatter argument behaves like "minimal"."""
        markup = "<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
        soup = self.soup(markup)
        # No formatter is passed here on purpose: the point of this
        # test is the default, as opposed to test_formatter_minimal,
        # which names the formatter explicitly.
        decoded = soup.decode()
        # The < is converted back into &lt; but the e-with-acute is left alone.
        self.assertEqual(
            decoded,
            self.document_for(
                "<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"))

    def test_formatter_html(self):
        """The "html" formatter turns the e-with-acute into &eacute;."""
        markup = "<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
        soup = self.soup(markup)
        decoded = soup.decode(formatter="html")
        self.assertEqual(
            decoded,
            self.document_for("<b>&lt;&lt;Sacr&eacute; bleu!&gt;&gt;</b>"))

    def test_formatter_minimal(self):
        """The "minimal" formatter escapes angle brackets only."""
        markup = "<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
        soup = self.soup(markup)
        decoded = soup.decode(formatter="minimal")
        # The < is converted back into &lt; but the e-with-acute is left alone.
        self.assertEqual(
            decoded,
            self.document_for(
                "<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"))

    def test_formatter_null(self):
        """formatter=None performs no substitution at all."""
        markup = "<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
        soup = self.soup(markup)
        decoded = soup.decode(formatter=None)
        # Neither the angle brackets nor the e-with-acute are converted.
        # This is not valid HTML, but it's what the user wanted.
        self.assertEqual(
            decoded,
            self.document_for("<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"))

    def test_formatter_custom(self):
        """A callable formatter is applied to every string."""
        markup = "<b>&lt;foo&gt;</b><b>bar</b>"
        soup = self.soup(markup)
        decoded = soup.decode(formatter=lambda x: x.upper())
        # Instead of normal entity conversion code, the custom
        # callable is called on every string.
        self.assertEqual(
            decoded,
            self.document_for("<b><FOO></b><b>BAR</b>"))

    def test_formatter_is_run_on_attribute_values(self):
        """Every kind of formatter is applied to attribute values too."""
        markup = '<a href="http://a.com?a=b&c=é">e</a>'
        soup = self.soup(markup)
        a = soup.a

        # Default and explicit "minimal" both escape only the ampersand.
        expect_minimal = '<a href="http://a.com?a=b&amp;c=é">e</a>'

        self.assertEqual(expect_minimal, a.decode())
        self.assertEqual(expect_minimal, a.decode(formatter="minimal"))

        # "html" additionally replaces the accented character.
        expect_html = '<a href="http://a.com?a=b&amp;c=&eacute;">e</a>'
        self.assertEqual(expect_html, a.decode(formatter="html"))

        # No formatter: the markup comes back exactly as given.
        self.assertEqual(markup, a.decode(formatter=None))
        # Custom callable: applied to the attribute value and the text.
        expect_upper = '<a href="HTTP://A.COM?A=B&C=É">E</a>'
        self.assertEqual(expect_upper, a.decode(formatter=lambda x: x.upper()))

    def test_formatter_skips_script_tag_for_html_documents(self):
        """Angle brackets inside <script> are output unescaped."""
        doc = """
  <script type="text/javascript">
   console.log("< < hey > > ");
  </script>
"""
        encoded = BeautifulSoup(doc, 'html.parser').encode()
        self.assertIn(b"< < hey > >", encoded)

    def test_formatter_skips_style_tag_for_html_documents(self):
        """Angle brackets inside <style> are output unescaped."""
        doc = """
  <style type="text/css">
   console.log("< < hey > > ");
  </style>
"""
        encoded = BeautifulSoup(doc, 'html.parser').encode()
        self.assertIn(b"< < hey > >", encoded)

    def test_prettify_leaves_preformatted_text_alone(self):
        """prettify() does not touch whitespace inside <pre>."""
        soup = self.soup("<div>  foo  <pre>  \tbar\n  \n  </pre>  baz  ")
        # Everything outside the <pre> tag is reformatted, but everything
        # inside is left alone.
        self.assertEqual(
            '<div>\n foo\n <pre>  \tbar\n  \n  </pre>\n baz\n</div>',
            soup.div.prettify())

    def test_prettify_accepts_formatter(self):
        """prettify() passes strings through a custom formatter."""
        soup = BeautifulSoup("<html><body>foo</body></html>", 'html.parser')
        pretty = soup.prettify(formatter=lambda x: x.upper())
        self.assertIn("FOO", pretty)

    def test_prettify_outputs_unicode_by_default(self):
        """prettify() with no encoding returns str."""
        soup = self.soup("<a></a>")
        self.assertEqual(str, type(soup.prettify()))

    def test_prettify_can_encode_data(self):
        """prettify() given an encoding returns bytes."""
        soup = self.soup("<a></a>")
        self.assertEqual(bytes, type(soup.prettify("utf-8")))

    def test_html_entity_substitution_off_by_default(self):
        """encode() leaves a non-ASCII character as-is rather than as an entity."""
        markup = "<b>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</b>"
        soup = self.soup(markup)
        encoded = soup.b.encode("utf-8")
        self.assertEqual(encoded, markup.encode('utf-8'))

    def test_encoding_substitution(self):
        """The charset in a <meta> tag follows the encoding used for output."""
        # Here's the <meta> tag saying that a document is
        # encoded in Shift-JIS.
        meta_tag = ('<meta content="text/html; charset=x-sjis" '
                    'http-equiv="Content-type"/>')
        soup = self.soup(meta_tag)

        # Parse the document, and the charset appears unchanged.
        self.assertEqual(soup.meta['content'], 'text/html; charset=x-sjis')

        # Encode the document into some encoding, and the encoding is
        # substituted into the meta tag.
        utf_8 = soup.encode("utf-8")
        self.assertIn(b"charset=utf-8", utf_8)

        euc_jp = soup.encode("euc_jp")
        self.assertIn(b"charset=euc_jp", euc_jp)

        shift_jis = soup.encode("shift-jis")
        self.assertIn(b"charset=shift-jis", shift_jis)

        # UTF-16 output is decoded again so that the charset can be
        # searched for as text.
        utf_16_u = soup.encode("utf-16").decode("utf-16")
        self.assertIn("charset=utf-16", utf_16_u)

    def test_encoding_substitution_doesnt_happen_if_tag_is_strained(self):
        """Parsing with a strainer that drops <meta> still works."""
        markup = ('<head><meta content="text/html; charset=x-sjis" '
                  'http-equiv="Content-type"/></head><pre>foo</pre>')

        # Beautiful Soup used to try to rewrite the meta tag even if the
        # meta tag got filtered out by the strainer. This test makes
        # sure that doesn't happen.
        strainer = SoupStrainer('pre')
        soup = self.soup(markup, parse_only=strainer)
        self.assertEqual(soup.contents[0].name, 'pre')
1523*4882a593Smuzhiyun
class TestEncoding(SoupTest):
    """Checks on turning strings, tags and trees into encoded output."""

    def test_unicode_string_can_be_encoded(self):
        """A non-ASCII string node encodes to the same bytes as a plain str."""
        markup = "<b>\N{SNOWMAN}</b>"
        soup = self.soup(markup)
        expected = "\N{SNOWMAN}".encode("utf-8")
        self.assertEqual(soup.b.string.encode("utf-8"), expected)

    def test_tag_containing_unicode_string_can_be_encoded(self):
        """A tag holding a non-ASCII string encodes to its UTF-8 markup."""
        markup = "<b>\N{SNOWMAN}</b>"
        soup = self.soup(markup)
        expected = markup.encode("utf-8")
        self.assertEqual(soup.b.encode("utf-8"), expected)

    def test_encoding_substitutes_unrecognized_characters_by_default(self):
        """A character the codec cannot represent becomes a numeric reference."""
        markup = "<b>\N{SNOWMAN}</b>"
        soup = self.soup(markup)
        ascii_bytes = soup.b.encode("ascii")
        self.assertEqual(ascii_bytes, b"<b>&#9731;</b>")

    def test_encoding_can_be_made_strict(self):
        """errors="strict" raises instead of substituting."""
        markup = "<b>\N{SNOWMAN}</b>"
        soup = self.soup(markup)
        with self.assertRaises(UnicodeEncodeError):
            soup.encode("ascii", errors="strict")

    def test_decode_contents(self):
        """decode_contents() returns the tag's inner markup as text."""
        markup = "<b>\N{SNOWMAN}</b>"
        soup = self.soup(markup)
        inner = soup.b.decode_contents()
        self.assertEqual("\N{SNOWMAN}", inner)

    def test_encode_contents(self):
        """encode_contents() returns the tag's inner markup as bytes."""
        markup = "<b>\N{SNOWMAN}</b>"
        soup = self.soup(markup)
        inner = soup.b.encode_contents(encoding="utf8")
        self.assertEqual("\N{SNOWMAN}".encode("utf8"), inner)

    def test_deprecated_renderContents(self):
        """The legacy renderContents() gives the UTF-8 encoded inner markup."""
        markup = "<b>\N{SNOWMAN}</b>"
        soup = self.soup(markup)
        inner = soup.b.renderContents()
        self.assertEqual("\N{SNOWMAN}".encode("utf8"), inner)

    def test_repr(self):
        """repr() of a soup is its markup; the expected form depends on PY3K."""
        markup = "<b>\N{SNOWMAN}</b>"
        soup = self.soup(markup)
        if not PY3K:
            self.assertEqual(b'<b>\\u2603</b>', repr(soup))
        else:
            self.assertEqual(markup, repr(soup))
1575*4882a593Smuzhiyun
class TestNavigableStringSubclasses(SoupTest):
    """Checks on the output of CData, Doctype and Declaration objects."""

    def test_cdata(self):
        """A hand-made CData renders as a CDATA section and matches as text."""
        # The CData object is created by hand and inserted into an
        # empty document rather than produced by a parser.
        soup = self.soup("")
        section = CData("foo")
        soup.insert(1, section)
        self.assertEqual(str(soup), "<![CDATA[foo]]>")
        self.assertEqual(soup.find(text="foo"), "foo")
        self.assertEqual(soup.contents[0], "foo")

    def test_cdata_is_never_formatted(self):
        """The formatter is called for CData text, but its result is discarded.

        The output still contains the original text, and the counter
        shows the formatter ran exactly once.
        """
        self.count = 0

        def counting_formatter(*args):
            self.count += 1
            return "BITTER FAILURE"

        soup = self.soup("")
        section = CData("<><><>")
        soup.insert(1, section)
        encoded = soup.encode(formatter=counting_formatter)
        self.assertEqual(b"<![CDATA[<><><>]]>", encoded)
        self.assertEqual(1, self.count)

    def test_doctype_ends_in_newline(self):
        """An encoded Doctype is followed by a newline."""
        declaration = Doctype("foo")
        soup = self.soup("")
        soup.insert(1, declaration)
        self.assertEqual(soup.encode(), b"<!DOCTYPE foo>\n")

    def test_declaration(self):
        """Declaration.output_ready() wraps the text in <? and ?>."""
        declaration = Declaration("foo")
        self.assertEqual("<?foo?>", declaration.output_ready())
1617*4882a593Smuzhiyun
class TestSoupSelector(TreeTest):
    """Tests for CSS selector support through select() and select_one().

    Every test runs against the HTML fixture below, which setUp()
    parses with the 'html.parser' builder.
    """

    HTML = """
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>The title</title>
<link rel="stylesheet" href="blah.css" type="text/css" id="l1">
</head>
<body>
<custom-dashed-tag class="dashed" id="dash1">Hello there.</custom-dashed-tag>
<div id="main" class="fancy">
<div id="inner">
<h1 id="header1">An H1</h1>
<p>Some text</p>
<p class="onep" id="p1">Some more text</p>
<h2 id="header2">An H2</h2>
<p class="class1 class2 class3" id="pmulti">Another</p>
<a href="http://bob.example.org/" rel="friend met" id="bob">Bob</a>
<h2 id="header3">Another H2</h2>
<a id="me" href="http://simonwillison.net/" rel="me">me</a>
<span class="s1">
<a href="#" id="s1a1">span1a1</a>
<a href="#" id="s1a2">span1a2 <span id="s1a2s1">test</span></a>
<span class="span2">
<a href="#" id="s2a1">span2a1</a>
</span>
<span class="span3"></span>
<custom-dashed-tag class="dashed" id="dash2"/>
<div data-tag="dashedvalue" id="data1"/>
</span>
</div>
<x id="xid">
<z id="zida"/>
<z id="zidab"/>
<z id="zidac"/>
</x>
<y id="yid">
<z id="zidb"/>
</y>
<p lang="en" id="lang-en">English</p>
<p lang="en-gb" id="lang-en-gb">English UK</p>
<p lang="en-us" id="lang-en-us">English US</p>
<p lang="fr" id="lang-fr">French</p>
</div>

<div id="footer">
</div>
"""

    def setUp(self):
        # Within this class self.soup is the parsed fixture document
        # itself, so tests call self.soup.select(...) directly.
        self.soup = BeautifulSoup(self.HTML, 'html.parser')

    def assertSelects(self, selector, expected_ids):
        """Assert that `selector` matches exactly the tags with `expected_ids`.

        Order is ignored: both lists of IDs are compared in sorted
        order. Sorted copies are made, so the caller's sequence is never
        modified and any iterable of ID strings is accepted.

        This replaces the assertSelects() defined on TreeTest, which
        takes a list of tags rather than a selector string.
        """
        found_ids = sorted(el['id'] for el in self.soup.select(selector))
        wanted_ids = sorted(expected_ids)
        self.assertEqual(wanted_ids, found_ids,
            "Selector %s, expected [%s], got [%s]" % (
                selector, ', '.join(wanted_ids), ', '.join(found_ids)
            )
        )

    assertSelect = assertSelects

    def assertSelectMultiple(self, *tests):
        """Run assertSelect() on each (selector, expected_ids) pair."""
        for selector, expected_ids in tests:
            self.assertSelect(selector, expected_ids)

    def test_one_tag_one(self):
        els = self.soup.select('title')
        self.assertEqual(len(els), 1)
        self.assertEqual(els[0].name, 'title')
        self.assertEqual(els[0].contents, ['The title'])

    def test_one_tag_many(self):
        els = self.soup.select('div')
        self.assertEqual(len(els), 4)
        for div in els:
            self.assertEqual(div.name, 'div')

        # select_one() is expected to return the first of those matches.
        el = self.soup.select_one('div')
        self.assertEqual('main', el['id'])

    def test_select_one_returns_none_if_no_match(self):
        match = self.soup.select_one('nonexistenttag')
        self.assertIsNone(match)


    def test_tag_in_tag_one(self):
        self.assertSelects('div div', ['inner', 'data1'])

    def test_tag_in_tag_many(self):
        for selector in ('html div', 'html body div', 'body div'):
            self.assertSelects(selector, ['data1', 'main', 'inner', 'footer'])

    def test_tag_no_match(self):
        self.assertEqual(len(self.soup.select('del')), 0)

    def test_invalid_tag(self):
        self.assertRaises(ValueError, self.soup.select, 'tag%t')

    def test_select_dashed_tag_ids(self):
        self.assertSelects('custom-dashed-tag', ['dash1', 'dash2'])

    def test_select_dashed_by_id(self):
        dashed = self.soup.select('custom-dashed-tag[id=\"dash2\"]')
        self.assertEqual(dashed[0].name, 'custom-dashed-tag')
        self.assertEqual(dashed[0]['id'], 'dash2')

    def test_dashed_tag_text(self):
        self.assertEqual(self.soup.select('body > custom-dashed-tag')[0].text, 'Hello there.')

    def test_select_dashed_matches_find_all(self):
        self.assertEqual(self.soup.select('custom-dashed-tag'), self.soup.find_all('custom-dashed-tag'))

    def test_header_tags(self):
        self.assertSelectMultiple(
            ('h1', ['header1']),
            ('h2', ['header2', 'header3']),
        )

    def test_class_one(self):
        for selector in ('.onep', 'p.onep', 'html p.onep'):
            els = self.soup.select(selector)
            self.assertEqual(len(els), 1)
            self.assertEqual(els[0].name, 'p')
            self.assertEqual(els[0]['class'], ['onep'])

    def test_class_mismatched_tag(self):
        els = self.soup.select('div.onep')
        self.assertEqual(len(els), 0)

    def test_one_id(self):
        for selector in ('div#inner', '#inner', 'div div#inner'):
            self.assertSelects(selector, ['inner'])

    def test_bad_id(self):
        els = self.soup.select('#doesnotexist')
        self.assertEqual(len(els), 0)

    def test_items_in_id(self):
        els = self.soup.select('div#inner p')
        self.assertEqual(len(els), 3)
        for el in els:
            self.assertEqual(el.name, 'p')
        self.assertEqual(els[1]['class'], ['onep'])
        self.assertFalse(els[0].has_attr('class'))

    def test_a_bunch_of_emptys(self):
        for selector in ('div#main del', 'div#main div.oops', 'div div#main'):
            self.assertEqual(len(self.soup.select(selector)), 0)

    def test_multi_class_support(self):
        for selector in ('.class1', 'p.class1', '.class2', 'p.class2',
            '.class3', 'p.class3', 'html p.class2', 'div#inner .class2'):
            self.assertSelects(selector, ['pmulti'])

    def test_multi_class_selection(self):
        for selector in ('.class1.class3', '.class3.class2',
                         '.class1.class2.class3'):
            self.assertSelects(selector, ['pmulti'])

    def test_child_selector(self):
        self.assertSelects('.s1 > a', ['s1a1', 's1a2'])
        self.assertSelects('.s1 > a span', ['s1a2s1'])

    def test_child_selector_id(self):
        self.assertSelects('.s1 > a#s1a2 span', ['s1a2s1'])

    def test_attribute_equals(self):
        self.assertSelectMultiple(
            ('p[class="onep"]', ['p1']),
            ('p[id="p1"]', ['p1']),
            ('[class="onep"]', ['p1']),
            ('[id="p1"]', ['p1']),
            ('link[rel="stylesheet"]', ['l1']),
            ('link[type="text/css"]', ['l1']),
            ('link[href="blah.css"]', ['l1']),
            ('link[href="no-blah.css"]', []),
            ('[rel="stylesheet"]', ['l1']),
            ('[type="text/css"]', ['l1']),
            ('[href="blah.css"]', ['l1']),
            ('[href="no-blah.css"]', []),
            ('p[href="no-blah.css"]', []),
        )

    def test_attribute_tilde(self):
        self.assertSelectMultiple(
            ('p[class~="class1"]', ['pmulti']),
            ('p[class~="class2"]', ['pmulti']),
            ('p[class~="class3"]', ['pmulti']),
            ('[class~="class1"]', ['pmulti']),
            ('[class~="class2"]', ['pmulti']),
            ('[class~="class3"]', ['pmulti']),
            ('a[rel~="friend"]', ['bob']),
            ('a[rel~="met"]', ['bob']),
            ('[rel~="friend"]', ['bob']),
            ('[rel~="met"]', ['bob']),
        )

    def test_attribute_startswith(self):
        self.assertSelectMultiple(
            ('[rel^="style"]', ['l1']),
            ('link[rel^="style"]', ['l1']),
            ('notlink[rel^="notstyle"]', []),
            ('[rel^="notstyle"]', []),
            ('link[rel^="notstyle"]', []),
            ('link[href^="bla"]', ['l1']),
            ('a[href^="http://"]', ['bob', 'me']),
            ('[href^="http://"]', ['bob', 'me']),
            ('[id^="p"]', ['pmulti', 'p1']),
            ('[id^="m"]', ['me', 'main']),
            ('div[id^="m"]', ['main']),
            ('a[id^="m"]', ['me']),
            ('div[data-tag^="dashed"]', ['data1'])
        )

    def test_attribute_endswith(self):
        self.assertSelectMultiple(
            ('[href$=".css"]', ['l1']),
            ('link[href$=".css"]', ['l1']),
            ('link[id$="1"]', ['l1']),
            ('[id$="1"]', ['data1', 'l1', 'p1', 'header1', 's1a1', 's2a1', 's1a2s1', 'dash1']),
            ('div[id$="1"]', ['data1']),
            ('[id$="noending"]', []),
        )

    def test_attribute_contains(self):
        self.assertSelectMultiple(
            # From test_attribute_startswith
            ('[rel*="style"]', ['l1']),
            ('link[rel*="style"]', ['l1']),
            ('notlink[rel*="notstyle"]', []),
            ('[rel*="notstyle"]', []),
            ('link[rel*="notstyle"]', []),
            ('link[href*="bla"]', ['l1']),
            ('[href*="http://"]', ['bob', 'me']),
            ('[id*="p"]', ['pmulti', 'p1']),
            ('div[id*="m"]', ['main']),
            ('a[id*="m"]', ['me']),
            # From test_attribute_endswith
            ('[href*=".css"]', ['l1']),
            ('link[href*=".css"]', ['l1']),
            ('link[id*="1"]', ['l1']),
            ('[id*="1"]', ['data1', 'l1', 'p1', 'header1', 's1a1', 's1a2', 's2a1', 's1a2s1', 'dash1']),
            ('div[id*="1"]', ['data1']),
            ('[id*="noending"]', []),
            # New for this test
            ('[href*="."]', ['bob', 'me', 'l1']),
            ('a[href*="."]', ['bob', 'me']),
            ('link[href*="."]', ['l1']),
            ('div[id*="n"]', ['main', 'inner']),
            ('div[id*="nn"]', ['inner']),
            ('div[data-tag*="edval"]', ['data1'])
        )

    def test_attribute_exact_or_hypen(self):
        self.assertSelectMultiple(
            ('p[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']),
            ('[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']),
            ('p[lang|="fr"]', ['lang-fr']),
            ('p[lang|="gb"]', []),
        )

    def test_attribute_exists(self):
        self.assertSelectMultiple(
            ('[rel]', ['l1', 'bob', 'me']),
            ('link[rel]', ['l1']),
            ('a[rel]', ['bob', 'me']),
            ('[lang]', ['lang-en', 'lang-en-gb', 'lang-en-us', 'lang-fr']),
            ('p[class]', ['p1', 'pmulti']),
            ('[blah]', []),
            ('p[blah]', []),
            ('div[data-tag]', ['data1'])
        )

    def test_unsupported_pseudoclass(self):
        self.assertRaises(
            NotImplementedError, self.soup.select, "a:no-such-pseudoclass")

        self.assertRaises(
            NotImplementedError, self.soup.select, "a:nth-of-type(a)")


    def test_nth_of_type(self):
        # Try to select first paragraph
        els = self.soup.select('div#inner p:nth-of-type(1)')
        self.assertEqual(len(els), 1)
        self.assertEqual(els[0].string, 'Some text')

        # Try to select third paragraph
        els = self.soup.select('div#inner p:nth-of-type(3)')
        self.assertEqual(len(els), 1)
        self.assertEqual(els[0].string, 'Another')

        # Try to select (non-existent!) fourth paragraph
        els = self.soup.select('div#inner p:nth-of-type(4)')
        self.assertEqual(len(els), 0)

        # Pass in an invalid value.
        self.assertRaises(
            ValueError, self.soup.select, 'div p:nth-of-type(0)')

    def test_nth_of_type_direct_descendant(self):
        els = self.soup.select('div#inner > p:nth-of-type(1)')
        self.assertEqual(len(els), 1)
        self.assertEqual(els[0].string, 'Some text')

    def test_id_child_selector_nth_of_type(self):
        self.assertSelects('#inner > p:nth-of-type(2)', ['p1'])

    def test_select_on_element(self):
        # Other tests operate on the tree; this operates on an element
        # within the tree.
        main = self.soup.find("div", id="main")
        selected = main.select("div")
        # The <div id="inner"> tag was selected. The <div id="footer">
        # tag was not.
        # assertSelectsIDs() is not defined in this class; presumably it
        # is inherited from TreeTest and takes tags, not a selector.
        self.assertSelectsIDs(selected, ['inner', 'data1'])

    def test_overspecified_child_id(self):
        self.assertSelects(".fancy #inner", ['inner'])
        self.assertSelects(".normal #inner", [])

    def test_adjacent_sibling_selector(self):
        self.assertSelects('#p1 + h2', ['header2'])
        self.assertSelects('#p1 + h2 + p', ['pmulti'])
        self.assertSelects('#p1 + #header2 + .class1', ['pmulti'])
        self.assertEqual([], self.soup.select('#p1 + p'))

    def test_general_sibling_selector(self):
        self.assertSelects('#p1 ~ h2', ['header2', 'header3'])
        self.assertSelects('#p1 ~ #header2', ['header2'])
        self.assertSelects('#p1 ~ h2 + a', ['me'])
        self.assertSelects('#p1 ~ h2 + [rel="me"]', ['me'])
        self.assertEqual([], self.soup.select('#inner ~ h2'))

    def test_dangling_combinator(self):
        self.assertRaises(ValueError, self.soup.select, 'h1 >')

    def test_sibling_combinator_wont_select_same_tag_twice(self):
        self.assertSelects('p[lang] ~ p', ['lang-en-gb', 'lang-en-us', 'lang-fr'])

    # Test the selector grouping operator (the comma)
    def test_multiple_select(self):
        self.assertSelects('x, y', ['xid', 'yid'])

    def test_multiple_select_with_no_space(self):
        self.assertSelects('x,y', ['xid', 'yid'])

    def test_multiple_select_with_more_space(self):
        self.assertSelects('x,    y', ['xid', 'yid'])

    def test_multiple_select_duplicated(self):
        self.assertSelects('x, x', ['xid'])

    def test_multiple_select_sibling(self):
        self.assertSelects('x, y ~ p[lang=fr]', ['xid', 'lang-fr'])

    def test_multiple_select_tag_and_direct_descendant(self):
        self.assertSelects('x, y > z', ['xid', 'zidb'])

    def test_multiple_select_direct_descendant_and_tags(self):
        self.assertSelects('div > x, y, z', ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac'])

    def test_multiple_select_indirect_descendant(self):
        self.assertSelects('div x,y,  z', ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac'])

    def test_invalid_multiple_select(self):
        self.assertRaises(ValueError, self.soup.select, ',x, y')
        self.assertRaises(ValueError, self.soup.select, 'x,,y')

    def test_multiple_select_attrs(self):
        self.assertSelects('p[lang=en], p[lang=en-gb]', ['lang-en', 'lang-en-gb'])

    def test_multiple_select_ids(self):
        self.assertSelects('x, y > z[id=zida], z[id=zidab], z[id=zidb]', ['xid', 'zidb', 'zidab'])

    def test_multiple_select_nested(self):
        self.assertSelects('body > div > x, y > z', ['xid', 'zidb'])
2003*4882a593Smuzhiyun
2004*4882a593Smuzhiyun
2005