1From 86368e9cf70a0ad23cccd5ee32de847149af0c6f Mon Sep 17 00:00:00 2001
2From: Stefan Behnel <stefan_ml@behnel.de>
3Date: Fri, 1 Jul 2022 21:06:10 +0200
4Subject: [PATCH] Fix a crash when incorrect parser input occurs together with
5 usages of iterwalk() on trees generated by the same parser.
6
7CVE: CVE-2022-2309
8
9Upstream-Status: Backport [https://github.com/lxml/lxml/commit/86368e9cf70a0ad23cccd5ee32de847149af0c6f]
11
12Signed-off-by: Yue Tao <yue.tao@windriver.com>
13
14---
15 src/lxml/apihelpers.pxi      |  7 ++++---
16 src/lxml/iterparse.pxi       | 11 ++++++-----
17 src/lxml/tests/test_etree.py | 20 ++++++++++++++++++++
18 3 files changed, 30 insertions(+), 8 deletions(-)
19
20diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi
21index c1662762..9fae9fb1 100644
22--- a/src/lxml/apihelpers.pxi
23+++ b/src/lxml/apihelpers.pxi
24@@ -246,9 +246,10 @@ cdef dict _build_nsmap(xmlNode* c_node):
25     while c_node is not NULL and c_node.type == tree.XML_ELEMENT_NODE:
26         c_ns = c_node.nsDef
27         while c_ns is not NULL:
28-            prefix = funicodeOrNone(c_ns.prefix)
29-            if prefix not in nsmap:
30-                nsmap[prefix] = funicodeOrNone(c_ns.href)
31+            if c_ns.prefix or c_ns.href:
32+                prefix = funicodeOrNone(c_ns.prefix)
33+                if prefix not in nsmap:
34+                    nsmap[prefix] = funicodeOrNone(c_ns.href)
35             c_ns = c_ns.next
36         c_node = c_node.parent
37     return nsmap
38diff --git a/src/lxml/iterparse.pxi b/src/lxml/iterparse.pxi
39index 138c23a6..a7299da6 100644
40--- a/src/lxml/iterparse.pxi
41+++ b/src/lxml/iterparse.pxi
42@@ -420,7 +420,7 @@ cdef int _countNsDefs(xmlNode* c_node):
43     count = 0
44     c_ns = c_node.nsDef
45     while c_ns is not NULL:
46-        count += 1
47+        count += (c_ns.href is not NULL)
48         c_ns = c_ns.next
49     return count
50
51@@ -431,9 +431,10 @@ cdef int _appendStartNsEvents(xmlNode* c_node, list event_list) except -1:
52     count = 0
53     c_ns = c_node.nsDef
54     while c_ns is not NULL:
55-        ns_tuple = (funicode(c_ns.prefix) if c_ns.prefix is not NULL else '',
56-                    funicode(c_ns.href))
57-        event_list.append( (u"start-ns", ns_tuple) )
58-        count += 1
59+        if c_ns.href:
60+            ns_tuple = (funicodeOrEmpty(c_ns.prefix),
61+                        funicode(c_ns.href))
62+            event_list.append( (u"start-ns", ns_tuple) )
63+            count += 1
64         c_ns = c_ns.next
65     return count
66diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py
67index e5f08469..285313f6 100644
68--- a/src/lxml/tests/test_etree.py
69+++ b/src/lxml/tests/test_etree.py
70@@ -1460,6 +1460,26 @@ class ETreeOnlyTestCase(HelperTestCase):
71             [1,2,1,4],
72             counts)
73
74+    def test_walk_after_parse_failure(self):
75+        # This used to be an issue because libxml2 can leak empty namespaces
76+        # between failed parser runs.  iterwalk() failed to handle such a tree.
77+        try:
78+            etree.XML('''<anot xmlns="1">''')
79+        except etree.XMLSyntaxError:
80+            pass
81+        else:
82+            assert False, "invalid input did not fail to parse"
83+
84+        et = etree.XML('''<root>  </root>''')
85+        try:
86+            ns = next(etree.iterwalk(et, events=('start-ns',)))
87+        except StopIteration:
88+            # This would be the expected result, because there was no namespace
89+            pass
90+        else:
91+            # This is a bug in libxml2
92+            assert not ns, repr(ns)
93+
94     def test_itertext_comment_pi(self):
95         # https://bugs.launchpad.net/lxml/+bug/1844674
96         XML = self.etree.XML
97--
982.17.1
99
100