1From 86368e9cf70a0ad23cccd5ee32de847149af0c6f Mon Sep 17 00:00:00 2001 2From: Stefan Behnel <stefan_ml@behnel.de> 3Date: Fri, 1 Jul 2022 21:06:10 +0200 4Subject: [PATCH] Fix a crash when incorrect parser input occurs together with 5 usages of iterwalk() on trees generated by the same parser. 6 7CVE: CVE-2022-2309 8 9Upstream-Status: Backport 10[https://github.com/lxml/lxml/commit/86368e9cf70a0ad23cccd5ee32de847149af0c6f] 11 12Signed-off-by: Yue Tao <yue.tao@windriver.com> 13 14--- 15 src/lxml/apihelpers.pxi | 7 ++++--- 16 src/lxml/iterparse.pxi | 11 ++++++----- 17 src/lxml/tests/test_etree.py | 20 ++++++++++++++++++++ 18 3 files changed, 30 insertions(+), 8 deletions(-) 19 20diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi 21index c1662762..9fae9fb1 100644 22--- a/src/lxml/apihelpers.pxi 23+++ b/src/lxml/apihelpers.pxi 24@@ -246,9 +246,10 @@ cdef dict _build_nsmap(xmlNode* c_node): 25 while c_node is not NULL and c_node.type == tree.XML_ELEMENT_NODE: 26 c_ns = c_node.nsDef 27 while c_ns is not NULL: 28- prefix = funicodeOrNone(c_ns.prefix) 29- if prefix not in nsmap: 30- nsmap[prefix] = funicodeOrNone(c_ns.href) 31+ if c_ns.prefix or c_ns.href: 32+ prefix = funicodeOrNone(c_ns.prefix) 33+ if prefix not in nsmap: 34+ nsmap[prefix] = funicodeOrNone(c_ns.href) 35 c_ns = c_ns.next 36 c_node = c_node.parent 37 return nsmap 38diff --git a/src/lxml/iterparse.pxi b/src/lxml/iterparse.pxi 39index 138c23a6..a7299da6 100644 40--- a/src/lxml/iterparse.pxi 41+++ b/src/lxml/iterparse.pxi 42@@ -420,7 +420,7 @@ cdef int _countNsDefs(xmlNode* c_node): 43 count = 0 44 c_ns = c_node.nsDef 45 while c_ns is not NULL: 46- count += 1 47+ count += (c_ns.href is not NULL) 48 c_ns = c_ns.next 49 return count 50 51@@ -431,9 +431,10 @@ cdef int _appendStartNsEvents(xmlNode* c_node, list event_list) except -1: 52 count = 0 53 c_ns = c_node.nsDef 54 while c_ns is not NULL: 55- ns_tuple = (funicode(c_ns.prefix) if c_ns.prefix is not NULL else '', 56- funicode(c_ns.href)) 57- event_list.append( (u"start-ns", ns_tuple) ) 58- count += 1 59+ if c_ns.href: 60+ ns_tuple = (funicodeOrEmpty(c_ns.prefix), 61+ funicode(c_ns.href)) 62+ event_list.append( (u"start-ns", ns_tuple) ) 63+ count += 1 64 c_ns = c_ns.next 65 return count 66diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py 67index e5f08469..285313f6 100644 68--- a/src/lxml/tests/test_etree.py 69+++ b/src/lxml/tests/test_etree.py 70@@ -1460,6 +1460,26 @@ class ETreeOnlyTestCase(HelperTestCase): 71 [1,2,1,4], 72 counts) 73 74+ def test_walk_after_parse_failure(self): 75+ # This used to be an issue because libxml2 can leak empty namespaces 76+ # between failed parser runs. iterwalk() failed to handle such a tree. 77+ try: 78+ etree.XML('''<anot xmlns="1">''') 79+ except etree.XMLSyntaxError: 80+ pass 81+ else: 82+ assert False, "invalid input did not fail to parse" 83+ 84+ et = etree.XML('''<root> </root>''') 85+ try: 86+ ns = next(etree.iterwalk(et, events=('start-ns',))) 87+ except StopIteration: 88+ # This would be the expected result, because there was no namespace 89+ pass 90+ else: 91+ # This is a bug in libxml2 92+ assert not ns, repr(ns) 93+ 94 def test_itertext_comment_pi(self): 95 # https://bugs.launchpad.net/lxml/+bug/1844674 96 XML = self.etree.XML 97-- 982.17.1 99 100