1From 925760291d6efec64fda6e9dd1fd9cfbd5be068c Mon Sep 17 00:00:00 2001 2From: Mike Bayer <mike_mp@zzzcomputing.com> 3Date: Mon, 29 Aug 2022 12:28:52 -0400 4Subject: [PATCH] fix tag regexp to match quoted groups correctly 5 6Fixed issue in lexer where the regexp used to match tags would not 7correctly interpret quoted sections individually. While this parsing issue 8still produced the same expected tag structure later on, the mis-handling 9of quoted sections was also subject to a regexp crash if a tag had a large 10number of quotes within its quoted sections. 11 12Fixes: #366 13Change-Id: I74e0d71ff7f419970711a7cd51adcf1bb90a44c0 14 15Upstream-Status: Backport [https://github.com/sqlalchemy/mako/commit/925760291d6efec64fda6e9dd1fd9cfbd5be068c] 16 17Signed-off-by: <narpat.mali@windriver.com> 18 19--- 20 doc/build/unreleased/366.rst | 9 +++++++++ 21 mako/lexer.py | 12 ++++++++---- 22 test/test_lexer.py | 21 +++++++++++++++++---- 23 3 files changed, 34 insertions(+), 8 deletions(-) 24 create mode 100644 doc/build/unreleased/366.rst 25 26--- /dev/null 27+++ Mako-1.1.6/doc/build/unreleased/366.rst 28@@ -0,0 +1,9 @@ 29+.. change:: 30+ :tags: bug, lexer 31+ :tickets: 366 32+ 33+ Fixed issue in lexer where the regexp used to match tags would not 34+ correctly interpret quoted sections individually. While this parsing issue 35+ still produced the same expected tag structure later on, the mis-handling 36+ of quoted sections was also subject to a regexp crash if a tag had a large 37+ number of quotes within its quoted sections. 38\ No newline at end of file 39--- Mako-1.1.6.orig/mako/lexer.py 40+++ Mako-1.1.6/mako/lexer.py 41@@ -295,20 +295,24 @@ class Lexer(object): 42 return self.template 43 44 def match_tag_start(self): 45- match = self.match( 46- r""" 47+ reg = r""" 48 \<% # opening tag 49 50 ([\w\.\:]+) # keyword 51 52- ((?:\s+\w+|\s*=\s*|".*?"|'.*?')*) # attrname, = \ 53+ ((?:\s+\w+|\s*=\s*|"[^"]*?"|'[^']*?'|\s*,\s*)*) # attrname, = \ 54 # sign, string expression 55+ # comma is for backwards compat 56+ # identified in #366 57 58 \s* # more whitespace 59 60 (/)?> # closing 61 62- """, 63+ """ 64+ 65+ match = self.match( 66+ reg, 67 re.I | re.S | re.X, 68 ) 69 70--- Mako-1.1.6.orig/test/test_lexer.py 71+++ Mako-1.1.6/test/test_lexer.py 72@@ -1,5 +1,7 @@ 73 import re 74 75+import pytest 76+ 77 from mako import compat 78 from mako import exceptions 79 from mako import parsetree 80@@ -146,6 +148,10 @@ class LexerTest(TemplateTest): 81 """ 82 self.assertRaises(exceptions.CompileException, Lexer(template).parse) 83 84+ def test_tag_many_quotes(self): 85+ template = "<%0" + '"' * 3000 86+ assert_raises(exceptions.SyntaxException, Lexer(template).parse) 87+ 88 def test_unmatched_tag(self): 89 template = """ 90 <%namespace name="bar"> 91@@ -432,9 +438,16 @@ class LexerTest(TemplateTest): 92 ), 93 ) 94 95- def test_pagetag(self): 96- template = """ 97- <%page cached="True", args="a, b"/> 98+ @pytest.mark.parametrize("comma,numchars", [(",", 48), ("", 47)]) 99+ def test_pagetag(self, comma, numchars): 100+ # note that the comma here looks like: 101+ # <%page cached="True", args="a, b"/> 102+ # that's what this test has looked like for decades, however, the 103+ # comma there is not actually the right syntax. When issue #366 104+ # was fixed, the reg was altered to accommodate for this comma to allow 105+ # backwards compat 106+ template = f""" 107+ <%page cached="True"{comma} args="a, b"/> 108 109 some template 110 """ 111@@ -453,7 +466,7 @@ class LexerTest(TemplateTest): 112 113 some template 114 """, 115- (2, 48), 116+ (2, numchars), 117 ), 118 ], 119 ), 120