1From e9017c2416ad0ef642f5e0c2eab2dbf3cba4d997 Mon Sep 17 00:00:00 2001 2From: Russ Cox <rsc@golang.org> 3Date: Wed, 28 Sep 2022 11:18:51 -0400 4Subject: [PATCH] [release-branch.go1.18] regexp: limit size of parsed regexps 5 6Set a 128 MB limit on the amount of space used by []syntax.Inst 7in the compiled form corresponding to a given regexp. 8 9Also set a 128 MB limit on the rune storage in the *syntax.Regexp 10tree itself. 11 12Thanks to Adam Korczynski (ADA Logics) and OSS-Fuzz for reporting this issue. 13 14Fixes CVE-2022-41715. 15Updates #55949. 16Fixes #55950. 17 18Change-Id: Ia656baed81564436368cf950e1c5409752f28e1b 19Reviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/1592136 20TryBot-Result: Security TryBots <security-trybots@go-security-trybots.iam.gserviceaccount.com> 21Reviewed-by: Damien Neil <dneil@google.com> 22Run-TryBot: Roland Shoemaker <bracewell@google.com> 23Reviewed-by: Julie Qiu <julieqiu@google.com> 24Reviewed-on: https://go-review.googlesource.com/c/go/+/438501 25Run-TryBot: Carlos Amedee <carlos@golang.org> 26Reviewed-by: Carlos Amedee <carlos@golang.org> 27Reviewed-by: Dmitri Shuralyov <dmitshur@google.com> 28TryBot-Result: Gopher Robot <gobot@golang.org> 29Reviewed-by: Dmitri Shuralyov <dmitshur@golang.org> 30 31Upstream-Status: Backport [https://github.com/golang/go/commit/e9017c2416ad0ef642f5e0c2eab2dbf3cba4d997] 32CVE: CVE-2022-41715 33Signed-off-by: Hitendra Prajapati <hprajapati@mvista.com> 34--- 35 src/regexp/syntax/parse.go | 145 ++++++++++++++++++++++++++++++-- 36 src/regexp/syntax/parse_test.go | 13 +-- 37 2 files changed, 148 insertions(+), 10 deletions(-) 38 39diff --git a/src/regexp/syntax/parse.go b/src/regexp/syntax/parse.go 40index d7cf2af..3792960 100644 41--- a/src/regexp/syntax/parse.go 42+++ b/src/regexp/syntax/parse.go 43@@ -90,15 +90,49 @@ const ( 44 // until we've allocated at least maxHeight Regexp structures. 45 const maxHeight = 1000 46 47+// maxSize is the maximum size of a compiled regexp in Insts. 48+// It too is somewhat arbitrarily chosen, but the idea is to be large enough 49+// to allow significant regexps while at the same time small enough that 50+// the compiled form will not take up too much memory. 51+// 128 MB is enough for a 3.3 million Inst structures, which roughly 52+// corresponds to a 3.3 MB regexp. 53+const ( 54+ maxSize = 128 << 20 / instSize 55+ instSize = 5 * 8 // byte, 2 uint32, slice is 5 64-bit words 56+) 57+ 58+// maxRunes is the maximum number of runes allowed in a regexp tree 59+// counting the runes in all the nodes. 60+// Ignoring character classes p.numRunes is always less than the length of the regexp. 61+// Character classes can make it much larger: each \pL adds 1292 runes. 62+// 128 MB is enough for 32M runes, which is over 26k \pL instances. 63+// Note that repetitions do not make copies of the rune slices, 64+// so \pL{1000} is only one rune slice, not 1000. 65+// We could keep a cache of character classes we've seen, 66+// so that all the \pL we see use the same rune list, 67+// but that doesn't remove the problem entirely: 68+// consider something like [\pL01234][\pL01235][\pL01236]...[\pL^&*()]. 69+// And because the Rune slice is exposed directly in the Regexp, 70+// there is not an opportunity to change the representation to allow 71+// partial sharing between different character classes. 72+// So the limit is the best we can do. 73+const ( 74+ maxRunes = 128 << 20 / runeSize 75+ runeSize = 4 // rune is int32 76+) 77+ 78 type parser struct { 79 flags Flags // parse mode flags 80 stack []*Regexp // stack of parsed expressions 81 free *Regexp 82 numCap int // number of capturing groups seen 83 wholeRegexp string 84- tmpClass []rune // temporary char class work space 85- numRegexp int // number of regexps allocated 86- height map[*Regexp]int // regexp height for height limit check 87+ tmpClass []rune // temporary char class work space 88+ numRegexp int // number of regexps allocated 89+ numRunes int // number of runes in char classes 90+ repeats int64 // product of all repetitions seen 91+ height map[*Regexp]int // regexp height, for height limit check 92+ size map[*Regexp]int64 // regexp compiled size, for size limit check 93 } 94 95 func (p *parser) newRegexp(op Op) *Regexp { 96@@ -122,6 +156,104 @@ func (p *parser) reuse(re *Regexp) { 97 p.free = re 98 } 99 100+func (p *parser) checkLimits(re *Regexp) { 101+ if p.numRunes > maxRunes { 102+ panic(ErrInternalError) 103+ } 104+ p.checkSize(re) 105+ p.checkHeight(re) 106+} 107+ 108+func (p *parser) checkSize(re *Regexp) { 109+ if p.size == nil { 110+ // We haven't started tracking size yet. 111+ // Do a relatively cheap check to see if we need to start. 112+ // Maintain the product of all the repeats we've seen 113+ // and don't track if the total number of regexp nodes 114+ // we've seen times the repeat product is in budget. 115+ if p.repeats == 0 { 116+ p.repeats = 1 117+ } 118+ if re.Op == OpRepeat { 119+ n := re.Max 120+ if n == -1 { 121+ n = re.Min 122+ } 123+ if n <= 0 { 124+ n = 1 125+ } 126+ if int64(n) > maxSize/p.repeats { 127+ p.repeats = maxSize 128+ } else { 129+ p.repeats *= int64(n) 130+ } 131+ } 132+ if int64(p.numRegexp) < maxSize/p.repeats { 133+ return 134+ } 135+ 136+ // We need to start tracking size. 137+ // Make the map and belatedly populate it 138+ // with info about everything we've constructed so far. 139+ p.size = make(map[*Regexp]int64) 140+ for _, re := range p.stack { 141+ p.checkSize(re) 142+ } 143+ } 144+ 145+ if p.calcSize(re, true) > maxSize { 146+ panic(ErrInternalError) 147+ } 148+} 149+ 150+func (p *parser) calcSize(re *Regexp, force bool) int64 { 151+ if !force { 152+ if size, ok := p.size[re]; ok { 153+ return size 154+ } 155+ } 156+ 157+ var size int64 158+ switch re.Op { 159+ case OpLiteral: 160+ size = int64(len(re.Rune)) 161+ case OpCapture, OpStar: 162+ // star can be 1+ or 2+; assume 2 pessimistically 163+ size = 2 + p.calcSize(re.Sub[0], false) 164+ case OpPlus, OpQuest: 165+ size = 1 + p.calcSize(re.Sub[0], false) 166+ case OpConcat: 167+ for _, sub := range re.Sub { 168+ size += p.calcSize(sub, false) 169+ } 170+ case OpAlternate: 171+ for _, sub := range re.Sub { 172+ size += p.calcSize(sub, false) 173+ } 174+ if len(re.Sub) > 1 { 175+ size += int64(len(re.Sub)) - 1 176+ } 177+ case OpRepeat: 178+ sub := p.calcSize(re.Sub[0], false) 179+ if re.Max == -1 { 180+ if re.Min == 0 { 181+ size = 2 + sub // x* 182+ } else { 183+ size = 1 + int64(re.Min)*sub // xxx+ 184+ } 185+ break 186+ } 187+ // x{2,5} = xx(x(x(x)?)?)? 188+ size = int64(re.Max)*sub + int64(re.Max-re.Min) 189+ } 190+ 191+ if size < 1 { 192+ size = 1 193+ } 194+ p.size[re] = size 195+ return size 196+} 197+ 198 func (p *parser) checkHeight(re *Regexp) { 199 if p.numRegexp < maxHeight { 200 return 201@@ -158,6 +290,7 @@ func (p *parser) calcHeight(re *Regexp, force bool) int { 202 203 // push pushes the regexp re onto the parse stack and returns the regexp. 204 func (p *parser) push(re *Regexp) *Regexp { 205+ p.numRunes += len(re.Rune) 206 if re.Op == OpCharClass && len(re.Rune) == 2 && re.Rune[0] == re.Rune[1] { 207 // Single rune. 208 if p.maybeConcat(re.Rune[0], p.flags&^FoldCase) { 209@@ -189,7 +322,7 @@ func (p *parser) push(re *Regexp) *Regexp { 210 } 211 212 p.stack = append(p.stack, re) 213- p.checkHeight(re) 214+ p.checkLimits(re) 215 return re 216 } 217 218@@ -299,7 +432,7 @@ func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) ( 219 re.Sub = re.Sub0[:1] 220 re.Sub[0] = sub 221 p.stack[n-1] = re 222- p.checkHeight(re) 223+ p.checkLimits(re) 224 225 if op == OpRepeat && (min >= 2 || max >= 2) && !repeatIsValid(re, 1000) { 226 return "", &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]} 227@@ -503,6 +636,7 @@ func (p *parser) factor(sub []*Regexp) []*Regexp { 228 229 for j := start; j < i; j++ { 230 sub[j] = p.removeLeadingString(sub[j], len(str)) 231+ p.checkLimits(sub[j]) 232 } 233 suffix := p.collapse(sub[start:i], OpAlternate) // recurse 234 235@@ -560,6 +694,7 @@ func (p *parser) factor(sub []*Regexp) []*Regexp { 236 for j := start; j < i; j++ { 237 reuse := j != start // prefix came from sub[start] 238 sub[j] = p.removeLeadingRegexp(sub[j], reuse) 239+ p.checkLimits(sub[j]) 240 } 241 suffix := p.collapse(sub[start:i], OpAlternate) // recurse 242 243diff --git a/src/regexp/syntax/parse_test.go b/src/regexp/syntax/parse_test.go 244index 1ef6d8a..67e3c56 100644 245--- a/src/regexp/syntax/parse_test.go 246+++ b/src/regexp/syntax/parse_test.go 247@@ -484,12 +484,15 @@ var invalidRegexps = []string{ 248 `(?P<>a)`, 249 `[a-Z]`, 250 `(?i)[a-Z]`, 251- `a{100000}`, 252- `a{100000,}`, 253- "((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})", 254- strings.Repeat("(", 1000) + strings.Repeat(")", 1000), 255- strings.Repeat("(?:", 1000) + strings.Repeat(")*", 1000), 256 `\Q\E*`, 257+ `a{100000}`, // too much repetition 258+ `a{100000,}`, // too much repetition 259+ "((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})", // too much repetition 260+ strings.Repeat("(", 1000) + strings.Repeat(")", 1000), // too deep 261+ strings.Repeat("(?:", 1000) + strings.Repeat(")*", 1000), // too deep 262+ "(" + strings.Repeat("(xx?)", 1000) + "){1000}", // too long 263+ strings.Repeat("(xx?){1000}", 1000), // too long 264+ strings.Repeat(`\pL`, 27000), // too many runes 265 } 266 267 var onlyPerl = []string{ 268-- 2692.25.1 270 271