1*4882a593SmuzhiyunFrom e9017c2416ad0ef642f5e0c2eab2dbf3cba4d997 Mon Sep 17 00:00:00 2001 2*4882a593SmuzhiyunFrom: Russ Cox <rsc@golang.org> 3*4882a593SmuzhiyunDate: Wed, 28 Sep 2022 11:18:51 -0400 4*4882a593SmuzhiyunSubject: [PATCH] [release-branch.go1.18] regexp: limit size of parsed regexps 5*4882a593Smuzhiyun 6*4882a593SmuzhiyunSet a 128 MB limit on the amount of space used by []syntax.Inst 7*4882a593Smuzhiyunin the compiled form corresponding to a given regexp. 8*4882a593Smuzhiyun 9*4882a593SmuzhiyunAlso set a 128 MB limit on the rune storage in the *syntax.Regexp 10*4882a593Smuzhiyuntree itself. 11*4882a593Smuzhiyun 12*4882a593SmuzhiyunThanks to Adam Korczynski (ADA Logics) and OSS-Fuzz for reporting this issue. 13*4882a593Smuzhiyun 14*4882a593SmuzhiyunFixes CVE-2022-41715. 15*4882a593SmuzhiyunUpdates #55949. 16*4882a593SmuzhiyunFixes #55950. 17*4882a593Smuzhiyun 18*4882a593SmuzhiyunChange-Id: Ia656baed81564436368cf950e1c5409752f28e1b 19*4882a593SmuzhiyunReviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/1592136 20*4882a593SmuzhiyunTryBot-Result: Security TryBots <security-trybots@go-security-trybots.iam.gserviceaccount.com> 21*4882a593SmuzhiyunReviewed-by: Damien Neil <dneil@google.com> 22*4882a593SmuzhiyunRun-TryBot: Roland Shoemaker <bracewell@google.com> 23*4882a593SmuzhiyunReviewed-by: Julie Qiu <julieqiu@google.com> 24*4882a593SmuzhiyunReviewed-on: https://go-review.googlesource.com/c/go/+/438501 25*4882a593SmuzhiyunRun-TryBot: Carlos Amedee <carlos@golang.org> 26*4882a593SmuzhiyunReviewed-by: Carlos Amedee <carlos@golang.org> 27*4882a593SmuzhiyunReviewed-by: Dmitri Shuralyov <dmitshur@google.com> 28*4882a593SmuzhiyunTryBot-Result: Gopher Robot <gobot@golang.org> 29*4882a593SmuzhiyunReviewed-by: Dmitri Shuralyov <dmitshur@golang.org> 30*4882a593Smuzhiyun 31*4882a593SmuzhiyunUpstream-Status: Backport [https://github.com/golang/go/commit/e9017c2416ad0ef642f5e0c2eab2dbf3cba4d997] 32*4882a593SmuzhiyunCVE: CVE-2022-41715 
33*4882a593SmuzhiyunSigned-off-by: Hitendra Prajapati <hprajapati@mvista.com> 34*4882a593Smuzhiyun--- 35*4882a593Smuzhiyun src/regexp/syntax/parse.go | 145 ++++++++++++++++++++++++++++++-- 36*4882a593Smuzhiyun src/regexp/syntax/parse_test.go | 13 +-- 37*4882a593Smuzhiyun 2 files changed, 148 insertions(+), 10 deletions(-) 38*4882a593Smuzhiyun 39*4882a593Smuzhiyundiff --git a/src/regexp/syntax/parse.go b/src/regexp/syntax/parse.go 40*4882a593Smuzhiyunindex d7cf2af..3792960 100644 41*4882a593Smuzhiyun--- a/src/regexp/syntax/parse.go 42*4882a593Smuzhiyun+++ b/src/regexp/syntax/parse.go 43*4882a593Smuzhiyun@@ -90,15 +90,49 @@ const ( 44*4882a593Smuzhiyun // until we've allocated at least maxHeight Regexp structures. 45*4882a593Smuzhiyun const maxHeight = 1000 46*4882a593Smuzhiyun 47*4882a593Smuzhiyun+// maxSize is the maximum size of a compiled regexp in Insts. 48*4882a593Smuzhiyun+// It too is somewhat arbitrarily chosen, but the idea is to be large enough 49*4882a593Smuzhiyun+// to allow significant regexps while at the same time small enough that 50*4882a593Smuzhiyun+// the compiled form will not take up too much memory. 51*4882a593Smuzhiyun+// 128 MB is enough for 3.3 million Inst structures, which roughly 52*4882a593Smuzhiyun+// corresponds to a 3.3 MB regexp. 53*4882a593Smuzhiyun+const ( 54*4882a593Smuzhiyun+ maxSize = 128 << 20 / instSize 55*4882a593Smuzhiyun+ instSize = 5 * 8 // byte, 2 uint32, slice is 5 64-bit words 56*4882a593Smuzhiyun+) 57*4882a593Smuzhiyun+ 58*4882a593Smuzhiyun+// maxRunes is the maximum number of runes allowed in a regexp tree 59*4882a593Smuzhiyun+// counting the runes in all the nodes. 60*4882a593Smuzhiyun+// Ignoring character classes p.numRunes is always less than the length of the regexp. 61*4882a593Smuzhiyun+// Character classes can make it much larger: each \pL adds 1292 runes. 62*4882a593Smuzhiyun+// 128 MB is enough for 32M runes, which is about 26k \pL instances. 
63*4882a593Smuzhiyun+// Note that repetitions do not make copies of the rune slices, 64*4882a593Smuzhiyun+// so \pL{1000} is only one rune slice, not 1000. 65*4882a593Smuzhiyun+// We could keep a cache of character classes we've seen, 66*4882a593Smuzhiyun+// so that all the \pL we see use the same rune list, 67*4882a593Smuzhiyun+// but that doesn't remove the problem entirely: 68*4882a593Smuzhiyun+// consider something like [\pL01234][\pL01235][\pL01236]...[\pL^&*()]. 69*4882a593Smuzhiyun+// And because the Rune slice is exposed directly in the Regexp, 70*4882a593Smuzhiyun+// there is not an opportunity to change the representation to allow 71*4882a593Smuzhiyun+// partial sharing between different character classes. 72*4882a593Smuzhiyun+// So the limit is the best we can do. 73*4882a593Smuzhiyun+const ( 74*4882a593Smuzhiyun+ maxRunes = 128 << 20 / runeSize 75*4882a593Smuzhiyun+ runeSize = 4 // rune is int32 76*4882a593Smuzhiyun+) 77*4882a593Smuzhiyun+ 78*4882a593Smuzhiyun type parser struct { 79*4882a593Smuzhiyun flags Flags // parse mode flags 80*4882a593Smuzhiyun stack []*Regexp // stack of parsed expressions 81*4882a593Smuzhiyun free *Regexp 82*4882a593Smuzhiyun numCap int // number of capturing groups seen 83*4882a593Smuzhiyun wholeRegexp string 84*4882a593Smuzhiyun- tmpClass []rune // temporary char class work space 85*4882a593Smuzhiyun- numRegexp int // number of regexps allocated 86*4882a593Smuzhiyun- height map[*Regexp]int // regexp height for height limit check 87*4882a593Smuzhiyun+ tmpClass []rune // temporary char class work space 88*4882a593Smuzhiyun+ numRegexp int // number of regexps allocated 89*4882a593Smuzhiyun+ numRunes int // number of runes in char classes 90*4882a593Smuzhiyun+ repeats int64 // product of all repetitions seen 91*4882a593Smuzhiyun+ height map[*Regexp]int // regexp height, for height limit check 92*4882a593Smuzhiyun+ size map[*Regexp]int64 // regexp compiled size, for size limit check 93*4882a593Smuzhiyun } 94*4882a593Smuzhiyun 
95*4882a593Smuzhiyun func (p *parser) newRegexp(op Op) *Regexp { 96*4882a593Smuzhiyun@@ -122,6 +156,104 @@ func (p *parser) reuse(re *Regexp) { 97*4882a593Smuzhiyun p.free = re 98*4882a593Smuzhiyun } 99*4882a593Smuzhiyun 100*4882a593Smuzhiyun+func (p *parser) checkLimits(re *Regexp) { 101*4882a593Smuzhiyun+ if p.numRunes > maxRunes { 102*4882a593Smuzhiyun+ panic(ErrInternalError) 103*4882a593Smuzhiyun+ } 104*4882a593Smuzhiyun+ p.checkSize(re) 105*4882a593Smuzhiyun+ p.checkHeight(re) 106*4882a593Smuzhiyun+} 107*4882a593Smuzhiyun+ 108*4882a593Smuzhiyun+func (p *parser) checkSize(re *Regexp) { 109*4882a593Smuzhiyun+ if p.size == nil { 110*4882a593Smuzhiyun+ // We haven't started tracking size yet. 111*4882a593Smuzhiyun+ // Do a relatively cheap check to see if we need to start. 112*4882a593Smuzhiyun+ // Maintain the product of all the repeats we've seen 113*4882a593Smuzhiyun+ // and don't track if the total number of regexp nodes 114*4882a593Smuzhiyun+ // we've seen times the repeat product is in budget. 115*4882a593Smuzhiyun+ if p.repeats == 0 { 116*4882a593Smuzhiyun+ p.repeats = 1 117*4882a593Smuzhiyun+ } 118*4882a593Smuzhiyun+ if re.Op == OpRepeat { 119*4882a593Smuzhiyun+ n := re.Max 120*4882a593Smuzhiyun+ if n == -1 { 121*4882a593Smuzhiyun+ n = re.Min 122*4882a593Smuzhiyun+ } 123*4882a593Smuzhiyun+ if n <= 0 { 124*4882a593Smuzhiyun+ n = 1 125*4882a593Smuzhiyun+ } 126*4882a593Smuzhiyun+ if int64(n) > maxSize/p.repeats { 127*4882a593Smuzhiyun+ p.repeats = maxSize 128*4882a593Smuzhiyun+ } else { 129*4882a593Smuzhiyun+ p.repeats *= int64(n) 130*4882a593Smuzhiyun+ } 131*4882a593Smuzhiyun+ } 132*4882a593Smuzhiyun+ if int64(p.numRegexp) < maxSize/p.repeats { 133*4882a593Smuzhiyun+ return 134*4882a593Smuzhiyun+ } 135*4882a593Smuzhiyun+ 136*4882a593Smuzhiyun+ // We need to start tracking size. 137*4882a593Smuzhiyun+ // Make the map and belatedly populate it 138*4882a593Smuzhiyun+ // with info about everything we've constructed so far. 
139*4882a593Smuzhiyun+ p.size = make(map[*Regexp]int64) 140*4882a593Smuzhiyun+ for _, re := range p.stack { 141*4882a593Smuzhiyun+ p.checkSize(re) 142*4882a593Smuzhiyun+ } 143*4882a593Smuzhiyun+ } 144*4882a593Smuzhiyun+ 145*4882a593Smuzhiyun+ if p.calcSize(re, true) > maxSize { 146*4882a593Smuzhiyun+ panic(ErrInternalError) 147*4882a593Smuzhiyun+ } 148*4882a593Smuzhiyun+} 149*4882a593Smuzhiyun+ 150*4882a593Smuzhiyun+func (p *parser) calcSize(re *Regexp, force bool) int64 { 151*4882a593Smuzhiyun+ if !force { 152*4882a593Smuzhiyun+ if size, ok := p.size[re]; ok { 153*4882a593Smuzhiyun+ return size 154*4882a593Smuzhiyun+ } 155*4882a593Smuzhiyun+ } 156*4882a593Smuzhiyun+ 157*4882a593Smuzhiyun+ var size int64 158*4882a593Smuzhiyun+ switch re.Op { 159*4882a593Smuzhiyun+ case OpLiteral: 160*4882a593Smuzhiyun+ size = int64(len(re.Rune)) 161*4882a593Smuzhiyun+ case OpCapture, OpStar: 162*4882a593Smuzhiyun+ // star can be 1+ or 2+; assume 2 pessimistically 163*4882a593Smuzhiyun+ size = 2 + p.calcSize(re.Sub[0], false) 164*4882a593Smuzhiyun+ case OpPlus, OpQuest: 165*4882a593Smuzhiyun+ size = 1 + p.calcSize(re.Sub[0], false) 166*4882a593Smuzhiyun+ case OpConcat: 167*4882a593Smuzhiyun+ for _, sub := range re.Sub { 168*4882a593Smuzhiyun+ size += p.calcSize(sub, false) 169*4882a593Smuzhiyun+ } 170*4882a593Smuzhiyun+ case OpAlternate: 171*4882a593Smuzhiyun+ for _, sub := range re.Sub { 172*4882a593Smuzhiyun+ size += p.calcSize(sub, false) 173*4882a593Smuzhiyun+ } 174*4882a593Smuzhiyun+ if len(re.Sub) > 1 { 175*4882a593Smuzhiyun+ size += int64(len(re.Sub)) - 1 176*4882a593Smuzhiyun+ } 177*4882a593Smuzhiyun+ case OpRepeat: 178*4882a593Smuzhiyun+ sub := p.calcSize(re.Sub[0], false) 179*4882a593Smuzhiyun+ if re.Max == -1 { 180*4882a593Smuzhiyun+ if re.Min == 0 { 181*4882a593Smuzhiyun+ size = 2 + sub // x* 182*4882a593Smuzhiyun+ } else { 183*4882a593Smuzhiyun+ size = 1 + int64(re.Min)*sub // xxx+ 184*4882a593Smuzhiyun+ } 185*4882a593Smuzhiyun+ break 186*4882a593Smuzhiyun+ } 
187*4882a593Smuzhiyun+ // x{2,5} = xx(x(x(x)?)?)? 188*4882a593Smuzhiyun+ size = int64(re.Max)*sub + int64(re.Max-re.Min) 189*4882a593Smuzhiyun+ } 190*4882a593Smuzhiyun+ 191*4882a593Smuzhiyun+ if size < 1 { 192*4882a593Smuzhiyun+ size = 1 193*4882a593Smuzhiyun+ } 194*4882a593Smuzhiyun+ p.size[re] = size 195*4882a593Smuzhiyun+ return size 196*4882a593Smuzhiyun+} 197*4882a593Smuzhiyun+ 198*4882a593Smuzhiyun func (p *parser) checkHeight(re *Regexp) { 199*4882a593Smuzhiyun if p.numRegexp < maxHeight { 200*4882a593Smuzhiyun return 201*4882a593Smuzhiyun@@ -158,6 +290,7 @@ func (p *parser) calcHeight(re *Regexp, force bool) int { 202*4882a593Smuzhiyun 203*4882a593Smuzhiyun // push pushes the regexp re onto the parse stack and returns the regexp. 204*4882a593Smuzhiyun func (p *parser) push(re *Regexp) *Regexp { 205*4882a593Smuzhiyun+ p.numRunes += len(re.Rune) 206*4882a593Smuzhiyun if re.Op == OpCharClass && len(re.Rune) == 2 && re.Rune[0] == re.Rune[1] { 207*4882a593Smuzhiyun // Single rune. 208*4882a593Smuzhiyun if p.maybeConcat(re.Rune[0], p.flags&^FoldCase) { 209*4882a593Smuzhiyun@@ -189,7 +322,7 @@ func (p *parser) push(re *Regexp) *Regexp { 210*4882a593Smuzhiyun } 211*4882a593Smuzhiyun 212*4882a593Smuzhiyun p.stack = append(p.stack, re) 213*4882a593Smuzhiyun- p.checkHeight(re) 214*4882a593Smuzhiyun+ p.checkLimits(re) 215*4882a593Smuzhiyun return re 216*4882a593Smuzhiyun } 217*4882a593Smuzhiyun 218*4882a593Smuzhiyun@@ -299,7 +432,7 @@ func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) ( 219*4882a593Smuzhiyun re.Sub = re.Sub0[:1] 220*4882a593Smuzhiyun re.Sub[0] = sub 221*4882a593Smuzhiyun p.stack[n-1] = re 222*4882a593Smuzhiyun- p.checkHeight(re) 223*4882a593Smuzhiyun+ p.checkLimits(re) 224*4882a593Smuzhiyun 225*4882a593Smuzhiyun if op == OpRepeat && (min >= 2 || max >= 2) && !repeatIsValid(re, 1000) { 226*4882a593Smuzhiyun return "", &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]} 227*4882a593Smuzhiyun@@ -503,6 +636,7 @@ func 
(p *parser) factor(sub []*Regexp) []*Regexp { 228*4882a593Smuzhiyun 229*4882a593Smuzhiyun for j := start; j < i; j++ { 230*4882a593Smuzhiyun sub[j] = p.removeLeadingString(sub[j], len(str)) 231*4882a593Smuzhiyun+ p.checkLimits(sub[j]) 232*4882a593Smuzhiyun } 233*4882a593Smuzhiyun suffix := p.collapse(sub[start:i], OpAlternate) // recurse 234*4882a593Smuzhiyun 235*4882a593Smuzhiyun@@ -560,6 +694,7 @@ func (p *parser) factor(sub []*Regexp) []*Regexp { 236*4882a593Smuzhiyun for j := start; j < i; j++ { 237*4882a593Smuzhiyun reuse := j != start // prefix came from sub[start] 238*4882a593Smuzhiyun sub[j] = p.removeLeadingRegexp(sub[j], reuse) 239*4882a593Smuzhiyun+ p.checkLimits(sub[j]) 240*4882a593Smuzhiyun } 241*4882a593Smuzhiyun suffix := p.collapse(sub[start:i], OpAlternate) // recurse 242*4882a593Smuzhiyun 243*4882a593Smuzhiyundiff --git a/src/regexp/syntax/parse_test.go b/src/regexp/syntax/parse_test.go 244*4882a593Smuzhiyunindex 1ef6d8a..67e3c56 100644 245*4882a593Smuzhiyun--- a/src/regexp/syntax/parse_test.go 246*4882a593Smuzhiyun+++ b/src/regexp/syntax/parse_test.go 247*4882a593Smuzhiyun@@ -484,12 +484,15 @@ var invalidRegexps = []string{ 248*4882a593Smuzhiyun `(?P<>a)`, 249*4882a593Smuzhiyun `[a-Z]`, 250*4882a593Smuzhiyun `(?i)[a-Z]`, 251*4882a593Smuzhiyun- `a{100000}`, 252*4882a593Smuzhiyun- `a{100000,}`, 253*4882a593Smuzhiyun- "((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})", 254*4882a593Smuzhiyun- strings.Repeat("(", 1000) + strings.Repeat(")", 1000), 255*4882a593Smuzhiyun- strings.Repeat("(?:", 1000) + strings.Repeat(")*", 1000), 256*4882a593Smuzhiyun `\Q\E*`, 257*4882a593Smuzhiyun+ `a{100000}`, // too much repetition 258*4882a593Smuzhiyun+ `a{100000,}`, // too much repetition 259*4882a593Smuzhiyun+ "((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})", // too much repetition 260*4882a593Smuzhiyun+ strings.Repeat("(", 1000) + strings.Repeat(")", 1000), // too deep 261*4882a593Smuzhiyun+ strings.Repeat("(?:", 1000) + strings.Repeat(")*", 1000), // too 
deep 262*4882a593Smuzhiyun+ "(" + strings.Repeat("(xx?)", 1000) + "){1000}", // too long 263*4882a593Smuzhiyun+ strings.Repeat("(xx?){1000}", 1000), // too long 264*4882a593Smuzhiyun+ strings.Repeat(`\pL`, 27000), // too many runes 265*4882a593Smuzhiyun } 266*4882a593Smuzhiyun 267*4882a593Smuzhiyun var onlyPerl = []string{ 268*4882a593Smuzhiyun-- 269*4882a593Smuzhiyun2.25.1 270*4882a593Smuzhiyun 271