xref: /OK3568_Linux_fs/yocto/poky/meta/recipes-devtools/go/go-1.18/CVE-2022-41715.patch (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593SmuzhiyunFrom e9017c2416ad0ef642f5e0c2eab2dbf3cba4d997 Mon Sep 17 00:00:00 2001
2*4882a593SmuzhiyunFrom: Russ Cox <rsc@golang.org>
3*4882a593SmuzhiyunDate: Wed, 28 Sep 2022 11:18:51 -0400
4*4882a593SmuzhiyunSubject: [PATCH] [release-branch.go1.18] regexp: limit size of parsed regexps
5*4882a593Smuzhiyun
6*4882a593SmuzhiyunSet a 128 MB limit on the amount of space used by []syntax.Inst
7*4882a593Smuzhiyunin the compiled form corresponding to a given regexp.
8*4882a593Smuzhiyun
9*4882a593SmuzhiyunAlso set a 128 MB limit on the rune storage in the *syntax.Regexp
10*4882a593Smuzhiyuntree itself.
11*4882a593Smuzhiyun
12*4882a593SmuzhiyunThanks to Adam Korczynski (ADA Logics) and OSS-Fuzz for reporting this issue.
13*4882a593Smuzhiyun
14*4882a593SmuzhiyunFixes CVE-2022-41715.
15*4882a593SmuzhiyunUpdates #55949.
16*4882a593SmuzhiyunFixes #55950.
17*4882a593Smuzhiyun
18*4882a593SmuzhiyunChange-Id: Ia656baed81564436368cf950e1c5409752f28e1b
19*4882a593SmuzhiyunReviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/1592136
20*4882a593SmuzhiyunTryBot-Result: Security TryBots <security-trybots@go-security-trybots.iam.gserviceaccount.com>
21*4882a593SmuzhiyunReviewed-by: Damien Neil <dneil@google.com>
22*4882a593SmuzhiyunRun-TryBot: Roland Shoemaker <bracewell@google.com>
23*4882a593SmuzhiyunReviewed-by: Julie Qiu <julieqiu@google.com>
24*4882a593SmuzhiyunReviewed-on: https://go-review.googlesource.com/c/go/+/438501
25*4882a593SmuzhiyunRun-TryBot: Carlos Amedee <carlos@golang.org>
26*4882a593SmuzhiyunReviewed-by: Carlos Amedee <carlos@golang.org>
27*4882a593SmuzhiyunReviewed-by: Dmitri Shuralyov <dmitshur@google.com>
28*4882a593SmuzhiyunTryBot-Result: Gopher Robot <gobot@golang.org>
29*4882a593SmuzhiyunReviewed-by: Dmitri Shuralyov <dmitshur@golang.org>
30*4882a593Smuzhiyun
31*4882a593SmuzhiyunUpstream-Status: Backport [https://github.com/golang/go/commit/e9017c2416ad0ef642f5e0c2eab2dbf3cba4d997]
32*4882a593SmuzhiyunCVE: CVE-2022-41715
33*4882a593SmuzhiyunSigned-off-by: Hitendra Prajapati <hprajapati@mvista.com>
34*4882a593Smuzhiyun---
35*4882a593Smuzhiyun src/regexp/syntax/parse.go      | 145 ++++++++++++++++++++++++++++++--
36*4882a593Smuzhiyun src/regexp/syntax/parse_test.go |  13 +--
37*4882a593Smuzhiyun 2 files changed, 148 insertions(+), 10 deletions(-)
38*4882a593Smuzhiyun
39*4882a593Smuzhiyundiff --git a/src/regexp/syntax/parse.go b/src/regexp/syntax/parse.go
40*4882a593Smuzhiyunindex d7cf2af..3792960 100644
41*4882a593Smuzhiyun--- a/src/regexp/syntax/parse.go
42*4882a593Smuzhiyun+++ b/src/regexp/syntax/parse.go
43*4882a593Smuzhiyun@@ -90,15 +90,49 @@ const (
44*4882a593Smuzhiyun // until we've allocated at least maxHeight Regexp structures.
45*4882a593Smuzhiyun const maxHeight = 1000
46*4882a593Smuzhiyun
47*4882a593Smuzhiyun+// maxSize is the maximum size of a compiled regexp in Insts.
48*4882a593Smuzhiyun+// It too is somewhat arbitrarily chosen, but the idea is to be large enough
49*4882a593Smuzhiyun+// to allow significant regexps while at the same time small enough that
50*4882a593Smuzhiyun+// the compiled form will not take up too much memory.
51*4882a593Smuzhiyun+// 128 MB is enough for 3.3 million Inst structures, which roughly
52*4882a593Smuzhiyun+// corresponds to a 3.3 MB regexp.
53*4882a593Smuzhiyun+const (
54*4882a593Smuzhiyun+	maxSize  = 128 << 20 / instSize
55*4882a593Smuzhiyun+	instSize = 5 * 8 // byte, 2 uint32, slice is 5 64-bit words
56*4882a593Smuzhiyun+)
57*4882a593Smuzhiyun+
58*4882a593Smuzhiyun+// maxRunes is the maximum number of runes allowed in a regexp tree
59*4882a593Smuzhiyun+// counting the runes in all the nodes.
60*4882a593Smuzhiyun+// Ignoring character classes, p.numRunes is always less than the length of the regexp.
61*4882a593Smuzhiyun+// Character classes can make it much larger: each \pL adds 1292 runes.
62*4882a593Smuzhiyun+// 128 MB is enough for 32M runes, which is over 26k \pL instances.
63*4882a593Smuzhiyun+// Note that repetitions do not make copies of the rune slices,
64*4882a593Smuzhiyun+// so \pL{1000} is only one rune slice, not 1000.
65*4882a593Smuzhiyun+// We could keep a cache of character classes we've seen,
66*4882a593Smuzhiyun+// so that all the \pL we see use the same rune list,
67*4882a593Smuzhiyun+// but that doesn't remove the problem entirely:
68*4882a593Smuzhiyun+// consider something like [\pL01234][\pL01235][\pL01236]...[\pL^&*()].
69*4882a593Smuzhiyun+// And because the Rune slice is exposed directly in the Regexp,
70*4882a593Smuzhiyun+// there is not an opportunity to change the representation to allow
71*4882a593Smuzhiyun+// partial sharing between different character classes.
72*4882a593Smuzhiyun+// So the limit is the best we can do.
73*4882a593Smuzhiyun+const (
74*4882a593Smuzhiyun+	maxRunes = 128 << 20 / runeSize
75*4882a593Smuzhiyun+	runeSize = 4 // rune is int32
76*4882a593Smuzhiyun+)
77*4882a593Smuzhiyun+
78*4882a593Smuzhiyun type parser struct {
79*4882a593Smuzhiyun 	flags       Flags     // parse mode flags
80*4882a593Smuzhiyun 	stack       []*Regexp // stack of parsed expressions
81*4882a593Smuzhiyun 	free        *Regexp
82*4882a593Smuzhiyun 	numCap      int // number of capturing groups seen
83*4882a593Smuzhiyun 	wholeRegexp string
84*4882a593Smuzhiyun-	tmpClass    []rune          // temporary char class work space
85*4882a593Smuzhiyun-	numRegexp   int             // number of regexps allocated
86*4882a593Smuzhiyun-	height      map[*Regexp]int // regexp height for height limit check
87*4882a593Smuzhiyun+	tmpClass    []rune            // temporary char class work space
88*4882a593Smuzhiyun+	numRegexp   int               // number of regexps allocated
89*4882a593Smuzhiyun+	numRunes    int               // number of runes in char classes
90*4882a593Smuzhiyun+	repeats     int64             // product of all repetitions seen
91*4882a593Smuzhiyun+	height      map[*Regexp]int   // regexp height, for height limit check
92*4882a593Smuzhiyun+	size        map[*Regexp]int64 // regexp compiled size, for size limit check
93*4882a593Smuzhiyun }
94*4882a593Smuzhiyun
95*4882a593Smuzhiyun func (p *parser) newRegexp(op Op) *Regexp {
96*4882a593Smuzhiyun@@ -122,6 +156,104 @@ func (p *parser) reuse(re *Regexp) {
97*4882a593Smuzhiyun 	p.free = re
98*4882a593Smuzhiyun }
99*4882a593Smuzhiyun
100*4882a593Smuzhiyun+func (p *parser) checkLimits(re *Regexp) {
101*4882a593Smuzhiyun+	if p.numRunes > maxRunes {
102*4882a593Smuzhiyun+		panic(ErrInternalError)
103*4882a593Smuzhiyun+	}
104*4882a593Smuzhiyun+	p.checkSize(re)
105*4882a593Smuzhiyun+	p.checkHeight(re)
106*4882a593Smuzhiyun+}
107*4882a593Smuzhiyun+
108*4882a593Smuzhiyun+func (p *parser) checkSize(re *Regexp) {
109*4882a593Smuzhiyun+	if p.size == nil {
110*4882a593Smuzhiyun+		// We haven't started tracking size yet.
111*4882a593Smuzhiyun+		// Do a relatively cheap check to see if we need to start.
112*4882a593Smuzhiyun+		// Maintain the product of all the repeats we've seen
113*4882a593Smuzhiyun+		// and don't track if the total number of regexp nodes
114*4882a593Smuzhiyun+		// we've seen times the repeat product is in budget.
115*4882a593Smuzhiyun+		if p.repeats == 0 {
116*4882a593Smuzhiyun+			p.repeats = 1
117*4882a593Smuzhiyun+		}
118*4882a593Smuzhiyun+		if re.Op == OpRepeat {
119*4882a593Smuzhiyun+			n := re.Max
120*4882a593Smuzhiyun+			if n == -1 {
121*4882a593Smuzhiyun+				n = re.Min
122*4882a593Smuzhiyun+			}
123*4882a593Smuzhiyun+			if n <= 0 {
124*4882a593Smuzhiyun+				n = 1
125*4882a593Smuzhiyun+			}
126*4882a593Smuzhiyun+			if int64(n) > maxSize/p.repeats {
127*4882a593Smuzhiyun+				p.repeats = maxSize
128*4882a593Smuzhiyun+			} else {
129*4882a593Smuzhiyun+				p.repeats *= int64(n)
130*4882a593Smuzhiyun+			}
131*4882a593Smuzhiyun+		}
132*4882a593Smuzhiyun+		if int64(p.numRegexp) < maxSize/p.repeats {
133*4882a593Smuzhiyun+			return
134*4882a593Smuzhiyun+		}
135*4882a593Smuzhiyun+
136*4882a593Smuzhiyun+		// We need to start tracking size.
137*4882a593Smuzhiyun+		// Make the map and belatedly populate it
138*4882a593Smuzhiyun+		// with info about everything we've constructed so far.
139*4882a593Smuzhiyun+		p.size = make(map[*Regexp]int64)
140*4882a593Smuzhiyun+		for _, re := range p.stack {
141*4882a593Smuzhiyun+			p.checkSize(re)
142*4882a593Smuzhiyun+		}
143*4882a593Smuzhiyun+	}
144*4882a593Smuzhiyun+
145*4882a593Smuzhiyun+	if p.calcSize(re, true) > maxSize {
146*4882a593Smuzhiyun+		panic(ErrInternalError)
147*4882a593Smuzhiyun+	}
148*4882a593Smuzhiyun+}
149*4882a593Smuzhiyun+
150*4882a593Smuzhiyun+func (p *parser) calcSize(re *Regexp, force bool) int64 {
151*4882a593Smuzhiyun+	if !force {
152*4882a593Smuzhiyun+		if size, ok := p.size[re]; ok {
153*4882a593Smuzhiyun+			return size
154*4882a593Smuzhiyun+		}
155*4882a593Smuzhiyun+	}
156*4882a593Smuzhiyun+
157*4882a593Smuzhiyun+	var size int64
158*4882a593Smuzhiyun+	switch re.Op {
159*4882a593Smuzhiyun+	case OpLiteral:
160*4882a593Smuzhiyun+		size = int64(len(re.Rune))
161*4882a593Smuzhiyun+	case OpCapture, OpStar:
162*4882a593Smuzhiyun+		// star can be 1+ or 2+; assume 2 pessimistically
163*4882a593Smuzhiyun+		size = 2 + p.calcSize(re.Sub[0], false)
164*4882a593Smuzhiyun+	case OpPlus, OpQuest:
165*4882a593Smuzhiyun+		size = 1 + p.calcSize(re.Sub[0], false)
166*4882a593Smuzhiyun+	case OpConcat:
167*4882a593Smuzhiyun+		for _, sub := range re.Sub {
168*4882a593Smuzhiyun+			size += p.calcSize(sub, false)
169*4882a593Smuzhiyun+		}
170*4882a593Smuzhiyun+	case OpAlternate:
171*4882a593Smuzhiyun+		for _, sub := range re.Sub {
172*4882a593Smuzhiyun+			size += p.calcSize(sub, false)
173*4882a593Smuzhiyun+		}
174*4882a593Smuzhiyun+		if len(re.Sub) > 1 {
175*4882a593Smuzhiyun+			size += int64(len(re.Sub)) - 1
176*4882a593Smuzhiyun+		}
177*4882a593Smuzhiyun+	case OpRepeat:
178*4882a593Smuzhiyun+		sub := p.calcSize(re.Sub[0], false)
179*4882a593Smuzhiyun+		if re.Max == -1 {
180*4882a593Smuzhiyun+			if re.Min == 0 {
181*4882a593Smuzhiyun+				size = 2 + sub // x*
182*4882a593Smuzhiyun+			} else {
183*4882a593Smuzhiyun+				size = 1 + int64(re.Min)*sub // xxx+
184*4882a593Smuzhiyun+			}
185*4882a593Smuzhiyun+			break
186*4882a593Smuzhiyun+		}
187*4882a593Smuzhiyun+		// x{2,5} = xx(x(x(x)?)?)?
188*4882a593Smuzhiyun+		size = int64(re.Max)*sub + int64(re.Max-re.Min)
189*4882a593Smuzhiyun+	}
190*4882a593Smuzhiyun+
191*4882a593Smuzhiyun+	if size < 1 {
192*4882a593Smuzhiyun+		size = 1
193*4882a593Smuzhiyun+	}
194*4882a593Smuzhiyun+	p.size[re] = size
195*4882a593Smuzhiyun+	return size
196*4882a593Smuzhiyun+}
197*4882a593Smuzhiyun+
198*4882a593Smuzhiyun func (p *parser) checkHeight(re *Regexp) {
199*4882a593Smuzhiyun 	if p.numRegexp < maxHeight {
200*4882a593Smuzhiyun 		return
201*4882a593Smuzhiyun@@ -158,6 +290,7 @@ func (p *parser) calcHeight(re *Regexp, force bool) int {
202*4882a593Smuzhiyun
203*4882a593Smuzhiyun // push pushes the regexp re onto the parse stack and returns the regexp.
204*4882a593Smuzhiyun func (p *parser) push(re *Regexp) *Regexp {
205*4882a593Smuzhiyun+	p.numRunes += len(re.Rune)
206*4882a593Smuzhiyun 	if re.Op == OpCharClass && len(re.Rune) == 2 && re.Rune[0] == re.Rune[1] {
207*4882a593Smuzhiyun 		// Single rune.
208*4882a593Smuzhiyun 		if p.maybeConcat(re.Rune[0], p.flags&^FoldCase) {
209*4882a593Smuzhiyun@@ -189,7 +322,7 @@ func (p *parser) push(re *Regexp) *Regexp {
210*4882a593Smuzhiyun 	}
211*4882a593Smuzhiyun
212*4882a593Smuzhiyun 	p.stack = append(p.stack, re)
213*4882a593Smuzhiyun-	p.checkHeight(re)
214*4882a593Smuzhiyun+	p.checkLimits(re)
215*4882a593Smuzhiyun 	return re
216*4882a593Smuzhiyun }
217*4882a593Smuzhiyun
218*4882a593Smuzhiyun@@ -299,7 +432,7 @@ func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (
219*4882a593Smuzhiyun 	re.Sub = re.Sub0[:1]
220*4882a593Smuzhiyun 	re.Sub[0] = sub
221*4882a593Smuzhiyun 	p.stack[n-1] = re
222*4882a593Smuzhiyun-	p.checkHeight(re)
223*4882a593Smuzhiyun+	p.checkLimits(re)
224*4882a593Smuzhiyun
225*4882a593Smuzhiyun 	if op == OpRepeat && (min >= 2 || max >= 2) && !repeatIsValid(re, 1000) {
226*4882a593Smuzhiyun 		return "", &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]}
227*4882a593Smuzhiyun@@ -503,6 +636,7 @@ func (p *parser) factor(sub []*Regexp) []*Regexp {
228*4882a593Smuzhiyun
229*4882a593Smuzhiyun 			for j := start; j < i; j++ {
230*4882a593Smuzhiyun 				sub[j] = p.removeLeadingString(sub[j], len(str))
231*4882a593Smuzhiyun+				p.checkLimits(sub[j])
232*4882a593Smuzhiyun 			}
233*4882a593Smuzhiyun 			suffix := p.collapse(sub[start:i], OpAlternate) // recurse
234*4882a593Smuzhiyun
235*4882a593Smuzhiyun@@ -560,6 +694,7 @@ func (p *parser) factor(sub []*Regexp) []*Regexp {
236*4882a593Smuzhiyun 			for j := start; j < i; j++ {
237*4882a593Smuzhiyun 				reuse := j != start // prefix came from sub[start]
238*4882a593Smuzhiyun 				sub[j] = p.removeLeadingRegexp(sub[j], reuse)
239*4882a593Smuzhiyun+				p.checkLimits(sub[j])
240*4882a593Smuzhiyun 			}
241*4882a593Smuzhiyun 			suffix := p.collapse(sub[start:i], OpAlternate) // recurse
242*4882a593Smuzhiyun
243*4882a593Smuzhiyundiff --git a/src/regexp/syntax/parse_test.go b/src/regexp/syntax/parse_test.go
244*4882a593Smuzhiyunindex 1ef6d8a..67e3c56 100644
245*4882a593Smuzhiyun--- a/src/regexp/syntax/parse_test.go
246*4882a593Smuzhiyun+++ b/src/regexp/syntax/parse_test.go
247*4882a593Smuzhiyun@@ -484,12 +484,15 @@ var invalidRegexps = []string{
248*4882a593Smuzhiyun 	`(?P<>a)`,
249*4882a593Smuzhiyun 	`[a-Z]`,
250*4882a593Smuzhiyun 	`(?i)[a-Z]`,
251*4882a593Smuzhiyun-	`a{100000}`,
252*4882a593Smuzhiyun-	`a{100000,}`,
253*4882a593Smuzhiyun-	"((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})",
254*4882a593Smuzhiyun-	strings.Repeat("(", 1000) + strings.Repeat(")", 1000),
255*4882a593Smuzhiyun-	strings.Repeat("(?:", 1000) + strings.Repeat(")*", 1000),
256*4882a593Smuzhiyun 	`\Q\E*`,
257*4882a593Smuzhiyun+	`a{100000}`,  // too much repetition
258*4882a593Smuzhiyun+	`a{100000,}`, // too much repetition
259*4882a593Smuzhiyun+	"((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})",    // too much repetition
260*4882a593Smuzhiyun+	strings.Repeat("(", 1000) + strings.Repeat(")", 1000),    // too deep
261*4882a593Smuzhiyun+	strings.Repeat("(?:", 1000) + strings.Repeat(")*", 1000), // too deep
262*4882a593Smuzhiyun+	"(" + strings.Repeat("(xx?)", 1000) + "){1000}",          // too long
263*4882a593Smuzhiyun+	strings.Repeat("(xx?){1000}", 1000),                      // too long
264*4882a593Smuzhiyun+	strings.Repeat(`\pL`, 27000),                             // too many runes
265*4882a593Smuzhiyun }
266*4882a593Smuzhiyun
267*4882a593Smuzhiyun var onlyPerl = []string{
268*4882a593Smuzhiyun--
269*4882a593Smuzhiyun2.25.1
270*4882a593Smuzhiyun
271