xref: /OK3568_Linux_fs/yocto/poky/meta/recipes-devtools/binutils/binutils/0020-CVE-2023-22608-1.patch (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1From 116aac1447ee92df25599859293752648e3c6ea0 Mon Sep 17 00:00:00 2001
2From: "Steinar H. Gunderson" <sesse@google.com>
3Date: Fri, 20 May 2022 16:10:34 +0200
4Subject: [PATCH] add a trie to map quickly from address range to compilation
5MIME-Version: 1.0
6Content-Type: text/plain; charset=UTF-8
7Content-Transfer-Encoding: 8bit
8
9 unit
10MIME-Version: 1.0
11Content-Type: text/plain; charset=UTF-8
12Content-Transfer-Encoding: 8bit
13
14When using perf to profile large binaries, _bfd_dwarf2_find_nearest_line()
15becomes a hotspot, as perf wants to get line number information
16(for inline-detection purposes) for each and every sample. In Chromium
17in particular (the content_shell binary), this entails going through
18475k address ranges, which takes a long time when done repeatedly.
19
20Add a radix-256 trie over the address space to quickly map address to
21compilation unit spaces; for content_shell, which is 1.6 GB when some
22(but not full) debug information is turned on, we go from 6 ms to
230.006 ms (6 µs) for each lookup from address to compilation unit, a 1000x
24speedup.
25
26There is a modest RAM increase of 180 MB in this binary (the existing
27linked list over ranges uses about 10 MB, and the entire perf job uses
28between 2-3 GB for a medium-size profile); for smaller binaries with few
29ranges, there should be hardly any extra RAM usage at all.
30
31Upstream-Status: Backport [https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=b43771b045fb5616da3964f2994eefbe8ae70d32]
32
33CVE: CVE-2023-22608
34
35Signed-off-by: Yash Shinde <Yash.Shinde@windriver.com>
36
37---
38 bfd/dwarf2.c | 326 ++++++++++++++++++++++++++++++++++++++++++++++++---
39 1 file changed, 312 insertions(+), 14 deletions(-)
40
41diff --git a/bfd/dwarf2.c b/bfd/dwarf2.c
42index fdf071c3..0ae50a37 100644
43--- a/bfd/dwarf2.c
44+++ b/bfd/dwarf2.c
45@@ -82,6 +82,77 @@ struct adjusted_section
46   bfd_vma adj_vma;
47 };
48
49+/* A trie to map quickly from address range to compilation unit.
50+
51+   This is a fairly standard radix-256 trie, used to quickly locate which
52+   compilation unit any given address belongs to.  Given that each compilation
53+   unit may register hundreds of very small and unaligned ranges (which may
54+   potentially overlap, due to inlining and other concerns), and a large
55+   program may end up containing hundreds of thousands of such ranges, we cannot
56+   scan through them linearly without undue slowdown.
57+
58+   We use a hybrid trie to avoid memory explosion: There are two types of trie
59+   nodes, leaves and interior nodes.  (Almost all nodes are leaves, so they
60+   take up the bulk of the memory usage.) Leaves contain a simple array of
61+   ranges (high/low address) and which compilation unit contains those ranges,
62+   and when we get to a leaf, we scan through it linearly.  Interior nodes
63+   contain pointers to 256 other nodes, keyed by the next byte of the address.
64+   So for a 64-bit address like 0x1234567abcd, we would start at the root and go
65+   down child[0x00]->child[0x00]->child[0x01]->child[0x23]->child[0x45] etc.,
66+   until we hit a leaf.  (Nodes are, in general, leaves until they exceed the
67+   default allocation of 16 elements, at which point they are converted to
68+   interior node if possible.) This gives us near-constant lookup times;
69+   the only thing that can be costly is if there are lots of overlapping ranges
70+   within a single 256-byte segment of the binary, in which case we have to
71+   scan through them all to find the best match.
72+
73+   For a binary with few ranges, we will in practice only have a single leaf
74+   node at the root, containing a simple array.  Thus, the scheme is efficient
75+   for both small and large binaries.
76+ */
77+
78+/* Experiments have shown 16 to be a memory-efficient default leaf size.
79+   The only case where a leaf will hold more memory than this, is at the
80+   bottomost level (covering 256 bytes in the binary), where we'll expand
81+   the leaf to be able to hold more ranges if needed.
82+ */
83+#define TRIE_LEAF_SIZE 16
84+
85+/* All trie_node pointers will really be trie_leaf or trie_interior,
86+   but they have this common head.  */
87+struct trie_node
88+{
89+  /* If zero, we are an interior node.
90+     Otherwise, how many ranges we have room for in this leaf.  */
91+  unsigned int num_room_in_leaf;
92+};
93+
94+struct trie_leaf
95+{
96+  struct trie_node head;
97+  unsigned int num_stored_in_leaf;
98+  struct {
99+    struct comp_unit *unit;
100+    bfd_vma low_pc, high_pc;
101+  } ranges[TRIE_LEAF_SIZE];
102+};
103+
104+struct trie_interior
105+{
106+  struct trie_node head;
107+  struct trie_node *children[256];
108+};
109+
110+static struct trie_node *alloc_trie_leaf (bfd *abfd)
111+{
112+  struct trie_leaf *leaf =
113+    bfd_zalloc (abfd, sizeof (struct trie_leaf));
114+  if (leaf == NULL)
115+    return NULL;
116+  leaf->head.num_room_in_leaf = TRIE_LEAF_SIZE;
117+  return &leaf->head;
118+}
119+
120 struct dwarf2_debug_file
121 {
122   /* The actual bfd from which debug info was loaded.  Might be
123@@ -139,6 +210,9 @@ struct dwarf2_debug_file
124   /* A list of all previously read comp_units.  */
125   struct comp_unit *all_comp_units;
126
127+  /* A list of all previously read comp_units with no ranges (yet).  */
128+  struct comp_unit *all_comp_units_without_ranges;
129+
130   /* Last comp unit in list above.  */
131   struct comp_unit *last_comp_unit;
132
133@@ -147,6 +221,9 @@ struct dwarf2_debug_file
134
135   /* Hash table to map offsets to decoded abbrevs.  */
136   htab_t abbrev_offsets;
137+
138+  /* Root of a trie to map addresses to compilation units.  */
139+  struct trie_node *trie_root;
140 };
141
142 struct dwarf2_debug
143@@ -220,6 +297,11 @@ struct comp_unit
144   /* Chain the previously read compilation units.  */
145   struct comp_unit *next_unit;
146
147+  /* Chain the previously read compilation units that have no ranges yet.
148+     We scan these separately when we have a trie over the ranges.
149+     Unused if arange.high != 0. */
150+  struct comp_unit *next_unit_without_ranges;
151+
152   /* Likewise, chain the compilation unit read after this one.
153      The comp units are stored in reversed reading order.  */
154   struct comp_unit *prev_unit;
155@@ -296,6 +378,10 @@ struct comp_unit
156
157   /* TRUE if symbols are cached in hash table for faster lookup by name.  */
158   bool cached;
159+
160+  /* Used when iterating over trie leaves to know which units we have
161+     already seen in this iteration.  */
162+  bool mark;
163 };
164
165 /* This data structure holds the information of an abbrev.  */
166@@ -1766,9 +1852,189 @@ concat_filename (struct line_info_table *table, unsigned int file)
167   return strdup (filename);
168 }
169
170+/* Number of bits in a bfd_vma.  */
171+#define VMA_BITS (8 * sizeof (bfd_vma))
172+
173+/* Check whether [low1, high1) can be combined with [low2, high2),
174+   i.e., they touch or overlap.  */
175+static bool ranges_overlap (bfd_vma low1,
176+			    bfd_vma high1,
177+			    bfd_vma low2,
178+			    bfd_vma high2)
179+{
180+  if (low1 == low2 || high1 == high2)
181+    return true;
182+
183+  /* Sort so that low1 is below low2. */
184+  if (low1 > low2)
185+    {
186+      bfd_vma tmp;
187+
188+      tmp = low1;
189+      low1 = low2;
190+      low2 = tmp;
191+
192+      tmp = high1;
193+      high1 = high2;
194+      high2 = tmp;
195+    }
196+
197+  /* We touch iff low2 == high1.
198+     We overlap iff low2 is within [low1, high1). */
199+  return (low2 <= high1);
200+}
201+
202+/* Insert an address range in the trie mapping addresses to compilation units.
203+   Will return the new trie node (usually the same as is being sent in, but
204+   in case of a leaf-to-interior conversion, or expansion of a leaf, it may be
205+   different), or NULL on failure.
206+ */
207+static struct trie_node *insert_arange_in_trie(bfd *abfd,
208+					       struct trie_node *trie,
209+					       bfd_vma trie_pc,
210+					       unsigned int trie_pc_bits,
211+					       struct comp_unit *unit,
212+					       bfd_vma low_pc,
213+					       bfd_vma high_pc)
214+{
215+  bfd_vma clamped_low_pc, clamped_high_pc;
216+  int ch, from_ch, to_ch;
217+  bool is_full_leaf = false;
218+
219+  /* See if we can extend any of the existing ranges.  This merging
220+     isn't perfect (if merging opens up the possibility of merging two existing
221+     ranges, we won't find them), but it takes the majority of the cases.  */
222+  if (trie->num_room_in_leaf > 0)
223+    {
224+      struct trie_leaf *leaf = (struct trie_leaf *) trie;
225+      unsigned int i;
226+
227+      for (i = 0; i < leaf->num_stored_in_leaf; ++i)
228+	{
229+	  if (leaf->ranges[i].unit == unit &&
230+	      ranges_overlap(low_pc, high_pc,
231+			     leaf->ranges[i].low_pc, leaf->ranges[i].high_pc))
232+	    {
233+	      if (low_pc < leaf->ranges[i].low_pc)
234+		leaf->ranges[i].low_pc = low_pc;
235+	      if (high_pc > leaf->ranges[i].high_pc)
236+		leaf->ranges[i].high_pc = high_pc;
237+	      return trie;
238+	    }
239+	}
240+
241+      is_full_leaf = leaf->num_stored_in_leaf == trie->num_room_in_leaf;
242+    }
243+
244+  /* If we're a leaf with no more room and we're _not_ at the bottom,
245+     convert to an interior node.  */
246+  if (is_full_leaf && trie_pc_bits < VMA_BITS)
247+    {
248+      const struct trie_leaf *leaf = (struct trie_leaf *) trie;
249+      unsigned int i;
250+
251+      trie = bfd_zalloc (abfd, sizeof (struct trie_interior));
252+      if (!trie)
253+	return NULL;
254+      is_full_leaf = false;
255+
256+      /* TODO: If we wanted to save a little more memory at the cost of
257+	 complexity, we could have reused the old leaf node as one of the
258+	 children of the new interior node, instead of throwing it away.  */
259+      for (i = 0; i < leaf->num_stored_in_leaf; ++i)
260+        {
261+	  if (!insert_arange_in_trie (abfd, trie, trie_pc, trie_pc_bits,
262+				      leaf->ranges[i].unit, leaf->ranges[i].low_pc,
263+				      leaf->ranges[i].high_pc))
264+	    return NULL;
265+	}
266+    }
267+
268+  /* If we're a leaf with no more room and we _are_ at the bottom,
269+     we have no choice but to just make it larger. */
270+  if (is_full_leaf)
271+    {
272+      const struct trie_leaf *leaf = (struct trie_leaf *) trie;
273+      unsigned int new_room_in_leaf = trie->num_room_in_leaf * 2;
274+      struct trie_leaf *new_leaf;
275+
276+      new_leaf = bfd_zalloc (abfd,
277+	sizeof (struct trie_leaf) +
278+	  (new_room_in_leaf - TRIE_LEAF_SIZE) * sizeof (leaf->ranges[0]));
279+      new_leaf->head.num_room_in_leaf = new_room_in_leaf;
280+      new_leaf->num_stored_in_leaf = leaf->num_stored_in_leaf;
281+
282+      memcpy (new_leaf->ranges,
283+	      leaf->ranges,
284+	      leaf->num_stored_in_leaf * sizeof (leaf->ranges[0]));
285+      trie = &new_leaf->head;
286+      is_full_leaf = false;
287+
288+      /* Now the insert below will go through.  */
289+    }
290+
291+  /* If we're a leaf (now with room), we can just insert at the end.  */
292+  if (trie->num_room_in_leaf > 0)
293+    {
294+      struct trie_leaf *leaf = (struct trie_leaf *) trie;
295+
296+      unsigned int i = leaf->num_stored_in_leaf++;
297+      leaf->ranges[i].unit = unit;
298+      leaf->ranges[i].low_pc = low_pc;
299+      leaf->ranges[i].high_pc = high_pc;
300+      return trie;
301+    }
302+
303+  /* Now we are definitely an interior node, so recurse into all
304+     the relevant buckets.  */
305+
306+  /* Clamp the range to the current trie bucket.  */
307+  clamped_low_pc = low_pc;
308+  clamped_high_pc = high_pc;
309+  if (trie_pc_bits > 0)
310+    {
311+      bfd_vma bucket_high_pc =
312+	trie_pc + ((bfd_vma)-1 >> trie_pc_bits);  /* Inclusive.  */
313+      if (clamped_low_pc < trie_pc)
314+	clamped_low_pc = trie_pc;
315+      if (clamped_high_pc > bucket_high_pc)
316+	clamped_high_pc = bucket_high_pc;
317+    }
318+
319+  /* Insert the ranges in all buckets that it spans.  */
320+  from_ch = (clamped_low_pc >> (VMA_BITS - trie_pc_bits - 8)) & 0xff;
321+  to_ch = ((clamped_high_pc - 1) >> (VMA_BITS - trie_pc_bits - 8)) & 0xff;
322+  for (ch = from_ch; ch <= to_ch; ++ch)
323+    {
324+      struct trie_interior *interior = (struct trie_interior *) trie;
325+      struct trie_node *child = interior->children[ch];
326+
327+      if (child == NULL)
328+        {
329+	  child = alloc_trie_leaf (abfd);
330+	  if (!child)
331+	    return NULL;
332+	}
333+      child = insert_arange_in_trie (abfd,
334+				     child,
335+				     trie_pc + ((bfd_vma)ch << (VMA_BITS - trie_pc_bits - 8)),
336+				     trie_pc_bits + 8,
337+				     unit,
338+				     low_pc,
339+				     high_pc);
340+      if (!child)
341+	return NULL;
342+
343+      interior->children[ch] = child;
344+    }
345+
346+    return trie;
347+}
348+
349+
350 static bool
351-arange_add (const struct comp_unit *unit, struct arange *first_arange,
352-	    bfd_vma low_pc, bfd_vma high_pc)
353+arange_add (struct comp_unit *unit, struct arange *first_arange,
354+	    struct trie_node **trie_root, bfd_vma low_pc, bfd_vma high_pc)
355 {
356   struct arange *arange;
357
358@@ -1776,6 +2042,19 @@ arange_add (const struct comp_unit *unit, struct arange *first_arange,
359   if (low_pc == high_pc)
360     return true;
361
362+  if (trie_root != NULL)
363+    {
364+      *trie_root = insert_arange_in_trie (unit->file->bfd_ptr,
365+					  *trie_root,
366+					  0,
367+					  0,
368+					  unit,
369+					  low_pc,
370+					  high_pc);
371+      if (*trie_root == NULL)
372+	return false;
373+    }
374+
375   /* If the first arange is empty, use it.  */
376   if (first_arange->high == 0)
377     {
378@@ -2410,7 +2689,8 @@ decode_line_info (struct comp_unit *unit)
379 		    low_pc = address;
380 		  if (address > high_pc)
381 		    high_pc = address;
382-		  if (!arange_add (unit, &unit->arange, low_pc, high_pc))
383+		  if (!arange_add (unit, &unit->arange, &unit->file->trie_root,
384+				   low_pc, high_pc))
385 		    goto line_fail;
386 		  break;
387 		case DW_LNE_set_address:
388@@ -3134,7 +3414,7 @@ find_abstract_instance (struct comp_unit *unit,
389
390 static bool
391 read_ranges (struct comp_unit *unit, struct arange *arange,
392-	     bfd_uint64_t offset)
393+	     struct trie_node **trie_root, bfd_uint64_t offset)
394 {
395   bfd_byte *ranges_ptr;
396   bfd_byte *ranges_end;
397@@ -3169,7 +3449,7 @@ read_ranges (struct comp_unit *unit, struct arange *arange,
398 	base_address = high_pc;
399       else
400 	{
401-	  if (!arange_add (unit, arange,
402+	  if (!arange_add (unit, arange, trie_root,
403 			   base_address + low_pc, base_address + high_pc))
404 	    return false;
405 	}
406@@ -3179,7 +3459,7 @@ read_ranges (struct comp_unit *unit, struct arange *arange,
407
408 static bool
409 read_rnglists (struct comp_unit *unit, struct arange *arange,
410-	       bfd_uint64_t offset)
411+	       struct trie_node **trie_root, bfd_uint64_t offset)
412 {
413   bfd_byte *rngs_ptr;
414   bfd_byte *rngs_end;
415@@ -3253,19 +3533,19 @@ read_rnglists (struct comp_unit *unit, struct arange *arange,
416 	  return false;
417 	}
418
419-      if (!arange_add (unit, arange, low_pc, high_pc))
420+      if (!arange_add (unit, arange, trie_root, low_pc, high_pc))
421 	return false;
422     }
423 }
424
425 static bool
426 read_rangelist (struct comp_unit *unit, struct arange *arange,
427-		bfd_uint64_t offset)
428+		struct trie_node **trie_root, bfd_uint64_t offset)
429 {
430   if (unit->version <= 4)
431-    return read_ranges (unit, arange, offset);
432+    return read_ranges (unit, arange, trie_root, offset);
433   else
434-    return read_rnglists (unit, arange, offset);
435+    return read_rnglists (unit, arange, trie_root, offset);
436 }
437
438 static struct funcinfo *
439@@ -3563,7 +3843,8 @@ scan_unit_for_symbols (struct comp_unit *unit)
440
441 		case DW_AT_ranges:
442 		  if (is_int_form (&attr)
443-		      && !read_rangelist (unit, &func->arange, attr.u.val))
444+		      && !read_rangelist (unit, &func->arange,
445+					  &unit->file->trie_root, attr.u.val))
446 		    goto fail;
447 		  break;
448
449@@ -3679,7 +3960,8 @@ scan_unit_for_symbols (struct comp_unit *unit)
450
451       if (func && high_pc != 0)
452 	{
453-	  if (!arange_add (unit, &func->arange, low_pc, high_pc))
454+	  if (!arange_add (unit, &func->arange, &unit->file->trie_root,
455+			   low_pc, high_pc))
456 	    goto fail;
457 	}
458     }
459@@ -3874,7 +4156,8 @@ parse_comp_unit (struct dwarf2_debug *stash,
460
461 	case DW_AT_ranges:
462 	  if (is_int_form (&attr)
463-	      && !read_rangelist (unit, &unit->arange, attr.u.val))
464+	      && !read_rangelist (unit, &unit->arange,
465+				  &unit->file->trie_root, attr.u.val))
466 	    return NULL;
467 	  break;
468
469@@ -3916,7 +4199,8 @@ parse_comp_unit (struct dwarf2_debug *stash,
470     high_pc += low_pc;
471   if (high_pc != 0)
472     {
473-      if (!arange_add (unit, &unit->arange, low_pc, high_pc))
474+      if (!arange_add (unit, &unit->arange, &unit->file->trie_root,
475+		       low_pc, high_pc))
476 	return NULL;
477     }
478
479@@ -4747,6 +5031,14 @@ _bfd_dwarf2_slurp_debug_info (bfd *abfd, bfd *debug_bfd,
480   if (!stash->alt.abbrev_offsets)
481     return false;
482
483+  stash->f.trie_root = alloc_trie_leaf (abfd);
484+  if (!stash->f.trie_root)
485+    return false;
486+
487+  stash->alt.trie_root = alloc_trie_leaf (abfd);
488+  if (!stash->alt.trie_root)
489+    return false;
490+
491   *pinfo = stash;
492
493   if (debug_bfd == NULL)
494@@ -4918,6 +5210,12 @@ stash_comp_unit (struct dwarf2_debug *stash, struct dwarf2_debug_file *file)
495 	  each->next_unit = file->all_comp_units;
496 	  file->all_comp_units = each;
497
498+	  if (each->arange.high == 0)
499+	    {
500+	      each->next_unit_without_ranges = file->all_comp_units_without_ranges;
501+	      file->all_comp_units_without_ranges = each;
502+	    }
503+
504 	  file->info_ptr += length;
505 	  return each;
506 	}
507