xref: /OK3568_Linux_fs/yocto/poky/meta/recipes-devtools/gcc/gcc/0004-CVE-2021-42574.patch (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1From bef32d4a28595e933f24fef378cf052a30b674a7 Mon Sep 17 00:00:00 2001
2From: David Malcolm <dmalcolm@redhat.com>
3Date: Tue, 2 Nov 2021 15:45:22 -0400
4Subject: [PATCH] libcpp: capture and underline ranges in -Wbidi-chars=
5 [PR103026]
6MIME-Version: 1.0
7Content-Type: text/plain; charset=utf8
8Content-Transfer-Encoding: 8bit
9
10This patch converts the bidi::vec to use a struct so that we can
11capture location_t values for the bidirectional control characters.
12
13Before:
14
15  Wbidi-chars-1.c: In function ‘main’:
16  Wbidi-chars-1.c:6:43: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=]
17      6 |     /*<U+202E> } <U+2066>if (isAdmin)<U+2069> <U+2066> begin admins only */
18        |                                                                           ^
19  Wbidi-chars-1.c:9:28: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=]
20      9 |     /* end admins only <U+202E> { <U+2066>*/
21        |                                            ^
22
23After:
24
25  Wbidi-chars-1.c: In function ‘main’:
26  Wbidi-chars-1.c:6:43: warning: unpaired UTF-8 bidirectional control characters detected [-Wbidi-chars=]
27      6 |     /*<U+202E> } <U+2066>if (isAdmin)<U+2069> <U+2066> begin admins only */
28        |       ~~~~~~~~                                ~~~~~~~~                    ^
29        |       |                                       |                           |
30        |       |                                       |                           end of bidirectional context
31        |       U+202E (RIGHT-TO-LEFT OVERRIDE)         U+2066 (LEFT-TO-RIGHT ISOLATE)
32  Wbidi-chars-1.c:9:28: warning: unpaired UTF-8 bidirectional control characters detected [-Wbidi-chars=]
33      9 |     /* end admins only <U+202E> { <U+2066>*/
34        |                        ~~~~~~~~   ~~~~~~~~ ^
35        |                        |          |        |
36        |                        |          |        end of bidirectional context
37        |                        |          U+2066 (LEFT-TO-RIGHT ISOLATE)
38        |                        U+202E (RIGHT-TO-LEFT OVERRIDE)
39
40Signed-off-by: David Malcolm <dmalcolm@redhat.com>
41
42gcc/testsuite/ChangeLog:
43	PR preprocessor/103026
44	* c-c++-common/Wbidi-chars-ranges.c: New test.
45
46libcpp/ChangeLog:
47	PR preprocessor/103026
48	* lex.c (struct bidi::context): New.
49	(bidi::vec): Convert to a vec of context rather than unsigned
50	char.
51	(bidi::ctx_at): Rename to...
52	(bidi::pop_kind_at): ...this and reimplement for above change.
53	(bidi::current_ctx): Update for change to vec.
54	(bidi::current_ctx_ucn_p): Likewise.
55	(bidi::current_ctx_loc): New.
56	(bidi::on_char): Update for usage of context struct.  Add "loc"
57	param and pass it when pushing contexts.
58	(get_location_for_byte_range_in_cur_line): New.
59	(get_bidi_utf8): Rename to...
60	(get_bidi_utf8_1): ...this, reintroducing...
61	(get_bidi_utf8): ...as a wrapper, setting *OUT when the result is
62	not NONE.
63	(get_bidi_ucn): Rename to...
64	(get_bidi_ucn_1): ...this, reintroducing...
65	(get_bidi_ucn): ...as a wrapper, setting *OUT when the result is
66	not NONE.
67	(class unpaired_bidi_rich_location): New.
68	(maybe_warn_bidi_on_close): Use unpaired_bidi_rich_location when
69	reporting on unpaired bidi chars.  Split into singular vs plural
70	spellings.
71	(maybe_warn_bidi_on_char): Pass in a location_t rather than a
72	const uchar * and use it when emitting warnings, and when calling
73	bidi::on_char.
74	(_cpp_skip_block_comment): Capture location when kind is not NONE
75	and pass it to maybe_warn_bidi_on_char.
76	(skip_line_comment): Likewise.
77	(forms_identifier_p): Likewise.
78	(lex_raw_string): Likewise.
79	(lex_string): Likewise.
80
81Signed-off-by: David Malcolm <dmalcolm@redhat.com>
82
83CVE: CVE-2021-42574
84Upstream-Status: Backport [https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=bef32d4a28595e933f24fef378cf052a30b674a7]
85Signed-off-by: Pgowda <pgowda.cve@gmail.com>
86
87---
88 .../c-c++-common/Wbidi-chars-ranges.c         |  54 ++++
89 libcpp/lex.c                                  | 251 ++++++++++++++----
90 2 files changed, 257 insertions(+), 48 deletions(-)
91 create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-ranges.c
92
93diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-ranges.c b/gcc/testsuite/c-c++-common/Wbidi-chars-ranges.c
94new file mode 100644
95index 00000000000..298750a2a64
96--- /dev/null
97+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-ranges.c
98@@ -0,0 +1,54 @@
99+/* PR preprocessor/103026 */
100+/* { dg-do compile } */
101+/* { dg-options "-Wbidi-chars=unpaired -fdiagnostics-show-caret" } */
102+/* Verify that we escape and underline pertinent bidirectional
103+   control characters when quoting the source.  */
104+
105+int test_unpaired_bidi () {
106+    int isAdmin = 0;
107+    /*â® } â¦if (isAdmin)⩠⦠begin admins only */
108+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
109+#if 0
110+   { dg-begin-multiline-output "" }
111+     /*<U+202E> } <U+2066>if (isAdmin)<U+2069> <U+2066> begin admins only */
112+       ~~~~~~~~                                ~~~~~~~~                    ^
113+       |                                       |                           |
114+       |                                       |                           end of bidirectional context
115+       U+202E (RIGHT-TO-LEFT OVERRIDE)         U+2066 (LEFT-TO-RIGHT ISOLATE)
116+   { dg-end-multiline-output "" }
117+#endif
118+
119+        __builtin_printf("You are an admin.\n");
120+    /* end admins only â® { â¦*/
121+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
122+#if 0
123+   { dg-begin-multiline-output "" }
124+     /* end admins only <U+202E> { <U+2066>*/
125+                        ~~~~~~~~   ~~~~~~~~ ^
126+                        |          |        |
127+                        |          |        end of bidirectional context
128+                        |          U+2066 (LEFT-TO-RIGHT ISOLATE)
129+                        U+202E (RIGHT-TO-LEFT OVERRIDE)
130+   { dg-end-multiline-output "" }
131+#endif
132+
133+    return 0;
134+}
135+
136+int LRE_âª_PDF_\u202c;
137+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
138+#if 0
139+   { dg-begin-multiline-output "" }
140+ int LRE_<U+202A>_PDF_\u202c;
141+         ~~~~~~~~     ^~~~~~
142+   { dg-end-multiline-output "" }
143+#endif
144+
145+const char *s1 = "LRE_âª_PDF_\u202c";
146+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
147+#if 0
148+   { dg-begin-multiline-output "" }
149+ const char *s1 = "LRE_<U+202A>_PDF_\u202c";
150+                       ~~~~~~~~     ^~~~~~
151+   { dg-end-multiline-output "" }
152+#endif
153diff --git a/libcpp/lex.c b/libcpp/lex.c
154index 2421d6c0f40..94c36f0d014 100644
155--- a/libcpp/lex.c
156+++ b/libcpp/lex.c
157@@ -1172,11 +1172,34 @@ namespace bidi {
158   /* All the UTF-8 encodings of bidi characters start with E2.  */
159   constexpr uchar utf8_start = 0xe2;
160
161+  struct context
162+  {
163+    context () {}
164+    context (location_t loc, kind k, bool pdf, bool ucn)
165+    : m_loc (loc), m_kind (k), m_pdf (pdf), m_ucn (ucn)
166+    {
167+    }
168+
169+    kind get_pop_kind () const
170+    {
171+      return m_pdf ? kind::PDF : kind::PDI;
172+    }
173+    bool ucn_p () const
174+    {
175+      return m_ucn;
176+    }
177+
178+    location_t m_loc;
179+    kind m_kind;
180+    unsigned m_pdf : 1;
181+    unsigned m_ucn : 1;
182+  };
183+
184   /* A vector holding currently open bidi contexts.  We use a char for
185      each context, its LSB is 1 if it represents a PDF context, 0 if it
186      represents a PDI context.  The next bit is 1 if this context was open
187      by a bidi character written as a UCN, and 0 when it was UTF-8.  */
188-  semi_embedded_vec <unsigned char, 16> vec;
189+  semi_embedded_vec <context, 16> vec;
190
191   /* Close the whole comment/identifier/string literal/character constant
192      context.  */
193@@ -1193,19 +1216,19 @@ namespace bidi {
194     vec.truncate (len - 1);
195   }
196
197-  /* Return the context of the Ith element.  */
198-  kind ctx_at (unsigned int i)
199+  /* Return the pop kind of the context of the Ith element.  */
200+  kind pop_kind_at (unsigned int i)
201   {
202-    return (vec[i] & 1) ? kind::PDF : kind::PDI;
203+    return vec[i].get_pop_kind ();
204   }
205
206-  /* Return which context is currently opened.  */
207+  /* Return the pop kind of the context that is currently opened.  */
208   kind current_ctx ()
209   {
210     unsigned int len = vec.count ();
211     if (len == 0)
212       return kind::NONE;
213-    return ctx_at (len - 1);
214+    return vec[len - 1].get_pop_kind ();
215   }
216
217   /* Return true if the current context comes from a UCN origin, that is,
218@@ -1214,11 +1237,19 @@ namespace bidi {
219   {
220     unsigned int len = vec.count ();
221     gcc_checking_assert (len > 0);
222-    return (vec[len - 1] >> 1) & 1;
223+    return vec[len - 1].m_ucn;
224   }
225
226-  /* We've read a bidi char, update the current vector as necessary.  */
227-  void on_char (kind k, bool ucn_p)
228+  location_t current_ctx_loc ()
229+  {
230+    unsigned int len = vec.count ();
231+    gcc_checking_assert (len > 0);
232+    return vec[len - 1].m_loc;
233+  }
234+
235+  /* We've read a bidi char, update the current vector as necessary.
236+     LOC is only valid when K is not kind::NONE.  */
237+  void on_char (kind k, bool ucn_p, location_t loc)
238   {
239     switch (k)
240       {
241@@ -1226,12 +1257,12 @@ namespace bidi {
242       case kind::RLE:
243       case kind::LRO:
244       case kind::RLO:
245-	vec.push (ucn_p ? 3u : 1u);
246+	vec.push (context (loc, k, true, ucn_p));
247 	break;
248       case kind::LRI:
249       case kind::RLI:
250       case kind::FSI:
251-	vec.push (ucn_p ? 2u : 0u);
252+	vec.push (context (loc, k, false, ucn_p));
253 	break;
254       /* PDF terminates the scope of the last LRE, RLE, LRO, or RLO
255 	 whose scope has not yet been terminated.  */
256@@ -1245,7 +1276,7 @@ namespace bidi {
257 	 yet been terminated.  */
258       case kind::PDI:
259 	for (int i = vec.count () - 1; i >= 0; --i)
260-	  if (ctx_at (i) == kind::PDI)
261+	  if (pop_kind_at (i) == kind::PDI)
262 	    {
263 	      vec.truncate (i);
264 	      break;
265@@ -1295,10 +1326,47 @@ namespace bidi {
266   }
267 }
268
269+/* Get location_t for the range of bytes [START, START + NUM_BYTES)
270+   within the current line in FILE, with the caret at START.  */
271+
272+static location_t
273+get_location_for_byte_range_in_cur_line (cpp_reader *pfile,
274+					 const unsigned char *const start,
275+					 size_t num_bytes)
276+{
277+  gcc_checking_assert (num_bytes > 0);
278+
279+  /* CPP_BUF_COLUMN and linemap_position_for_column both refer
280+     to offsets in bytes, but CPP_BUF_COLUMN is 0-based,
281+     whereas linemap_position_for_column is 1-based.  */
282+
283+  /* Get 0-based offsets within the line.  */
284+  size_t start_offset = CPP_BUF_COLUMN (pfile->buffer, start);
285+  size_t end_offset = start_offset + num_bytes - 1;
286+
287+  /* Now convert to location_t, where "columns" are 1-based byte offsets.  */
288+  location_t start_loc = linemap_position_for_column (pfile->line_table,
289+						      start_offset + 1);
290+  location_t end_loc = linemap_position_for_column (pfile->line_table,
291+						     end_offset + 1);
292+
293+  if (start_loc == end_loc)
294+    return start_loc;
295+
296+  source_range src_range;
297+  src_range.m_start = start_loc;
298+  src_range.m_finish = end_loc;
299+  location_t combined_loc = COMBINE_LOCATION_DATA (pfile->line_table,
300+						   start_loc,
301+						   src_range,
302+						   NULL);
303+  return combined_loc;
304+}
305+
306 /* Parse a sequence of 3 bytes starting with P and return its bidi code.  */
307
308 static bidi::kind
309-get_bidi_utf8 (const unsigned char *const p)
310+get_bidi_utf8_1 (const unsigned char *const p)
311 {
312   gcc_checking_assert (p[0] == bidi::utf8_start);
313
314@@ -1340,10 +1408,25 @@ get_bidi_utf8 (const unsigned char *cons
315   return bidi::kind::NONE;
316 }
317
318+/* Parse a sequence of 3 bytes starting with P and return its bidi code.
319+   If the kind is not NONE, write the location to *OUT.*/
320+
321+static bidi::kind
322+get_bidi_utf8 (cpp_reader *pfile, const unsigned char *const p, location_t *out)
323+{
324+  bidi::kind result = get_bidi_utf8_1 (p);
325+  if (result != bidi::kind::NONE)
326+    {
327+      /* We have a sequence of 3 bytes starting at P.  */
328+      *out = get_location_for_byte_range_in_cur_line (pfile, p, 3);
329+    }
330+  return result;
331+}
332+
333 /* Parse a UCN where P points just past \u or \U and return its bidi code.  */
334
335 static bidi::kind
336-get_bidi_ucn (const unsigned char *p, bool is_U)
337+get_bidi_ucn_1 (const unsigned char *p, bool is_U)
338 {
339   /* 6.4.3 Universal Character Names
340       \u hex-quad
341@@ -1412,6 +1495,62 @@ get_bidi_ucn (const unsigned char *p, bo
342   return bidi::kind::NONE;
343 }
344
345+/* Parse a UCN where P points just past \u or \U and return its bidi code.
346+   If the kind is not NONE, write the location to *OUT.*/
347+
348+static bidi::kind
349+get_bidi_ucn (cpp_reader *pfile,  const unsigned char *p, bool is_U,
350+	      location_t *out)
351+{
352+  bidi::kind result = get_bidi_ucn_1 (p, is_U);
353+  if (result != bidi::kind::NONE)
354+    {
355+      const unsigned char *start = p - 2;
356+      size_t num_bytes = 2 + (is_U ? 8 : 4);
357+      *out = get_location_for_byte_range_in_cur_line (pfile, start, num_bytes);
358+    }
359+  return result;
360+}
361+
362+/* Subclass of rich_location for reporting on unpaired UTF-8
363+   bidirectional control character(s).
364+   Escape the source lines on output, and show all unclosed
365+   bidi context, labelling everything.  */
366+
367+class unpaired_bidi_rich_location : public rich_location
368+{
369+ public:
370+  class custom_range_label : public range_label
371+  {
372+   public:
373+     label_text get_text (unsigned range_idx) const FINAL OVERRIDE
374+     {
375+       /* range 0 is the primary location; each subsequent range i + 1
376+	  is for bidi::vec[i].  */
377+       if (range_idx > 0)
378+	 {
379+	   const bidi::context &ctxt (bidi::vec[range_idx - 1]);
380+	   return label_text::borrow (bidi::to_str (ctxt.m_kind));
381+	 }
382+       else
383+	 return label_text::borrow (_("end of bidirectional context"));
384+     }
385+  };
386+
387+  unpaired_bidi_rich_location (cpp_reader *pfile, location_t loc)
388+  : rich_location (pfile->line_table, loc, &m_custom_label)
389+  {
390+    set_escape_on_output (true);
391+    for (unsigned i = 0; i < bidi::vec.count (); i++)
392+      add_range (bidi::vec[i].m_loc,
393+		 SHOW_RANGE_WITHOUT_CARET,
394+		 &m_custom_label);
395+  }
396+
397+ private:
398+   custom_range_label m_custom_label;
399+};
400+
401 /* We're closing a bidi context, that is, we've encountered a newline,
402    are closing a C-style comment, or are at the end of a string literal,
403    character constant, or identifier.  Warn if this context was not
404@@ -1427,11 +1566,17 @@ maybe_warn_bidi_on_close (cpp_reader *pf
405       const location_t loc
406 	= linemap_position_for_column (pfile->line_table,
407 				       CPP_BUF_COLUMN (pfile->buffer, p));
408-      rich_location rich_loc (pfile->line_table, loc);
409-      rich_loc.set_escape_on_output (true);
410-      cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
411-		      "unpaired UTF-8 bidirectional control character "
412-		      "detected");
413+      unpaired_bidi_rich_location rich_loc (pfile, loc);
414+      /* cpp_callbacks doesn't yet have a way to handle singular vs plural
415+	 forms of a diagnostic, so fake it for now.  */
416+      if (bidi::vec.count () > 1)
417+	cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
418+			"unpaired UTF-8 bidirectional control characters "
419+			"detected");
420+      else
421+	cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
422+			"unpaired UTF-8 bidirectional control character "
423+			"detected");
424     }
425   /* We're done with this context.  */
426   bidi::on_close ();
427@@ -1439,12 +1584,13 @@ maybe_warn_bidi_on_close (cpp_reader *pf
428
429 /* We're at the beginning or in the middle of an identifier/comment/string
430    literal/character constant.  Warn if we've encountered a bidi character.
431-   KIND says which bidi character it was; P points to it in the character
432-   stream.  UCN_P is true iff this bidi character was written as a UCN.  */
433+   KIND says which bidi control character it was; UCN_P is true iff this bidi
434+   control character was written as a UCN.  LOC is the location of the
435+   character, but is only valid if KIND != bidi::kind::NONE.  */
436
437 static void
438-maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind,
439-			 bool ucn_p)
440+maybe_warn_bidi_on_char (cpp_reader *pfile, bidi::kind kind,
441+			 bool ucn_p, location_t loc)
442 {
443   if (__builtin_expect (kind == bidi::kind::NONE, 1))
444     return;
445@@ -1453,9 +1599,6 @@ maybe_warn_bidi_on_char (cpp_reader *pfi
446
447   if (warn_bidi != bidirectional_none)
448     {
449-      const location_t loc
450-	= linemap_position_for_column (pfile->line_table,
451-				       CPP_BUF_COLUMN (pfile->buffer, p));
452       rich_location rich_loc (pfile->line_table, loc);
453       rich_loc.set_escape_on_output (true);
454
455@@ -1467,9 +1610,12 @@ maybe_warn_bidi_on_char (cpp_reader *pfi
456 	{
457 	  if (warn_bidi == bidirectional_unpaired
458 	      && bidi::current_ctx_ucn_p () != ucn_p)
459-	    cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
460-			    "UTF-8 vs UCN mismatch when closing "
461-			    "a context by \"%s\"", bidi::to_str (kind));
462+	    {
463+	      rich_loc.add_range (bidi::current_ctx_loc ());
464+	      cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
465+			      "UTF-8 vs UCN mismatch when closing "
466+			      "a context by \"%s\"", bidi::to_str (kind));
467+	    }
468 	}
469       else if (warn_bidi == bidirectional_any)
470 	{
471@@ -1484,7 +1630,7 @@ maybe_warn_bidi_on_char (cpp_reader *pfi
472 	}
473     }
474   /* We're done with this context.  */
475-  bidi::on_char (kind, ucn_p);
476+  bidi::on_char (kind, ucn_p, loc);
477 }
478
479 /* Skip a C-style block comment.  We find the end of the comment by
480@@ -1552,8 +1698,9 @@ _cpp_skip_block_comment (cpp_reader *pfi
481 	 a bidirectional control character.  */
482       else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
483 	{
484-	  bidi::kind kind = get_bidi_utf8 (cur - 1);
485-	  maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/false);
486+	  location_t loc;
487+	  bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc);
488+	  maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
489 	}
490     }
491
492@@ -1586,9 +1733,9 @@ skip_line_comment (cpp_reader *pfile)
493 	    {
494 	      if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
495 		{
496-		  bidi::kind kind = get_bidi_utf8 (buffer->cur);
497-		  maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
498-					   /*ucn_p=*/false);
499+		  location_t loc;
500+		  bidi::kind kind = get_bidi_utf8 (pfile, buffer->cur, &loc);
501+		  maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
502 		}
503 	      buffer->cur++;
504 	    }
505@@ -1708,9 +1855,9 @@ forms_identifier_p (cpp_reader *pfile, i
506 	  if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)
507 	      && warn_bidi_p)
508 	    {
509-	      bidi::kind kind = get_bidi_utf8 (buffer->cur);
510-	      maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
511-				       /*ucn_p=*/false);
512+	      location_t loc;
513+	      bidi::kind kind = get_bidi_utf8 (pfile, buffer->cur, &loc);
514+	      maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
515 	    }
516 	  if (_cpp_valid_utf8 (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
517 			       state, &s))
518@@ -1722,10 +1869,12 @@ forms_identifier_p (cpp_reader *pfile, i
519 	  buffer->cur += 2;
520 	  if (warn_bidi_p)
521 	    {
522-	      bidi::kind kind = get_bidi_ucn (buffer->cur,
523-					      buffer->cur[-1] == 'U');
524-	      maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
525-				       /*ucn_p=*/true);
526+	      location_t loc;
527+	      bidi::kind kind = get_bidi_ucn (pfile,
528+					      buffer->cur,
529+					      buffer->cur[-1] == 'U',
530+					      &loc);
531+	      maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/true, loc);
532 	    }
533 	  if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
534 			      state, &s, NULL, NULL))
535@@ -2336,8 +2485,11 @@ lex_raw_string (cpp_reader *pfile, cpp_t
536 	}
537       else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0)
538 	       && warn_bidi_p)
539-	maybe_warn_bidi_on_char (pfile, pos - 1, get_bidi_utf8 (pos - 1),
540-				 /*ucn_p=*/false);
541+	{
542+	  location_t loc;
543+	  bidi::kind kind = get_bidi_utf8 (pfile, pos - 1, &loc);
544+	  maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
545+	}
546     }
547
548   if (warn_bidi_p)
549@@ -2447,8 +2599,10 @@ lex_string (cpp_reader *pfile, cpp_token
550 	{
551 	  if ((cur[0] == 'u' || cur[0] == 'U') && warn_bidi_p)
552 	    {
553-	      bidi::kind kind = get_bidi_ucn (cur + 1, cur[0] == 'U');
554-	      maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/true);
555+	      location_t loc;
556+	      bidi::kind kind = get_bidi_ucn (pfile, cur + 1, cur[0] == 'U',
557+					      &loc);
558+	      maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/true, loc);
559 	    }
560 	  cur++;
561 	}
562@@ -2476,8 +2630,9 @@ lex_string (cpp_reader *pfile, cpp_token
563 	saw_NUL = true;
564       else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
565 	{
566-	  bidi::kind kind = get_bidi_utf8 (cur - 1);
567-	  maybe_warn_bidi_on_char (pfile, cur - 1, kind, /*ucn_p=*/false);
568+	  location_t loc;
569+	  bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc);
570+	  maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
571 	}
572     }
573
574