1From bef32d4a28595e933f24fef378cf052a30b674a7 Mon Sep 17 00:00:00 2001 2From: David Malcolm <dmalcolm@redhat.com> 3Date: Tue, 2 Nov 2021 15:45:22 -0400 4Subject: [PATCH] libcpp: capture and underline ranges in -Wbidi-chars= 5 [PR103026] 6MIME-Version: 1.0 7Content-Type: text/plain; charset=utf8 8Content-Transfer-Encoding: 8bit 9 10This patch converts the bidi::vec to use a struct so that we can 11capture location_t values for the bidirectional control characters. 12 13Before: 14 15 Wbidi-chars-1.c: In function ‘main’: 16 Wbidi-chars-1.c:6:43: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=] 17 6 | /*<U+202E> } <U+2066>if (isAdmin)<U+2069> <U+2066> begin admins only */ 18 | ^ 19 Wbidi-chars-1.c:9:28: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=] 20 9 | /* end admins only <U+202E> { <U+2066>*/ 21 | ^ 22 23After: 24 25 Wbidi-chars-1.c: In function ‘main’: 26 Wbidi-chars-1.c:6:43: warning: unpaired UTF-8 bidirectional control characters detected [-Wbidi-chars=] 27 6 | /*<U+202E> } <U+2066>if (isAdmin)<U+2069> <U+2066> begin admins only */ 28 | ~~~~~~~~ ~~~~~~~~ ^ 29 | | | | 30 | | | end of bidirectional context 31 | U+202E (RIGHT-TO-LEFT OVERRIDE) U+2066 (LEFT-TO-RIGHT ISOLATE) 32 Wbidi-chars-1.c:9:28: warning: unpaired UTF-8 bidirectional control characters detected [-Wbidi-chars=] 33 9 | /* end admins only <U+202E> { <U+2066>*/ 34 | ~~~~~~~~ ~~~~~~~~ ^ 35 | | | | 36 | | | end of bidirectional context 37 | | U+2066 (LEFT-TO-RIGHT ISOLATE) 38 | U+202E (RIGHT-TO-LEFT OVERRIDE) 39 40Signed-off-by: David Malcolm <dmalcolm@redhat.com> 41 42gcc/testsuite/ChangeLog: 43 PR preprocessor/103026 44 * c-c++-common/Wbidi-chars-ranges.c: New test. 45 46libcpp/ChangeLog: 47 PR preprocessor/103026 48 * lex.c (struct bidi::context): New. 49 (bidi::vec): Convert to a vec of context rather than unsigned 50 char. 51 (bidi::ctx_at): Rename to... 52 (bidi::pop_kind_at): ...this and reimplement for above change. 
53 (bidi::current_ctx): Update for change to vec. 54 (bidi::current_ctx_ucn_p): Likewise. 55 (bidi::current_ctx_loc): New. 56 (bidi::on_char): Update for usage of context struct. Add "loc" 57 param and pass it when pushing contexts. 58 (get_location_for_byte_range_in_cur_line): New. 59 (get_bidi_utf8): Rename to... 60 (get_bidi_utf8_1): ...this, reintroducing... 61 (get_bidi_utf8): ...as a wrapper, setting *OUT when the result is 62 not NONE. 63 (get_bidi_ucn): Rename to... 64 (get_bidi_ucn_1): ...this, reintroducing... 65 (get_bidi_ucn): ...as a wrapper, setting *OUT when the result is 66 not NONE. 67 (class unpaired_bidi_rich_location): New. 68 (maybe_warn_bidi_on_close): Use unpaired_bidi_rich_location when 69 reporting on unpaired bidi chars. Split into singular vs plural 70 spellings. 71 (maybe_warn_bidi_on_char): Pass in a location_t rather than a 72 const uchar * and use it when emitting warnings, and when calling 73 bidi::on_char. 74 (_cpp_skip_block_comment): Capture location when kind is not NONE 75 and pass it to maybe_warn_bidi_on_char. 76 (skip_line_comment): Likewise. 77 (forms_identifier_p): Likewise. 78 (lex_raw_string): Likewise. 79 (lex_string): Likewise. 
80 81Signed-off-by: David Malcolm <dmalcolm@redhat.com> 82 83CVE: CVE-2021-42574 84Upstream-Status: Backport [https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=bef32d4a28595e933f24fef378cf052a30b674a7] 85Signed-off-by: Pgowda <pgowda.cve@gmail.com> 86 87--- 88 .../c-c++-common/Wbidi-chars-ranges.c | 54 ++++ 89 libcpp/lex.c | 251 ++++++++++++++---- 90 2 files changed, 257 insertions(+), 48 deletions(-) 91 create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-ranges.c 92 93diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-ranges.c b/gcc/testsuite/c-c++-common/Wbidi-chars-ranges.c 94new file mode 100644 95index 00000000000..298750a2a64 96--- /dev/null 97+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-ranges.c 98@@ -0,0 +1,54 @@ 99+/* PR preprocessor/103026 */ 100+/* { dg-do compile } */ 101+/* { dg-options "-Wbidi-chars=unpaired -fdiagnostics-show-caret" } */ 102+/* Verify that we escape and underline pertinent bidirectional 103+ control characters when quoting the source. */ 104+ 105+int test_unpaired_bidi () { 106+ int isAdmin = 0; 107+ /*â® } â¦if (isAdmin)⩠⦠begin admins only */ 108+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ 109+#if 0 110+ { dg-begin-multiline-output "" } 111+ /*<U+202E> } <U+2066>if (isAdmin)<U+2069> <U+2066> begin admins only */ 112+ ~~~~~~~~ ~~~~~~~~ ^ 113+ | | | 114+ | | end of bidirectional context 115+ U+202E (RIGHT-TO-LEFT OVERRIDE) U+2066 (LEFT-TO-RIGHT ISOLATE) 116+ { dg-end-multiline-output "" } 117+#endif 118+ 119+ __builtin_printf("You are an admin.\n"); 120+ /* end admins only â® { â¦*/ 121+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ 122+#if 0 123+ { dg-begin-multiline-output "" } 124+ /* end admins only <U+202E> { <U+2066>*/ 125+ ~~~~~~~~ ~~~~~~~~ ^ 126+ | | | 127+ | | end of bidirectional context 128+ | U+2066 (LEFT-TO-RIGHT ISOLATE) 129+ U+202E (RIGHT-TO-LEFT OVERRIDE) 130+ { dg-end-multiline-output "" } 131+#endif 132+ 133+ return 0; 134+} 135+ 136+int LRE_âª_PDF_\u202c; 137+/* { dg-warning 
"mismatch" "" { target *-*-* } .-1 } */ 138+#if 0 139+ { dg-begin-multiline-output "" } 140+ int LRE_<U+202A>_PDF_\u202c; 141+ ~~~~~~~~ ^~~~~~ 142+ { dg-end-multiline-output "" } 143+#endif 144+ 145+const char *s1 = "LRE_âª_PDF_\u202c"; 146+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ 147+#if 0 148+ { dg-begin-multiline-output "" } 149+ const char *s1 = "LRE_<U+202A>_PDF_\u202c"; 150+ ~~~~~~~~ ^~~~~~ 151+ { dg-end-multiline-output "" } 152+#endif 153diff --git a/libcpp/lex.c b/libcpp/lex.c 154index 2421d6c0f40..94c36f0d014 100644 155--- a/libcpp/lex.c 156+++ b/libcpp/lex.c 157@@ -1172,11 +1172,34 @@ namespace bidi { 158 /* All the UTF-8 encodings of bidi characters start with E2. */ 159 constexpr uchar utf8_start = 0xe2; 160 161+ struct context 162+ { 163+ context () {} 164+ context (location_t loc, kind k, bool pdf, bool ucn) 165+ : m_loc (loc), m_kind (k), m_pdf (pdf), m_ucn (ucn) 166+ { 167+ } 168+ 169+ kind get_pop_kind () const 170+ { 171+ return m_pdf ? kind::PDF : kind::PDI; 172+ } 173+ bool ucn_p () const 174+ { 175+ return m_ucn; 176+ } 177+ 178+ location_t m_loc; 179+ kind m_kind; 180+ unsigned m_pdf : 1; 181+ unsigned m_ucn : 1; 182+ }; 183+ 184 /* A vector holding currently open bidi contexts. We use a char for 185 each context, its LSB is 1 if it represents a PDF context, 0 if it 186 represents a PDI context. The next bit is 1 if this context was open 187 by a bidi character written as a UCN, and 0 when it was UTF-8. */ 188- semi_embedded_vec <unsigned char, 16> vec; 189+ semi_embedded_vec <context, 16> vec; 190 191 /* Close the whole comment/identifier/string literal/character constant 192 context. */ 193@@ -1193,19 +1216,19 @@ namespace bidi { 194 vec.truncate (len - 1); 195 } 196 197- /* Return the context of the Ith element. */ 198- kind ctx_at (unsigned int i) 199+ /* Return the pop kind of the context of the Ith element. */ 200+ kind pop_kind_at (unsigned int i) 201 { 202- return (vec[i] & 1) ? 
kind::PDF : kind::PDI; 203+ return vec[i].get_pop_kind (); 204 } 205 206- /* Return which context is currently opened. */ 207+ /* Return the pop kind of the context that is currently opened. */ 208 kind current_ctx () 209 { 210 unsigned int len = vec.count (); 211 if (len == 0) 212 return kind::NONE; 213- return ctx_at (len - 1); 214+ return vec[len - 1].get_pop_kind (); 215 } 216 217 /* Return true if the current context comes from a UCN origin, that is, 218@@ -1214,11 +1237,19 @@ namespace bidi { 219 { 220 unsigned int len = vec.count (); 221 gcc_checking_assert (len > 0); 222- return (vec[len - 1] >> 1) & 1; 223+ return vec[len - 1].m_ucn; 224 } 225 226- /* We've read a bidi char, update the current vector as necessary. */ 227- void on_char (kind k, bool ucn_p) 228+ location_t current_ctx_loc () 229+ { 230+ unsigned int len = vec.count (); 231+ gcc_checking_assert (len > 0); 232+ return vec[len - 1].m_loc; 233+ } 234+ 235+ /* We've read a bidi char, update the current vector as necessary. 236+ LOC is only valid when K is not kind::NONE. */ 237+ void on_char (kind k, bool ucn_p, location_t loc) 238 { 239 switch (k) 240 { 241@@ -1226,12 +1257,12 @@ namespace bidi { 242 case kind::RLE: 243 case kind::LRO: 244 case kind::RLO: 245- vec.push (ucn_p ? 3u : 1u); 246+ vec.push (context (loc, k, true, ucn_p)); 247 break; 248 case kind::LRI: 249 case kind::RLI: 250 case kind::FSI: 251- vec.push (ucn_p ? 2u : 0u); 252+ vec.push (context (loc, k, false, ucn_p)); 253 break; 254 /* PDF terminates the scope of the last LRE, RLE, LRO, or RLO 255 whose scope has not yet been terminated. */ 256@@ -1245,7 +1276,7 @@ namespace bidi { 257 yet been terminated. 
*/ 258 case kind::PDI: 259 for (int i = vec.count () - 1; i >= 0; --i) 260- if (ctx_at (i) == kind::PDI) 261+ if (pop_kind_at (i) == kind::PDI) 262 { 263 vec.truncate (i); 264 break; 265@@ -1295,10 +1326,47 @@ namespace bidi { 266 } 267 } 268 269+/* Get location_t for the range of bytes [START, START + NUM_BYTES) 270+ within the current line in FILE, with the caret at START. */ 271+ 272+static location_t 273+get_location_for_byte_range_in_cur_line (cpp_reader *pfile, 274+ const unsigned char *const start, 275+ size_t num_bytes) 276+{ 277+ gcc_checking_assert (num_bytes > 0); 278+ 279+ /* CPP_BUF_COLUMN and linemap_position_for_column both refer 280+ to offsets in bytes, but CPP_BUF_COLUMN is 0-based, 281+ whereas linemap_position_for_column is 1-based. */ 282+ 283+ /* Get 0-based offsets within the line. */ 284+ size_t start_offset = CPP_BUF_COLUMN (pfile->buffer, start); 285+ size_t end_offset = start_offset + num_bytes - 1; 286+ 287+ /* Now convert to location_t, where "columns" are 1-based byte offsets. */ 288+ location_t start_loc = linemap_position_for_column (pfile->line_table, 289+ start_offset + 1); 290+ location_t end_loc = linemap_position_for_column (pfile->line_table, 291+ end_offset + 1); 292+ 293+ if (start_loc == end_loc) 294+ return start_loc; 295+ 296+ source_range src_range; 297+ src_range.m_start = start_loc; 298+ src_range.m_finish = end_loc; 299+ location_t combined_loc = COMBINE_LOCATION_DATA (pfile->line_table, 300+ start_loc, 301+ src_range, 302+ NULL); 303+ return combined_loc; 304+} 305+ 306 /* Parse a sequence of 3 bytes starting with P and return its bidi code. */ 307 308 static bidi::kind 309-get_bidi_utf8 (const unsigned char *const p) 310+get_bidi_utf8_1 (const unsigned char *const p) 311 { 312 gcc_checking_assert (p[0] == bidi::utf8_start); 313 314@@ -1340,10 +1408,25 @@ get_bidi_utf8 (const unsigned char *cons 315 return bidi::kind::NONE; 316 } 317 318+/* Parse a sequence of 3 bytes starting with P and return its bidi code. 
319+ If the kind is not NONE, write the location to *OUT.*/ 320+ 321+static bidi::kind 322+get_bidi_utf8 (cpp_reader *pfile, const unsigned char *const p, location_t *out) 323+{ 324+ bidi::kind result = get_bidi_utf8_1 (p); 325+ if (result != bidi::kind::NONE) 326+ { 327+ /* We have a sequence of 3 bytes starting at P. */ 328+ *out = get_location_for_byte_range_in_cur_line (pfile, p, 3); 329+ } 330+ return result; 331+} 332+ 333 /* Parse a UCN where P points just past \u or \U and return its bidi code. */ 334 335 static bidi::kind 336-get_bidi_ucn (const unsigned char *p, bool is_U) 337+get_bidi_ucn_1 (const unsigned char *p, bool is_U) 338 { 339 /* 6.4.3 Universal Character Names 340 \u hex-quad 341@@ -1412,6 +1495,62 @@ get_bidi_ucn (const unsigned char *p, bo 342 return bidi::kind::NONE; 343 } 344 345+/* Parse a UCN where P points just past \u or \U and return its bidi code. 346+ If the kind is not NONE, write the location to *OUT.*/ 347+ 348+static bidi::kind 349+get_bidi_ucn (cpp_reader *pfile, const unsigned char *p, bool is_U, 350+ location_t *out) 351+{ 352+ bidi::kind result = get_bidi_ucn_1 (p, is_U); 353+ if (result != bidi::kind::NONE) 354+ { 355+ const unsigned char *start = p - 2; 356+ size_t num_bytes = 2 + (is_U ? 8 : 4); 357+ *out = get_location_for_byte_range_in_cur_line (pfile, start, num_bytes); 358+ } 359+ return result; 360+} 361+ 362+/* Subclass of rich_location for reporting on unpaired UTF-8 363+ bidirectional control character(s). 364+ Escape the source lines on output, and show all unclosed 365+ bidi context, labelling everything. */ 366+ 367+class unpaired_bidi_rich_location : public rich_location 368+{ 369+ public: 370+ class custom_range_label : public range_label 371+ { 372+ public: 373+ label_text get_text (unsigned range_idx) const FINAL OVERRIDE 374+ { 375+ /* range 0 is the primary location; each subsequent range i + 1 376+ is for bidi::vec[i]. 
*/ 377+ if (range_idx > 0) 378+ { 379+ const bidi::context &ctxt (bidi::vec[range_idx - 1]); 380+ return label_text::borrow (bidi::to_str (ctxt.m_kind)); 381+ } 382+ else 383+ return label_text::borrow (_("end of bidirectional context")); 384+ } 385+ }; 386+ 387+ unpaired_bidi_rich_location (cpp_reader *pfile, location_t loc) 388+ : rich_location (pfile->line_table, loc, &m_custom_label) 389+ { 390+ set_escape_on_output (true); 391+ for (unsigned i = 0; i < bidi::vec.count (); i++) 392+ add_range (bidi::vec[i].m_loc, 393+ SHOW_RANGE_WITHOUT_CARET, 394+ &m_custom_label); 395+ } 396+ 397+ private: 398+ custom_range_label m_custom_label; 399+}; 400+ 401 /* We're closing a bidi context, that is, we've encountered a newline, 402 are closing a C-style comment, or are at the end of a string literal, 403 character constant, or identifier. Warn if this context was not 404@@ -1427,11 +1566,17 @@ maybe_warn_bidi_on_close (cpp_reader *pf 405 const location_t loc 406 = linemap_position_for_column (pfile->line_table, 407 CPP_BUF_COLUMN (pfile->buffer, p)); 408- rich_location rich_loc (pfile->line_table, loc); 409- rich_loc.set_escape_on_output (true); 410- cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc, 411- "unpaired UTF-8 bidirectional control character " 412- "detected"); 413+ unpaired_bidi_rich_location rich_loc (pfile, loc); 414+ /* cpp_callbacks doesn't yet have a way to handle singular vs plural 415+ forms of a diagnostic, so fake it for now. */ 416+ if (bidi::vec.count () > 1) 417+ cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc, 418+ "unpaired UTF-8 bidirectional control characters " 419+ "detected"); 420+ else 421+ cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc, 422+ "unpaired UTF-8 bidirectional control character " 423+ "detected"); 424 } 425 /* We're done with this context. 
*/ 426 bidi::on_close (); 427@@ -1439,12 +1584,13 @@ maybe_warn_bidi_on_close (cpp_reader *pf 428 429 /* We're at the beginning or in the middle of an identifier/comment/string 430 literal/character constant. Warn if we've encountered a bidi character. 431- KIND says which bidi character it was; P points to it in the character 432- stream. UCN_P is true iff this bidi character was written as a UCN. */ 433+ KIND says which bidi control character it was; UCN_P is true iff this bidi 434+ control character was written as a UCN. LOC is the location of the 435+ character, but is only valid if KIND != bidi::kind::NONE. */ 436 437 static void 438-maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind, 439- bool ucn_p) 440+maybe_warn_bidi_on_char (cpp_reader *pfile, bidi::kind kind, 441+ bool ucn_p, location_t loc) 442 { 443 if (__builtin_expect (kind == bidi::kind::NONE, 1)) 444 return; 445@@ -1453,9 +1599,6 @@ maybe_warn_bidi_on_char (cpp_reader *pfi 446 447 if (warn_bidi != bidirectional_none) 448 { 449- const location_t loc 450- = linemap_position_for_column (pfile->line_table, 451- CPP_BUF_COLUMN (pfile->buffer, p)); 452 rich_location rich_loc (pfile->line_table, loc); 453 rich_loc.set_escape_on_output (true); 454 455@@ -1467,9 +1610,12 @@ maybe_warn_bidi_on_char (cpp_reader *pfi 456 { 457 if (warn_bidi == bidirectional_unpaired 458 && bidi::current_ctx_ucn_p () != ucn_p) 459- cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc, 460- "UTF-8 vs UCN mismatch when closing " 461- "a context by \"%s\"", bidi::to_str (kind)); 462+ { 463+ rich_loc.add_range (bidi::current_ctx_loc ()); 464+ cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc, 465+ "UTF-8 vs UCN mismatch when closing " 466+ "a context by \"%s\"", bidi::to_str (kind)); 467+ } 468 } 469 else if (warn_bidi == bidirectional_any) 470 { 471@@ -1484,7 +1630,7 @@ maybe_warn_bidi_on_char (cpp_reader *pfi 472 } 473 } 474 /* We're done with this context. 
*/ 475- bidi::on_char (kind, ucn_p); 476+ bidi::on_char (kind, ucn_p, loc); 477 } 478 479 /* Skip a C-style block comment. We find the end of the comment by 480@@ -1552,8 +1698,9 @@ _cpp_skip_block_comment (cpp_reader *pfi 481 a bidirectional control character. */ 482 else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p) 483 { 484- bidi::kind kind = get_bidi_utf8 (cur - 1); 485- maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/false); 486+ location_t loc; 487+ bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc); 488+ maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc); 489 } 490 } 491 492@@ -1586,9 +1733,9 @@ skip_line_comment (cpp_reader *pfile) 493 { 494 if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)) 495 { 496- bidi::kind kind = get_bidi_utf8 (buffer->cur); 497- maybe_warn_bidi_on_char (pfile, buffer->cur, kind, 498- /*ucn_p=*/false); 499+ location_t loc; 500+ bidi::kind kind = get_bidi_utf8 (pfile, buffer->cur, &loc); 501+ maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc); 502 } 503 buffer->cur++; 504 } 505@@ -1708,9 +1855,9 @@ forms_identifier_p (cpp_reader *pfile, i 506 if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0) 507 && warn_bidi_p) 508 { 509- bidi::kind kind = get_bidi_utf8 (buffer->cur); 510- maybe_warn_bidi_on_char (pfile, buffer->cur, kind, 511- /*ucn_p=*/false); 512+ location_t loc; 513+ bidi::kind kind = get_bidi_utf8 (pfile, buffer->cur, &loc); 514+ maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc); 515 } 516 if (_cpp_valid_utf8 (pfile, &buffer->cur, buffer->rlimit, 1 + !first, 517 state, &s)) 518@@ -1722,10 +1869,12 @@ forms_identifier_p (cpp_reader *pfile, i 519 buffer->cur += 2; 520 if (warn_bidi_p) 521 { 522- bidi::kind kind = get_bidi_ucn (buffer->cur, 523- buffer->cur[-1] == 'U'); 524- maybe_warn_bidi_on_char (pfile, buffer->cur, kind, 525- /*ucn_p=*/true); 526+ location_t loc; 527+ bidi::kind kind = get_bidi_ucn (pfile, 528+ buffer->cur, 529+ buffer->cur[-1] == 'U', 
530+ &loc); 531+ maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/true, loc); 532 } 533 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first, 534 state, &s, NULL, NULL)) 535@@ -2336,8 +2485,11 @@ lex_raw_string (cpp_reader *pfile, cpp_t 536 } 537 else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0) 538 && warn_bidi_p) 539- maybe_warn_bidi_on_char (pfile, pos - 1, get_bidi_utf8 (pos - 1), 540- /*ucn_p=*/false); 541+ { 542+ location_t loc; 543+ bidi::kind kind = get_bidi_utf8 (pfile, pos - 1, &loc); 544+ maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc); 545+ } 546 } 547 548 if (warn_bidi_p) 549@@ -2447,8 +2599,10 @@ lex_string (cpp_reader *pfile, cpp_token 550 { 551 if ((cur[0] == 'u' || cur[0] == 'U') && warn_bidi_p) 552 { 553- bidi::kind kind = get_bidi_ucn (cur + 1, cur[0] == 'U'); 554- maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/true); 555+ location_t loc; 556+ bidi::kind kind = get_bidi_ucn (pfile, cur + 1, cur[0] == 'U', 557+ &loc); 558+ maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/true, loc); 559 } 560 cur++; 561 } 562@@ -2476,8 +2630,9 @@ lex_string (cpp_reader *pfile, cpp_token 563 saw_NUL = true; 564 else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p) 565 { 566- bidi::kind kind = get_bidi_utf8 (cur - 1); 567- maybe_warn_bidi_on_char (pfile, cur - 1, kind, /*ucn_p=*/false); 568+ location_t loc; 569+ bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc); 570+ maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc); 571 } 572 } 573 574