#include <mbgl/text/bidi.hpp>
#include <mbgl/util/traits.hpp>

#include <unicode/ubidi.h>
#include <unicode/ushape.h>

#include <memory>
#include <stdexcept>
#include <utility>
9 
10 namespace mbgl {
11 
12 class BiDiImpl {
13 public:
BiDiImpl()14     BiDiImpl() : bidiText(ubidi_open()), bidiLine(ubidi_open()) {
15     }
~BiDiImpl()16     ~BiDiImpl() {
17         ubidi_close(bidiText);
18         ubidi_close(bidiLine);
19     }
20 
21     UBiDi* bidiText = nullptr;
22     UBiDi* bidiLine = nullptr;
23 };
24 
BiDi()25 BiDi::BiDi() : impl(std::make_unique<BiDiImpl>()) {}
26 BiDi::~BiDi() = default;
27 
28 // Takes UTF16 input in logical order and applies Arabic shaping to the input while maintaining
29 // logical order. Output won't be intelligible until the bidirectional algorithm is applied
applyArabicShaping(const std::u16string & input)30 std::u16string applyArabicShaping(const std::u16string& input) {
31     UErrorCode errorCode = U_ZERO_ERROR;
32 
33     const int32_t outputLength =
34         u_shapeArabic(mbgl::utf16char_cast<const UChar*>(input.c_str()), static_cast<int32_t>(input.size()), nullptr, 0,
35                       (U_SHAPE_LETTERS_SHAPE & U_SHAPE_LETTERS_MASK) |
36                           (U_SHAPE_TEXT_DIRECTION_LOGICAL & U_SHAPE_TEXT_DIRECTION_MASK),
37                       &errorCode);
38 
39     // Pre-flighting will always set U_BUFFER_OVERFLOW_ERROR
40     errorCode = U_ZERO_ERROR;
41 
42     std::u16string outputText(outputLength, 0);
43 
44     u_shapeArabic(mbgl::utf16char_cast<const UChar*>(input.c_str()), static_cast<int32_t>(input.size()), mbgl::utf16char_cast<UChar*>(&outputText[0]), outputLength,
45                   (U_SHAPE_LETTERS_SHAPE & U_SHAPE_LETTERS_MASK) |
46                       (U_SHAPE_TEXT_DIRECTION_LOGICAL & U_SHAPE_TEXT_DIRECTION_MASK),
47                   &errorCode);
48 
49     // If the algorithm fails for any reason, fall back to non-transformed text
50     if (U_FAILURE(errorCode))
51         return input;
52 
53     return outputText;
54 }
55 
mergeParagraphLineBreaks(std::set<size_t> & lineBreakPoints)56 void BiDi::mergeParagraphLineBreaks(std::set<size_t>& lineBreakPoints) {
57     int32_t paragraphCount = ubidi_countParagraphs(impl->bidiText);
58     for (int32_t i = 0; i < paragraphCount; i++) {
59         UErrorCode errorCode = U_ZERO_ERROR;
60         int32_t paragraphEndIndex;
61         ubidi_getParagraphByIndex(impl->bidiText, i, nullptr, &paragraphEndIndex, nullptr, &errorCode);
62 
63         if (U_FAILURE(errorCode)) {
64             throw std::runtime_error(std::string("ProcessedBiDiText::mergeParagraphLineBreaks: ") +
65                                      u_errorName(errorCode));
66         }
67 
68         lineBreakPoints.insert(static_cast<std::size_t>(paragraphEndIndex));
69     }
70 }
71 
applyLineBreaking(std::set<std::size_t> lineBreakPoints)72 std::vector<std::u16string> BiDi::applyLineBreaking(std::set<std::size_t> lineBreakPoints) {
73     // BiDi::getLine will error if called across a paragraph boundary, so we need to ensure that all
74     // paragraph boundaries are included in the set of line break points. The calling code might not
75     // include the line break because it didn't need to wrap at that point, or because the text was
76     // separated with a more exotic code point such as (U+001C)
77     mergeParagraphLineBreaks(lineBreakPoints);
78 
79     std::vector<std::u16string> transformedLines;
80     transformedLines.reserve(lineBreakPoints.size());
81 
82     std::size_t start = 0;
83     for (std::size_t lineBreakPoint : lineBreakPoints) {
84         transformedLines.push_back(getLine(start, lineBreakPoint));
85         start = lineBreakPoint;
86     }
87 
88     return transformedLines;
89 }
90 
processText(const std::u16string & input,std::set<std::size_t> lineBreakPoints)91 std::vector<std::u16string> BiDi::processText(const std::u16string& input,
92                                               std::set<std::size_t> lineBreakPoints) {
93     UErrorCode errorCode = U_ZERO_ERROR;
94 
95     ubidi_setPara(impl->bidiText, mbgl::utf16char_cast<const UChar*>(input.c_str()), static_cast<int32_t>(input.size()),
96                   UBIDI_DEFAULT_LTR, nullptr, &errorCode);
97 
98     if (U_FAILURE(errorCode)) {
99         throw std::runtime_error(std::string("BiDi::processText: ") + u_errorName(errorCode));
100     }
101 
102     return applyLineBreaking(lineBreakPoints);
103 }
104 
getLine(std::size_t start,std::size_t end)105 std::u16string BiDi::getLine(std::size_t start, std::size_t end) {
106     UErrorCode errorCode = U_ZERO_ERROR;
107     ubidi_setLine(impl->bidiText, static_cast<int32_t>(start), static_cast<int32_t>(end), impl->bidiLine, &errorCode);
108 
109     if (U_FAILURE(errorCode)) {
110         throw std::runtime_error(std::string("BiDi::getLine (setLine): ") + u_errorName(errorCode));
111     }
112 
113     // Because we set UBIDI_REMOVE_BIDI_CONTROLS, the output may be smaller than what we reserve
114     //  Setting UBIDI_INSERT_LRM_FOR_NUMERIC would require
115     //  ubidi_getLength(pBiDi)+2*ubidi_countRuns(pBiDi)
116     const int32_t outputLength = ubidi_getProcessedLength(impl->bidiLine);
117     std::u16string outputText(outputLength, 0);
118 
119     // UBIDI_DO_MIRRORING: Apply unicode mirroring of characters like parentheses
120     // UBIDI_REMOVE_BIDI_CONTROLS: Now that all the lines are set, remove control characters so that
121     // they don't show up on screen (some fonts have glyphs representing them)
122     ubidi_writeReordered(impl->bidiLine, mbgl::utf16char_cast<UChar*>(&outputText[0]), outputLength,
123                          UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &errorCode);
124 
125     if (U_FAILURE(errorCode)) {
126         throw std::runtime_error(std::string("BiDi::getLine (writeReordered): ") +
127                                  u_errorName(errorCode));
128     }
129 
130     return outputText;
131 }
132 
133 } // end namespace mbgl
134