#include <mbgl/text/bidi.hpp>
#include <mbgl/util/traits.hpp>

#include <unicode/ubidi.h>
#include <unicode/ushape.h>

#include <memory>
#include <stdexcept>
#include <string>
#include <utility>
9
10 namespace mbgl {
11
12 class BiDiImpl {
13 public:
BiDiImpl()14 BiDiImpl() : bidiText(ubidi_open()), bidiLine(ubidi_open()) {
15 }
~BiDiImpl()16 ~BiDiImpl() {
17 ubidi_close(bidiText);
18 ubidi_close(bidiLine);
19 }
20
21 UBiDi* bidiText = nullptr;
22 UBiDi* bidiLine = nullptr;
23 };
24
BiDi()25 BiDi::BiDi() : impl(std::make_unique<BiDiImpl>()) {}
26 BiDi::~BiDi() = default;
27
28 // Takes UTF16 input in logical order and applies Arabic shaping to the input while maintaining
29 // logical order. Output won't be intelligible until the bidirectional algorithm is applied
applyArabicShaping(const std::u16string & input)30 std::u16string applyArabicShaping(const std::u16string& input) {
31 UErrorCode errorCode = U_ZERO_ERROR;
32
33 const int32_t outputLength =
34 u_shapeArabic(mbgl::utf16char_cast<const UChar*>(input.c_str()), static_cast<int32_t>(input.size()), nullptr, 0,
35 (U_SHAPE_LETTERS_SHAPE & U_SHAPE_LETTERS_MASK) |
36 (U_SHAPE_TEXT_DIRECTION_LOGICAL & U_SHAPE_TEXT_DIRECTION_MASK),
37 &errorCode);
38
39 // Pre-flighting will always set U_BUFFER_OVERFLOW_ERROR
40 errorCode = U_ZERO_ERROR;
41
42 std::u16string outputText(outputLength, 0);
43
44 u_shapeArabic(mbgl::utf16char_cast<const UChar*>(input.c_str()), static_cast<int32_t>(input.size()), mbgl::utf16char_cast<UChar*>(&outputText[0]), outputLength,
45 (U_SHAPE_LETTERS_SHAPE & U_SHAPE_LETTERS_MASK) |
46 (U_SHAPE_TEXT_DIRECTION_LOGICAL & U_SHAPE_TEXT_DIRECTION_MASK),
47 &errorCode);
48
49 // If the algorithm fails for any reason, fall back to non-transformed text
50 if (U_FAILURE(errorCode))
51 return input;
52
53 return outputText;
54 }
55
mergeParagraphLineBreaks(std::set<size_t> & lineBreakPoints)56 void BiDi::mergeParagraphLineBreaks(std::set<size_t>& lineBreakPoints) {
57 int32_t paragraphCount = ubidi_countParagraphs(impl->bidiText);
58 for (int32_t i = 0; i < paragraphCount; i++) {
59 UErrorCode errorCode = U_ZERO_ERROR;
60 int32_t paragraphEndIndex;
61 ubidi_getParagraphByIndex(impl->bidiText, i, nullptr, ¶graphEndIndex, nullptr, &errorCode);
62
63 if (U_FAILURE(errorCode)) {
64 throw std::runtime_error(std::string("ProcessedBiDiText::mergeParagraphLineBreaks: ") +
65 u_errorName(errorCode));
66 }
67
68 lineBreakPoints.insert(static_cast<std::size_t>(paragraphEndIndex));
69 }
70 }
71
applyLineBreaking(std::set<std::size_t> lineBreakPoints)72 std::vector<std::u16string> BiDi::applyLineBreaking(std::set<std::size_t> lineBreakPoints) {
73 // BiDi::getLine will error if called across a paragraph boundary, so we need to ensure that all
74 // paragraph boundaries are included in the set of line break points. The calling code might not
75 // include the line break because it didn't need to wrap at that point, or because the text was
76 // separated with a more exotic code point such as (U+001C)
77 mergeParagraphLineBreaks(lineBreakPoints);
78
79 std::vector<std::u16string> transformedLines;
80 transformedLines.reserve(lineBreakPoints.size());
81
82 std::size_t start = 0;
83 for (std::size_t lineBreakPoint : lineBreakPoints) {
84 transformedLines.push_back(getLine(start, lineBreakPoint));
85 start = lineBreakPoint;
86 }
87
88 return transformedLines;
89 }
90
processText(const std::u16string & input,std::set<std::size_t> lineBreakPoints)91 std::vector<std::u16string> BiDi::processText(const std::u16string& input,
92 std::set<std::size_t> lineBreakPoints) {
93 UErrorCode errorCode = U_ZERO_ERROR;
94
95 ubidi_setPara(impl->bidiText, mbgl::utf16char_cast<const UChar*>(input.c_str()), static_cast<int32_t>(input.size()),
96 UBIDI_DEFAULT_LTR, nullptr, &errorCode);
97
98 if (U_FAILURE(errorCode)) {
99 throw std::runtime_error(std::string("BiDi::processText: ") + u_errorName(errorCode));
100 }
101
102 return applyLineBreaking(lineBreakPoints);
103 }
104
getLine(std::size_t start,std::size_t end)105 std::u16string BiDi::getLine(std::size_t start, std::size_t end) {
106 UErrorCode errorCode = U_ZERO_ERROR;
107 ubidi_setLine(impl->bidiText, static_cast<int32_t>(start), static_cast<int32_t>(end), impl->bidiLine, &errorCode);
108
109 if (U_FAILURE(errorCode)) {
110 throw std::runtime_error(std::string("BiDi::getLine (setLine): ") + u_errorName(errorCode));
111 }
112
113 // Because we set UBIDI_REMOVE_BIDI_CONTROLS, the output may be smaller than what we reserve
114 // Setting UBIDI_INSERT_LRM_FOR_NUMERIC would require
115 // ubidi_getLength(pBiDi)+2*ubidi_countRuns(pBiDi)
116 const int32_t outputLength = ubidi_getProcessedLength(impl->bidiLine);
117 std::u16string outputText(outputLength, 0);
118
119 // UBIDI_DO_MIRRORING: Apply unicode mirroring of characters like parentheses
120 // UBIDI_REMOVE_BIDI_CONTROLS: Now that all the lines are set, remove control characters so that
121 // they don't show up on screen (some fonts have glyphs representing them)
122 ubidi_writeReordered(impl->bidiLine, mbgl::utf16char_cast<UChar*>(&outputText[0]), outputLength,
123 UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &errorCode);
124
125 if (U_FAILURE(errorCode)) {
126 throw std::runtime_error(std::string("BiDi::getLine (writeReordered): ") +
127 u_errorName(errorCode));
128 }
129
130 return outputText;
131 }
132
133 } // end namespace mbgl
134