1 #ifndef PROTOZERO_VARINT_HPP
2 #define PROTOZERO_VARINT_HPP
3
4 /*****************************************************************************
5
6 protozero - Minimalistic protocol buffer decoder and encoder in C++.
7
8 This file is from https://github.com/mapbox/protozero where you can find more
9 documentation.
10
11 *****************************************************************************/
12
13 /**
14 * @file varint.hpp
15 *
16 * @brief Contains low-level varint and zigzag encoding and decoding functions.
17 */
18
19 #include <cstdint>
20
21 #include <protozero/exception.hpp>
22
23 namespace protozero {
24
25 /**
26 * The maximum length of a 64 bit varint.
27 */
28 constexpr const int8_t max_varint_length = sizeof(uint64_t) * 8 / 7 + 1;
29
30 namespace detail {
31
32 // from https://github.com/facebook/folly/blob/master/folly/Varint.h
decode_varint_impl(const char ** data,const char * end)33 inline uint64_t decode_varint_impl(const char** data, const char* end) {
34 const int8_t* begin = reinterpret_cast<const int8_t*>(*data);
35 const int8_t* iend = reinterpret_cast<const int8_t*>(end);
36 const int8_t* p = begin;
37 uint64_t val = 0;
38
39 if (iend - begin >= max_varint_length) { // fast path
40 do {
41 int64_t b;
42 b = *p++; val = uint64_t((b & 0x7f) ); if (b >= 0) break;
43 b = *p++; val |= uint64_t((b & 0x7f) << 7); if (b >= 0) break;
44 b = *p++; val |= uint64_t((b & 0x7f) << 14); if (b >= 0) break;
45 b = *p++; val |= uint64_t((b & 0x7f) << 21); if (b >= 0) break;
46 b = *p++; val |= uint64_t((b & 0x7f) << 28); if (b >= 0) break;
47 b = *p++; val |= uint64_t((b & 0x7f) << 35); if (b >= 0) break;
48 b = *p++; val |= uint64_t((b & 0x7f) << 42); if (b >= 0) break;
49 b = *p++; val |= uint64_t((b & 0x7f) << 49); if (b >= 0) break;
50 b = *p++; val |= uint64_t((b & 0x7f) << 56); if (b >= 0) break;
51 b = *p++; val |= uint64_t((b & 0x7f) << 63); if (b >= 0) break;
52 throw varint_too_long_exception();
53 } while (false);
54 } else {
55 int shift = 0;
56 while (p != iend && *p < 0) {
57 val |= uint64_t(*p++ & 0x7f) << shift;
58 shift += 7;
59 }
60 if (p == iend) {
61 throw end_of_buffer_exception();
62 }
63 val |= uint64_t(*p++) << shift;
64 }
65
66 *data = reinterpret_cast<const char*>(p);
67 return val;
68 }
69
70 } // end namespace detail
71
72 /**
73 * Decode a 64 bit varint.
74 *
75 * Strong exception guarantee: if there is an exception the data pointer will
76 * not be changed.
77 *
78 * @param[in,out] data Pointer to pointer to the input data. After the function
79 * returns this will point to the next data to be read.
80 * @param[in] end Pointer one past the end of the input data.
81 * @returns The decoded integer
82 * @throws varint_too_long_exception if the varint is longer then the maximum
83 * length that would fit in a 64 bit int. Usually this means your data
84 * is corrupted or you are trying to read something as a varint that
85 * isn't.
86 * @throws end_of_buffer_exception if the *end* of the buffer was reached
87 * before the end of the varint.
88 */
decode_varint(const char ** data,const char * end)89 inline uint64_t decode_varint(const char** data, const char* end) {
90 // If this is a one-byte varint, decode it here.
91 if (end != *data && ((**data & 0x80) == 0)) {
92 uint64_t val = uint64_t(**data);
93 ++(*data);
94 return val;
95 }
96 // If this varint is more than one byte, defer to complete implementation.
97 return detail::decode_varint_impl(data, end);
98 }
99
100 /**
101 * Skip over a varint.
102 *
103 * Strong exception guarantee: if there is an exception the data pointer will
104 * not be changed.
105 *
106 * @param[in,out] data Pointer to pointer to the input data. After the function
107 * returns this will point to the next data to be read.
108 * @param[in] end Pointer one past the end of the input data.
109 * @throws end_of_buffer_exception if the *end* of the buffer was reached
110 * before the end of the varint.
111 */
skip_varint(const char ** data,const char * end)112 inline void skip_varint(const char** data, const char* end) {
113 const int8_t* begin = reinterpret_cast<const int8_t*>(*data);
114 const int8_t* iend = reinterpret_cast<const int8_t*>(end);
115 const int8_t* p = begin;
116
117 while (p != iend && *p < 0) {
118 ++p;
119 }
120
121 if (p >= begin + max_varint_length) {
122 throw varint_too_long_exception();
123 }
124
125 if (p == iend) {
126 throw end_of_buffer_exception();
127 }
128
129 ++p;
130
131 *data = reinterpret_cast<const char*>(p);
132 }
133
134 /**
135 * Varint encode a 64 bit integer.
136 *
137 * @tparam T An output iterator type.
138 * @param data Output iterator the varint encoded value will be written to
139 * byte by byte.
140 * @param value The integer that will be encoded.
141 * @throws Any exception thrown by increment or dereference operator on data.
142 */
143 template <typename T>
write_varint(T data,uint64_t value)144 inline int write_varint(T data, uint64_t value) {
145 int n = 1;
146
147 while (value >= 0x80) {
148 *data++ = char((value & 0x7f) | 0x80);
149 value >>= 7;
150 ++n;
151 }
152 *data++ = char(value);
153
154 return n;
155 }
156
157 /**
158 * ZigZag encodes a 32 bit integer.
159 */
encode_zigzag32(int32_t value)160 inline constexpr uint32_t encode_zigzag32(int32_t value) noexcept {
161 return (static_cast<uint32_t>(value) << 1) ^ (static_cast<uint32_t>(value >> 31));
162 }
163
164 /**
165 * ZigZag encodes a 64 bit integer.
166 */
encode_zigzag64(int64_t value)167 inline constexpr uint64_t encode_zigzag64(int64_t value) noexcept {
168 return (static_cast<uint64_t>(value) << 1) ^ (static_cast<uint64_t>(value >> 63));
169 }
170
171 /**
172 * Decodes a 32 bit ZigZag-encoded integer.
173 */
decode_zigzag32(uint32_t value)174 inline constexpr int32_t decode_zigzag32(uint32_t value) noexcept {
175 return static_cast<int32_t>(value >> 1) ^ -static_cast<int32_t>(value & 1);
176 }
177
178 /**
179 * Decodes a 64 bit ZigZag-encoded integer.
180 */
decode_zigzag64(uint64_t value)181 inline constexpr int64_t decode_zigzag64(uint64_t value) noexcept {
182 return static_cast<int64_t>(value >> 1) ^ -static_cast<int64_t>(value & 1);
183 }
184
185 } // end namespace protozero
186
187 #endif // PROTOZERO_VARINT_HPP
188