1 #ifndef PROTOZERO_VARINT_HPP
2 #define PROTOZERO_VARINT_HPP
3 
4 /*****************************************************************************
5 
6 protozero - Minimalistic protocol buffer decoder and encoder in C++.
7 
8 This file is from https://github.com/mapbox/protozero where you can find more
9 documentation.
10 
11 *****************************************************************************/
12 
13 /**
14  * @file varint.hpp
15  *
16  * @brief Contains low-level varint and zigzag encoding and decoding functions.
17  */
18 
19 #include <cstdint>
20 
21 #include <protozero/exception.hpp>
22 
23 namespace protozero {
24 
25 /**
26  * The maximum length of a 64 bit varint.
27  */
28 constexpr const int8_t max_varint_length = sizeof(uint64_t) * 8 / 7 + 1;
29 
30 namespace detail {
31 
32     // from https://github.com/facebook/folly/blob/master/folly/Varint.h
decode_varint_impl(const char ** data,const char * end)33     inline uint64_t decode_varint_impl(const char** data, const char* end) {
34         const int8_t* begin = reinterpret_cast<const int8_t*>(*data);
35         const int8_t* iend = reinterpret_cast<const int8_t*>(end);
36         const int8_t* p = begin;
37         uint64_t val = 0;
38 
39         if (iend - begin >= max_varint_length) {  // fast path
40             do {
41                 int64_t b;
42                 b = *p++; val  = uint64_t((b & 0x7f)      ); if (b >= 0) break;
43                 b = *p++; val |= uint64_t((b & 0x7f) <<  7); if (b >= 0) break;
44                 b = *p++; val |= uint64_t((b & 0x7f) << 14); if (b >= 0) break;
45                 b = *p++; val |= uint64_t((b & 0x7f) << 21); if (b >= 0) break;
46                 b = *p++; val |= uint64_t((b & 0x7f) << 28); if (b >= 0) break;
47                 b = *p++; val |= uint64_t((b & 0x7f) << 35); if (b >= 0) break;
48                 b = *p++; val |= uint64_t((b & 0x7f) << 42); if (b >= 0) break;
49                 b = *p++; val |= uint64_t((b & 0x7f) << 49); if (b >= 0) break;
50                 b = *p++; val |= uint64_t((b & 0x7f) << 56); if (b >= 0) break;
51                 b = *p++; val |= uint64_t((b & 0x7f) << 63); if (b >= 0) break;
52                 throw varint_too_long_exception();
53             } while (false);
54         } else {
55             int shift = 0;
56             while (p != iend && *p < 0) {
57                 val |= uint64_t(*p++ & 0x7f) << shift;
58                 shift += 7;
59             }
60             if (p == iend) {
61                 throw end_of_buffer_exception();
62             }
63             val |= uint64_t(*p++) << shift;
64         }
65 
66         *data = reinterpret_cast<const char*>(p);
67         return val;
68     }
69 
70 } // end namespace detail
71 
72 /**
73  * Decode a 64 bit varint.
74  *
75  * Strong exception guarantee: if there is an exception the data pointer will
76  * not be changed.
77  *
78  * @param[in,out] data Pointer to pointer to the input data. After the function
79  *        returns this will point to the next data to be read.
80  * @param[in] end Pointer one past the end of the input data.
81  * @returns The decoded integer
82  * @throws varint_too_long_exception if the varint is longer then the maximum
83  *         length that would fit in a 64 bit int. Usually this means your data
84  *         is corrupted or you are trying to read something as a varint that
85  *         isn't.
86  * @throws end_of_buffer_exception if the *end* of the buffer was reached
87  *         before the end of the varint.
88  */
decode_varint(const char ** data,const char * end)89 inline uint64_t decode_varint(const char** data, const char* end) {
90     // If this is a one-byte varint, decode it here.
91     if (end != *data && ((**data & 0x80) == 0)) {
92         uint64_t val = uint64_t(**data);
93         ++(*data);
94         return val;
95     }
96     // If this varint is more than one byte, defer to complete implementation.
97     return detail::decode_varint_impl(data, end);
98 }
99 
100 /**
101  * Skip over a varint.
102  *
103  * Strong exception guarantee: if there is an exception the data pointer will
104  * not be changed.
105  *
106  * @param[in,out] data Pointer to pointer to the input data. After the function
107  *        returns this will point to the next data to be read.
108  * @param[in] end Pointer one past the end of the input data.
109  * @throws end_of_buffer_exception if the *end* of the buffer was reached
110  *         before the end of the varint.
111  */
skip_varint(const char ** data,const char * end)112 inline void skip_varint(const char** data, const char* end) {
113     const int8_t* begin = reinterpret_cast<const int8_t*>(*data);
114     const int8_t* iend = reinterpret_cast<const int8_t*>(end);
115     const int8_t* p = begin;
116 
117     while (p != iend && *p < 0) {
118         ++p;
119     }
120 
121     if (p >= begin + max_varint_length) {
122         throw varint_too_long_exception();
123     }
124 
125     if (p == iend) {
126         throw end_of_buffer_exception();
127     }
128 
129     ++p;
130 
131     *data = reinterpret_cast<const char*>(p);
132 }
133 
134 /**
135  * Varint encode a 64 bit integer.
136  *
137  * @tparam T An output iterator type.
138  * @param data Output iterator the varint encoded value will be written to
139  *             byte by byte.
140  * @param value The integer that will be encoded.
141  * @throws Any exception thrown by increment or dereference operator on data.
142  */
143 template <typename T>
write_varint(T data,uint64_t value)144 inline int write_varint(T data, uint64_t value) {
145     int n = 1;
146 
147     while (value >= 0x80) {
148         *data++ = char((value & 0x7f) | 0x80);
149         value >>= 7;
150         ++n;
151     }
152     *data++ = char(value);
153 
154     return n;
155 }
156 
157 /**
158  * ZigZag encodes a 32 bit integer.
159  */
encode_zigzag32(int32_t value)160 inline constexpr uint32_t encode_zigzag32(int32_t value) noexcept {
161     return (static_cast<uint32_t>(value) << 1) ^ (static_cast<uint32_t>(value >> 31));
162 }
163 
164 /**
165  * ZigZag encodes a 64 bit integer.
166  */
encode_zigzag64(int64_t value)167 inline constexpr uint64_t encode_zigzag64(int64_t value) noexcept {
168     return (static_cast<uint64_t>(value) << 1) ^ (static_cast<uint64_t>(value >> 63));
169 }
170 
171 /**
172  * Decodes a 32 bit ZigZag-encoded integer.
173  */
decode_zigzag32(uint32_t value)174 inline constexpr int32_t decode_zigzag32(uint32_t value) noexcept {
175     return static_cast<int32_t>(value >> 1) ^ -static_cast<int32_t>(value & 1);
176 }
177 
178 /**
179  * Decodes a 64 bit ZigZag-encoded integer.
180  */
decode_zigzag64(uint64_t value)181 inline constexpr int64_t decode_zigzag64(uint64_t value) noexcept {
182     return static_cast<int64_t>(value >> 1) ^ -static_cast<int64_t>(value & 1);
183 }
184 
185 } // end namespace protozero
186 
187 #endif // PROTOZERO_VARINT_HPP
188