1 #include <mbgl/storage/http_file_source.hpp>
2 #include <mbgl/storage/resource.hpp>
3 #include <mbgl/storage/response.hpp>
4 #include <mbgl/util/logging.hpp>
5
6 #include <mbgl/util/util.hpp>
7 #include <mbgl/util/optional.hpp>
8 #include <mbgl/util/run_loop.hpp>
9 #include <mbgl/util/string.hpp>
10 #include <mbgl/util/timer.hpp>
11 #include <mbgl/util/chrono.hpp>
12 #include <mbgl/util/http_header.hpp>
13
14 #include <curl/curl.h>
15
16 // Dynamically load all cURL functions. Debian-derived systems upgraded the OpenSSL version linked
17 // to in https://salsa.debian.org/debian/curl/commit/95c94957bb7e89e36e78b995fed468c42f64d18d
18 // They state:
19 // Rename libcurl3 to libcurl4, because libcurl exposes an SSL_CTX via
20 // CURLOPT_SSL_CTX_FUNCTION, and this object changes incompatibly between
21 // openssl 1.0 and openssl 1.1.
22 // Since we are not accessing the underlying OpenSSL context, we don't care whether we're linking
23 // against libcurl3 or libcurl4; both use the ABI version 4 which hasn't changed since 2006
24 // (see https://curl.haxx.se/libcurl/abi.html). In fact, cURL's ABI compatibility is very good as
25 // shown on https://abi-laboratory.pro/tracker/timeline/curl/
26 // Therefore, we're dynamically loading the cURL symbols we need to avoid linking against versioned
27 // symbols.
28 #include <dlfcn.h>
29
30 namespace curl {
31
32 #define CURL_FUNCTIONS \
33 X(global_init) \
34 X(getdate) \
35 X(easy_strerror) \
36 X(easy_init) \
37 X(easy_setopt) \
38 X(easy_cleanup) \
39 X(easy_getinfo) \
40 X(easy_reset) \
41 X(multi_init) \
42 X(multi_add_handle) \
43 X(multi_remove_handle) \
44 X(multi_cleanup) \
45 X(multi_info_read) \
46 X(multi_strerror) \
47 X(multi_socket_action) \
48 X(multi_setopt) \
49 X(share_init) \
50 X(share_cleanup) \
51 X(slist_append) \
52 X(slist_free_all)
53
54 #define X(name) static decltype(&curl_ ## name) name = nullptr;
55 CURL_FUNCTIONS
56 #undef X
57
58 static void* handle = nullptr;
59
load(const char * name)60 static void* load(const char* name) {
61 void* symbol = dlsym(handle, name);
62 if (const char* error = dlerror()) {
63 fprintf(stderr, "Cannot load symbol '%s': %s\n", name, error);
64 dlclose(handle);
65 handle = nullptr;
66 abort();
67 }
68 return symbol;
69 }
70
71 __attribute__((constructor))
load()72 static void load() {
73 assert(!handle);
74 handle = dlopen("libcurl.so.4", RTLD_LAZY | RTLD_LOCAL);
75 if (!handle) {
76 fprintf(stderr, "Could not open shared library '%s'\n", "libcurl.so.4");
77 abort();
78 }
79
80 #define X(name) name = (decltype(&curl_ ## name))load("curl_" #name);
81 CURL_FUNCTIONS
82 #undef X
83 }
84
85 __attribute__((constructor))
unload()86 static void unload() {
87 if (handle) {
88 dlclose(handle);
89 }
90 }
91
92 } // namespace curl
93
94
95 #include <queue>
96 #include <map>
97 #include <cassert>
98 #include <cstring>
99 #include <cstdio>
100
handleError(CURLMcode code)101 static void handleError(CURLMcode code) {
102 if (code != CURLM_OK) {
103 throw std::runtime_error(std::string("CURL multi error: ") + curl::multi_strerror(code));
104 }
105 }
106
handleError(CURLcode code)107 static void handleError(CURLcode code) {
108 if (code != CURLE_OK) {
109 throw std::runtime_error(std::string("CURL easy error: ") + curl::easy_strerror(code));
110 }
111 }
112
113 namespace mbgl {
114
115 class HTTPFileSource::Impl {
116 public:
117 Impl();
118 ~Impl();
119
120 static int handleSocket(CURL *handle, curl_socket_t s, int action, void *userp, void *socketp);
121 static int startTimeout(CURLM *multi, long timeout_ms, void *userp);
122 static void onTimeout(HTTPFileSource::Impl *context);
123
124 void perform(curl_socket_t s, util::RunLoop::Event event);
125 CURL *getHandle();
126 void returnHandle(CURL *handle);
127 void checkMultiInfo();
128
129 // Used as the CURL timer function to periodically check for socket updates.
130 util::Timer timeout;
131
132 // CURL multi handle that we use to request multiple URLs at the same time, without having to
133 // block and spawn threads.
134 CURLM *multi = nullptr;
135
136 // CURL share handles are used for sharing session state (e.g.)
137 CURLSH *share = nullptr;
138
139 // A queue that we use for storing resuable CURL easy handles to avoid creating and destroying
140 // them all the time.
141 std::queue<CURL *> handles;
142 };
143
144 class HTTPRequest : public AsyncRequest {
145 public:
146 HTTPRequest(HTTPFileSource::Impl*, Resource, FileSource::Callback);
147 ~HTTPRequest() override;
148
149 void handleResult(CURLcode code);
150
151 private:
152 static size_t headerCallback(char *const buffer, const size_t size, const size_t nmemb, void *userp);
153 static size_t writeCallback(void *const contents, const size_t size, const size_t nmemb, void *userp);
154
155 HTTPFileSource::Impl* context = nullptr;
156 Resource resource;
157 FileSource::Callback callback;
158
159 // Will store the current response.
160 std::shared_ptr<std::string> data;
161 std::unique_ptr<Response> response;
162
163 optional<std::string> retryAfter;
164 optional<std::string> xRateLimitReset;
165
166 CURL *handle = nullptr;
167 curl_slist *headers = nullptr;
168
169 char error[CURL_ERROR_SIZE] = { 0 };
170 };
171
Impl()172 HTTPFileSource::Impl::Impl() {
173 if (curl::global_init(CURL_GLOBAL_ALL)) {
174 throw std::runtime_error("Could not init cURL");
175 }
176
177 share = curl::share_init();
178
179 multi = curl::multi_init();
180 handleError(curl::multi_setopt(multi, CURLMOPT_SOCKETFUNCTION, handleSocket));
181 handleError(curl::multi_setopt(multi, CURLMOPT_SOCKETDATA, this));
182 handleError(curl::multi_setopt(multi, CURLMOPT_TIMERFUNCTION, startTimeout));
183 handleError(curl::multi_setopt(multi, CURLMOPT_TIMERDATA, this));
184 }
185
~Impl()186 HTTPFileSource::Impl::~Impl() {
187 while (!handles.empty()) {
188 curl::easy_cleanup(handles.front());
189 handles.pop();
190 }
191
192 curl::multi_cleanup(multi);
193 multi = nullptr;
194
195 curl::share_cleanup(share);
196 share = nullptr;
197
198 timeout.stop();
199 }
200
getHandle()201 CURL *HTTPFileSource::Impl::getHandle() {
202 if (!handles.empty()) {
203 auto handle = handles.front();
204 handles.pop();
205 return handle;
206 } else {
207 return curl::easy_init();
208 }
209 }
210
returnHandle(CURL * handle)211 void HTTPFileSource::Impl::returnHandle(CURL *handle) {
212 curl::easy_reset(handle);
213 handles.push(handle);
214 }
215
checkMultiInfo()216 void HTTPFileSource::Impl::checkMultiInfo() {
217 CURLMsg *message = nullptr;
218 int pending = 0;
219
220 while ((message = curl::multi_info_read(multi, &pending))) {
221 switch (message->msg) {
222 case CURLMSG_DONE: {
223 HTTPRequest *baton = nullptr;
224 curl::easy_getinfo(message->easy_handle, CURLINFO_PRIVATE, (char *)&baton);
225 assert(baton);
226 baton->handleResult(message->data.result);
227 } break;
228
229 default:
230 // This should never happen, because there are no other message types.
231 throw std::runtime_error("CURLMsg returned unknown message type");
232 }
233 }
234 }
235
perform(curl_socket_t s,util::RunLoop::Event events)236 void HTTPFileSource::Impl::perform(curl_socket_t s, util::RunLoop::Event events) {
237 int flags = 0;
238
239 if (events == util::RunLoop::Event::Read) {
240 flags |= CURL_CSELECT_IN;
241 }
242 if (events == util::RunLoop::Event::Write) {
243 flags |= CURL_CSELECT_OUT;
244 }
245
246
247 int running_handles = 0;
248 curl::multi_socket_action(multi, s, flags, &running_handles);
249 checkMultiInfo();
250 }
251
handleSocket(CURL *,curl_socket_t s,int action,void * userp,void *)252 int HTTPFileSource::Impl::handleSocket(CURL * /* handle */, curl_socket_t s, int action, void *userp,
253 void * /* socketp */) {
254 assert(userp);
255 auto context = reinterpret_cast<Impl *>(userp);
256
257 switch (action) {
258 case CURL_POLL_IN: {
259 using namespace std::placeholders;
260 util::RunLoop::Get()->addWatch(s, util::RunLoop::Event::Read,
261 std::bind(&Impl::perform, context, _1, _2));
262 break;
263 }
264 case CURL_POLL_OUT: {
265 using namespace std::placeholders;
266 util::RunLoop::Get()->addWatch(s, util::RunLoop::Event::Write,
267 std::bind(&Impl::perform, context, _1, _2));
268 break;
269 }
270 case CURL_POLL_REMOVE:
271 util::RunLoop::Get()->removeWatch(s);
272 break;
273 default:
274 throw std::runtime_error("Unhandled CURL socket action");
275 }
276
277 return 0;
278 }
279
onTimeout(Impl * context)280 void HTTPFileSource::Impl::onTimeout(Impl *context) {
281 int running_handles;
282 CURLMcode error = curl::multi_socket_action(context->multi, CURL_SOCKET_TIMEOUT, 0, &running_handles);
283 if (error != CURLM_OK) {
284 throw std::runtime_error(std::string("CURL multi error: ") + curl::multi_strerror(error));
285 }
286 context->checkMultiInfo();
287 }
288
startTimeout(CURLM *,long timeout_ms,void * userp)289 int HTTPFileSource::Impl::startTimeout(CURLM * /* multi */, long timeout_ms, void *userp) {
290 assert(userp);
291 auto context = reinterpret_cast<Impl *>(userp);
292
293 if (timeout_ms < 0) {
294 // A timeout of 0 ms means that the timer will invoked in the next loop iteration.
295 timeout_ms = 0;
296 }
297
298 context->timeout.stop();
299 context->timeout.start(mbgl::Milliseconds(timeout_ms), Duration::zero(),
300 std::bind(&Impl::onTimeout, context));
301
302 return 0;
303 }
304
HTTPRequest(HTTPFileSource::Impl * context_,Resource resource_,FileSource::Callback callback_)305 HTTPRequest::HTTPRequest(HTTPFileSource::Impl* context_, Resource resource_, FileSource::Callback callback_)
306 : context(context_),
307 resource(std::move(resource_)),
308 callback(std::move(callback_)),
309 handle(context->getHandle()) {
310
311 // If there's already a response, set the correct etags/modified headers to make sure we are
312 // getting a 304 response if possible. This avoids redownloading unchanged data.
313 if (resource.priorEtag) {
314 const std::string header = std::string("If-None-Match: ") + *resource.priorEtag;
315 headers = curl::slist_append(headers, header.c_str());
316 } else if (resource.priorModified) {
317 const std::string time =
318 std::string("If-Modified-Since: ") + util::rfc1123(*resource.priorModified);
319 headers = curl::slist_append(headers, time.c_str());
320 }
321
322 if (headers) {
323 curl::easy_setopt(handle, CURLOPT_HTTPHEADER, headers);
324 }
325
326 handleError(curl::easy_setopt(handle, CURLOPT_PRIVATE, this));
327 handleError(curl::easy_setopt(handle, CURLOPT_ERRORBUFFER, error));
328 handleError(curl::easy_setopt(handle, CURLOPT_CAINFO, "ca-bundle.crt"));
329 handleError(curl::easy_setopt(handle, CURLOPT_FOLLOWLOCATION, 1));
330 handleError(curl::easy_setopt(handle, CURLOPT_URL, resource.url.c_str()));
331 handleError(curl::easy_setopt(handle, CURLOPT_WRITEFUNCTION, writeCallback));
332 handleError(curl::easy_setopt(handle, CURLOPT_WRITEDATA, this));
333 handleError(curl::easy_setopt(handle, CURLOPT_HEADERFUNCTION, headerCallback));
334 handleError(curl::easy_setopt(handle, CURLOPT_HEADERDATA, this));
335 #if LIBCURL_VERSION_NUM >= ((7) << 16 | (21) << 8 | 6) // Renamed in 7.21.6
336 handleError(curl::easy_setopt(handle, CURLOPT_ACCEPT_ENCODING, "gzip, deflate"));
337 #else
338 handleError(curl::easy_setopt(handle, CURLOPT_ENCODING, "gzip, deflate"));
339 #endif
340 handleError(curl::easy_setopt(handle, CURLOPT_USERAGENT, "MapboxGL/1.0"));
341 handleError(curl::easy_setopt(handle, CURLOPT_SHARE, context->share));
342
343 // Start requesting the information.
344 handleError(curl::multi_add_handle(context->multi, handle));
345 }
346
~HTTPRequest()347 HTTPRequest::~HTTPRequest() {
348 handleError(curl::multi_remove_handle(context->multi, handle));
349 context->returnHandle(handle);
350 handle = nullptr;
351
352 if (headers) {
353 curl::slist_free_all(headers);
354 headers = nullptr;
355 }
356 }
357
358 // This function is called when we have new data for a request. We just append it to the string
359 // containing the previous data.
writeCallback(void * const contents,const size_t size,const size_t nmemb,void * userp)360 size_t HTTPRequest::writeCallback(void *const contents, const size_t size, const size_t nmemb, void *userp) {
361 assert(userp);
362 auto impl = reinterpret_cast<HTTPRequest *>(userp);
363
364 if (!impl->data) {
365 impl->data = std::make_shared<std::string>();
366 }
367
368 impl->data->append((char *)contents, size * nmemb);
369 return size * nmemb;
370 }
371
// Compares the beginning of the (non-zero-terminated!) data buffer with the (zero-terminated!)
// header string. If the data buffer contains the header string at the beginning, it returns
// the length of the header string == begin of the value, otherwise it returns npos.
// The comparison of the header is ASCII-case-insensitive.
//
// header: zero-terminated header name to look for (e.g. "etag: ").
// buffer: raw header line; only the first `length` bytes are read.
// length: number of valid bytes in `buffer`.
size_t headerMatches(const char *const header, const char *const buffer, const size_t length) {
    const size_t headerLength = strlen(header);
    if (length < headerLength) {
        return std::string::npos;
    }

    for (size_t i = 0; i < headerLength; ++i) {
        // std::tolower requires a value representable as unsigned char (or EOF); plain `char`
        // may be negative for bytes >= 0x80, so convert before calling it.
        const auto received = static_cast<unsigned char>(buffer[i]);
        const auto expected = static_cast<unsigned char>(header[i]);
        if (std::tolower(received) != std::tolower(expected)) {
            return std::string::npos;
        }
    }
    return headerLength;
}
387
headerCallback(char * const buffer,const size_t size,const size_t nmemb,void * userp)388 size_t HTTPRequest::headerCallback(char *const buffer, const size_t size, const size_t nmemb, void *userp) {
389 assert(userp);
390 auto baton = reinterpret_cast<HTTPRequest *>(userp);
391
392 if (!baton->response) {
393 baton->response = std::make_unique<Response>();
394 }
395
396 const size_t length = size * nmemb;
397 size_t begin = std::string::npos;
398 if ((begin = headerMatches("last-modified: ", buffer, length)) != std::string::npos) {
399 // Always overwrite the modification date; We might already have a value here from the
400 // Date header, but this one is more accurate.
401 const std::string value { buffer + begin, length - begin - 2 }; // remove \r\n
402 baton->response->modified = Timestamp{ Seconds(curl::getdate(value.c_str(), nullptr)) };
403 } else if ((begin = headerMatches("etag: ", buffer, length)) != std::string::npos) {
404 baton->response->etag = std::string(buffer + begin, length - begin - 2); // remove \r\n
405 } else if ((begin = headerMatches("cache-control: ", buffer, length)) != std::string::npos) {
406 const std::string value { buffer + begin, length - begin - 2 }; // remove \r\n
407 const auto cc = http::CacheControl::parse(value.c_str());
408 baton->response->expires = cc.toTimePoint();
409 baton->response->mustRevalidate = cc.mustRevalidate;
410 } else if ((begin = headerMatches("expires: ", buffer, length)) != std::string::npos) {
411 const std::string value { buffer + begin, length - begin - 2 }; // remove \r\n
412 baton->response->expires = Timestamp{ Seconds(curl::getdate(value.c_str(), nullptr)) };
413 } else if ((begin = headerMatches("retry-after: ", buffer, length)) != std::string::npos) {
414 baton->retryAfter = std::string(buffer + begin, length - begin - 2); // remove \r\n
415 } else if ((begin = headerMatches("x-rate-limit-reset: ", buffer, length)) != std::string::npos) {
416 baton->xRateLimitReset = std::string(buffer + begin, length - begin - 2); // remove \r\n
417 }
418
419 return length;
420 }
421
handleResult(CURLcode code)422 void HTTPRequest::handleResult(CURLcode code) {
423 // Make sure a response object exists in case we haven't got any headers or content.
424 if (!response) {
425 response = std::make_unique<Response>();
426 }
427
428 using Error = Response::Error;
429
430 // Add human-readable error code
431 if (code != CURLE_OK) {
432 switch (code) {
433 case CURLE_COULDNT_RESOLVE_PROXY:
434 case CURLE_COULDNT_RESOLVE_HOST:
435 case CURLE_COULDNT_CONNECT:
436 case CURLE_OPERATION_TIMEDOUT:
437
438 response->error = std::make_unique<Error>(
439 Error::Reason::Connection, std::string{ curl::easy_strerror(code) } + ": " + error);
440 break;
441
442 default:
443 response->error = std::make_unique<Error>(
444 Error::Reason::Other, std::string{ curl::easy_strerror(code) } + ": " + error);
445 break;
446 }
447 } else {
448 long responseCode = 0;
449 curl::easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &responseCode);
450
451 if (responseCode == 200) {
452 if (data) {
453 response->data = std::move(data);
454 } else {
455 response->data = std::make_shared<std::string>();
456 }
457 } else if (responseCode == 204 || (responseCode == 404 && resource.kind == Resource::Kind::Tile)) {
458 response->noContent = true;
459 } else if (responseCode == 304) {
460 response->notModified = true;
461 } else if (responseCode == 404) {
462 response->error =
463 std::make_unique<Error>(Error::Reason::NotFound, "HTTP status code 404");
464 } else if (responseCode == 429) {
465 response->error =
466 std::make_unique<Error>(Error::Reason::RateLimit, "HTTP status code 429",
467 http::parseRetryHeaders(retryAfter, xRateLimitReset));
468 } else if (responseCode >= 500 && responseCode < 600) {
469 response->error =
470 std::make_unique<Error>(Error::Reason::Server, std::string{ "HTTP status code " } +
471 util::toString(responseCode));
472 } else {
473 response->error =
474 std::make_unique<Error>(Error::Reason::Other, std::string{ "HTTP status code " } +
475 util::toString(responseCode));
476 }
477 }
478
479 // Calling `callback` may result in deleting `this`. Copy data to temporaries first.
480 auto callback_ = callback;
481 auto response_ = *response;
482 callback_(response_);
483 }
484
HTTPFileSource()485 HTTPFileSource::HTTPFileSource()
486 : impl(std::make_unique<Impl>()) {
487 }
488
489 HTTPFileSource::~HTTPFileSource() = default;
490
request(const Resource & resource,Callback callback)491 std::unique_ptr<AsyncRequest> HTTPFileSource::request(const Resource& resource, Callback callback) {
492 return std::make_unique<HTTPRequest>(impl.get(), resource, callback);
493 }
494
maximumConcurrentRequests()495 uint32_t HTTPFileSource::maximumConcurrentRequests() {
496 return 20;
497 }
498
499 } // namespace mbgl
500