1 #include <mbgl/storage/http_file_source.hpp>
2 #include <mbgl/storage/resource.hpp>
3 #include <mbgl/storage/response.hpp>
4 #include <mbgl/util/logging.hpp>
5 
6 #include <mbgl/util/util.hpp>
7 #include <mbgl/util/optional.hpp>
8 #include <mbgl/util/run_loop.hpp>
9 #include <mbgl/util/string.hpp>
10 #include <mbgl/util/timer.hpp>
11 #include <mbgl/util/chrono.hpp>
12 #include <mbgl/util/http_header.hpp>
13 
14 #include <curl/curl.h>
15 
16 // Dynamically load all cURL functions. Debian-derived systems upgraded the OpenSSL version linked
17 // to in https://salsa.debian.org/debian/curl/commit/95c94957bb7e89e36e78b995fed468c42f64d18d
18 // They state:
19 //     Rename libcurl3 to libcurl4, because libcurl exposes an SSL_CTX via
20 //     CURLOPT_SSL_CTX_FUNCTION, and this object changes incompatibly between
21 //     openssl 1.0 and openssl 1.1.
22 // Since we are not accessing the underlying OpenSSL context, we don't care whether we're linking
23 // against libcurl3 or libcurl4; both use the ABI version 4 which hasn't changed since 2006
24 // (see https://curl.haxx.se/libcurl/abi.html). In fact, cURL's ABI compatibility is very good as
25 // shown on https://abi-laboratory.pro/tracker/timeline/curl/
26 // Therefore, we're dynamically loading the cURL symbols we need to avoid linking against versioned
27 // symbols.
28 #include <dlfcn.h>
29 
30 namespace curl {
31 
// X-macro listing every cURL entry point used by this file, written without the `curl_` prefix.
// Each expansion site defines X(name) to generate per-function code and undefines it afterwards.
#define CURL_FUNCTIONS \
    X(global_init) \
    X(getdate) \
    X(easy_strerror) \
    X(easy_init) \
    X(easy_setopt) \
    X(easy_cleanup) \
    X(easy_getinfo) \
    X(easy_reset) \
    X(multi_init) \
    X(multi_add_handle) \
    X(multi_remove_handle) \
    X(multi_cleanup) \
    X(multi_info_read) \
    X(multi_strerror) \
    X(multi_socket_action) \
    X(multi_setopt) \
    X(share_init) \
    X(share_cleanup) \
    X(slist_append) \
    X(slist_free_all)

// Declare one function pointer per list entry. Its type is taken from the matching `curl_<name>`
// declaration in <curl/curl.h>, and it starts out as nullptr until load() assigns it.
#define X(name) static decltype(&curl_ ## name) name = nullptr;
CURL_FUNCTIONS
#undef X

// Handle returned by dlopen() for the cURL shared library; nullptr until load() has opened it.
static void* handle = nullptr;
59 
load(const char * name)60 static void* load(const char* name) {
61     void* symbol = dlsym(handle, name);
62     if (const char* error = dlerror()) {
63         fprintf(stderr, "Cannot load symbol '%s': %s\n", name, error);
64         dlclose(handle);
65         handle = nullptr;
66         abort();
67     }
68     return symbol;
69 }
70 
71 __attribute__((constructor))
load()72 static void load() {
73     assert(!handle);
74     handle = dlopen("libcurl.so.4", RTLD_LAZY | RTLD_LOCAL);
75     if (!handle) {
76         fprintf(stderr, "Could not open shared library '%s'\n", "libcurl.so.4");
77         abort();
78     }
79 
80     #define X(name) name = (decltype(&curl_ ## name))load("curl_" #name);
81     CURL_FUNCTIONS
82     #undef X
83 }
84 
85 __attribute__((constructor))
unload()86 static void unload() {
87     if (handle) {
88         dlclose(handle);
89     }
90 }
91 
92 } // namespace curl
93 
94 
95 #include <queue>
96 #include <map>
97 #include <cassert>
98 #include <cstring>
99 #include <cstdio>
100 
handleError(CURLMcode code)101 static void handleError(CURLMcode code) {
102     if (code != CURLM_OK) {
103         throw std::runtime_error(std::string("CURL multi error: ") + curl::multi_strerror(code));
104     }
105 }
106 
handleError(CURLcode code)107 static void handleError(CURLcode code) {
108     if (code != CURLE_OK) {
109         throw std::runtime_error(std::string("CURL easy error: ") + curl::easy_strerror(code));
110     }
111 }
112 
113 namespace mbgl {
114 
// Shared state behind HTTPFileSource: owns the cURL multi and share handles, a pool of reusable
// easy handles, and the timer that cURL asks us to run.
class HTTPFileSource::Impl {
public:
    // Initializes cURL globally, creates the share and multi handles and registers the socket
    // and timer callbacks below on the multi handle. Throws std::runtime_error on failure.
    Impl();
    // Cleans up all pooled easy handles, the multi and share handles, and stops the timer.
    ~Impl();

    // Socket callback installed on the multi handle: adds or removes a RunLoop watch for `s`
    // depending on `action`. `userp` is the Impl instance.
    static int handleSocket(CURL *handle, curl_socket_t s, int action, void *userp, void *socketp);
    // Timer callback installed on the multi handle: restarts `timeout` so that onTimeout() runs
    // after `timeout_ms` milliseconds. `userp` is the Impl instance.
    static int startTimeout(CURLM *multi, long timeout_ms, void *userp);
    // Fired by `timeout`: tells cURL that the timeout elapsed, then processes finished transfers.
    static void onTimeout(HTTPFileSource::Impl *context);

    // Watch callback for socket activity: forwards the event to cURL, then processes finished
    // transfers.
    void perform(curl_socket_t s, util::RunLoop::Event event);
    // Returns a pooled easy handle if one is available, otherwise a newly created one.
    CURL *getHandle();
    // Resets `handle` and puts it back into the pool for reuse.
    void returnHandle(CURL *handle);
    // Reads all pending messages from the multi handle and hands each completed transfer's
    // result to the HTTPRequest stored in the easy handle's private pointer.
    void checkMultiInfo();

    // Timer driven by startTimeout(); when it fires, onTimeout() lets cURL handle its timeouts.
    util::Timer timeout;

    // CURL multi handle that we use to request multiple URLs at the same time, without having to
    // block and spawn threads.
    CURLM *multi = nullptr;

    // CURL share handle, attached to every easy handle through CURLOPT_SHARE.
    // NOTE(review): no curl_share_setopt call is visible in this file, so which data is actually
    // shared between handles should be confirmed.
    CURLSH *share = nullptr;

    // A queue that we use for storing reusable CURL easy handles to avoid creating and destroying
    // them all the time.
    std::queue<CURL *> handles;
};
143 
// A single in-flight HTTP transfer. The constructor configures an easy handle and adds it to the
// multi handle of the owning HTTPFileSource::Impl; the destructor removes it again and returns
// the easy handle to the pool.
class HTTPRequest : public AsyncRequest {
public:
    // Arguments: owning context, resource to fetch, callback that receives the final Response.
    HTTPRequest(HTTPFileSource::Impl*, Resource, FileSource::Callback);
    ~HTTPRequest() override;

    // Called once the transfer has finished: translates the cURL result and the HTTP status code
    // into a Response and passes it to the callback.
    void handleResult(CURLcode code);

private:
    // cURL callbacks; `userp` is the HTTPRequest instance. Each receives `size * nmemb` bytes
    // that are not zero-terminated.
    static size_t headerCallback(char *const buffer, const size_t size, const size_t nmemb, void *userp);
    static size_t writeCallback(void *const contents, const size_t size, const size_t nmemb, void *userp);

    // Non-owning pointer to the shared cURL state (multi handle, share handle, handle pool).
    HTTPFileSource::Impl* context = nullptr;
    Resource resource;
    FileSource::Callback callback;

    // Body bytes accumulated by writeCallback(); created lazily on the first chunk.
    std::shared_ptr<std::string> data;
    // Response under construction; created lazily by headerCallback() or handleResult().
    std::unique_ptr<Response> response;

    // Raw values of the `Retry-After` and `X-Rate-Limit-Reset` headers, if the server sent them.
    // They are only consulted when the server answers with status 429.
    optional<std::string> retryAfter;
    optional<std::string> xRateLimitReset;

    // Easy handle borrowed from the context's pool for the lifetime of this request.
    CURL *handle = nullptr;
    // Extra request headers (conditional request headers), freed in the destructor.
    curl_slist *headers = nullptr;

    // Buffer registered via CURLOPT_ERRORBUFFER; its contents are appended to error messages.
    char error[CURL_ERROR_SIZE] = { 0 };
};
171 
Impl()172 HTTPFileSource::Impl::Impl() {
173     if (curl::global_init(CURL_GLOBAL_ALL)) {
174         throw std::runtime_error("Could not init cURL");
175     }
176 
177     share = curl::share_init();
178 
179     multi = curl::multi_init();
180     handleError(curl::multi_setopt(multi, CURLMOPT_SOCKETFUNCTION, handleSocket));
181     handleError(curl::multi_setopt(multi, CURLMOPT_SOCKETDATA, this));
182     handleError(curl::multi_setopt(multi, CURLMOPT_TIMERFUNCTION, startTimeout));
183     handleError(curl::multi_setopt(multi, CURLMOPT_TIMERDATA, this));
184 }
185 
~Impl()186 HTTPFileSource::Impl::~Impl() {
187     while (!handles.empty()) {
188         curl::easy_cleanup(handles.front());
189         handles.pop();
190     }
191 
192     curl::multi_cleanup(multi);
193     multi = nullptr;
194 
195     curl::share_cleanup(share);
196     share = nullptr;
197 
198     timeout.stop();
199 }
200 
getHandle()201 CURL *HTTPFileSource::Impl::getHandle() {
202     if (!handles.empty()) {
203         auto handle = handles.front();
204         handles.pop();
205         return handle;
206     } else {
207         return curl::easy_init();
208     }
209 }
210 
returnHandle(CURL * handle)211 void HTTPFileSource::Impl::returnHandle(CURL *handle) {
212     curl::easy_reset(handle);
213     handles.push(handle);
214 }
215 
checkMultiInfo()216 void HTTPFileSource::Impl::checkMultiInfo() {
217     CURLMsg *message = nullptr;
218     int pending = 0;
219 
220     while ((message = curl::multi_info_read(multi, &pending))) {
221         switch (message->msg) {
222         case CURLMSG_DONE: {
223             HTTPRequest *baton = nullptr;
224             curl::easy_getinfo(message->easy_handle, CURLINFO_PRIVATE, (char *)&baton);
225             assert(baton);
226             baton->handleResult(message->data.result);
227         } break;
228 
229         default:
230             // This should never happen, because there are no other message types.
231             throw std::runtime_error("CURLMsg returned unknown message type");
232         }
233     }
234 }
235 
perform(curl_socket_t s,util::RunLoop::Event events)236 void HTTPFileSource::Impl::perform(curl_socket_t s, util::RunLoop::Event events) {
237     int flags = 0;
238 
239     if (events == util::RunLoop::Event::Read) {
240         flags |= CURL_CSELECT_IN;
241     }
242     if (events == util::RunLoop::Event::Write) {
243         flags |= CURL_CSELECT_OUT;
244     }
245 
246 
247     int running_handles = 0;
248     curl::multi_socket_action(multi, s, flags, &running_handles);
249     checkMultiInfo();
250 }
251 
handleSocket(CURL *,curl_socket_t s,int action,void * userp,void *)252 int HTTPFileSource::Impl::handleSocket(CURL * /* handle */, curl_socket_t s, int action, void *userp,
253                               void * /* socketp */) {
254     assert(userp);
255     auto context = reinterpret_cast<Impl *>(userp);
256 
257     switch (action) {
258     case CURL_POLL_IN: {
259         using namespace std::placeholders;
260         util::RunLoop::Get()->addWatch(s, util::RunLoop::Event::Read,
261                 std::bind(&Impl::perform, context, _1, _2));
262         break;
263     }
264     case CURL_POLL_OUT: {
265         using namespace std::placeholders;
266         util::RunLoop::Get()->addWatch(s, util::RunLoop::Event::Write,
267                 std::bind(&Impl::perform, context, _1, _2));
268         break;
269     }
270     case CURL_POLL_REMOVE:
271         util::RunLoop::Get()->removeWatch(s);
272         break;
273     default:
274         throw std::runtime_error("Unhandled CURL socket action");
275     }
276 
277     return 0;
278 }
279 
onTimeout(Impl * context)280 void HTTPFileSource::Impl::onTimeout(Impl *context) {
281     int running_handles;
282     CURLMcode error = curl::multi_socket_action(context->multi, CURL_SOCKET_TIMEOUT, 0, &running_handles);
283     if (error != CURLM_OK) {
284         throw std::runtime_error(std::string("CURL multi error: ") + curl::multi_strerror(error));
285     }
286     context->checkMultiInfo();
287 }
288 
startTimeout(CURLM *,long timeout_ms,void * userp)289 int HTTPFileSource::Impl::startTimeout(CURLM * /* multi */, long timeout_ms, void *userp) {
290     assert(userp);
291     auto context = reinterpret_cast<Impl *>(userp);
292 
293     if (timeout_ms < 0) {
294         // A timeout of 0 ms means that the timer will invoked in the next loop iteration.
295         timeout_ms = 0;
296     }
297 
298     context->timeout.stop();
299     context->timeout.start(mbgl::Milliseconds(timeout_ms), Duration::zero(),
300         std::bind(&Impl::onTimeout, context));
301 
302     return 0;
303 }
304 
HTTPRequest(HTTPFileSource::Impl * context_,Resource resource_,FileSource::Callback callback_)305 HTTPRequest::HTTPRequest(HTTPFileSource::Impl* context_, Resource resource_, FileSource::Callback callback_)
306     : context(context_),
307       resource(std::move(resource_)),
308       callback(std::move(callback_)),
309       handle(context->getHandle()) {
310 
311     // If there's already a response, set the correct etags/modified headers to make sure we are
312     // getting a 304 response if possible. This avoids redownloading unchanged data.
313     if (resource.priorEtag) {
314         const std::string header = std::string("If-None-Match: ") + *resource.priorEtag;
315         headers = curl::slist_append(headers, header.c_str());
316     } else if (resource.priorModified) {
317         const std::string time =
318             std::string("If-Modified-Since: ") + util::rfc1123(*resource.priorModified);
319         headers = curl::slist_append(headers, time.c_str());
320     }
321 
322     if (headers) {
323         curl::easy_setopt(handle, CURLOPT_HTTPHEADER, headers);
324     }
325 
326     handleError(curl::easy_setopt(handle, CURLOPT_PRIVATE, this));
327     handleError(curl::easy_setopt(handle, CURLOPT_ERRORBUFFER, error));
328     handleError(curl::easy_setopt(handle, CURLOPT_CAINFO, "ca-bundle.crt"));
329     handleError(curl::easy_setopt(handle, CURLOPT_FOLLOWLOCATION, 1));
330     handleError(curl::easy_setopt(handle, CURLOPT_URL, resource.url.c_str()));
331     handleError(curl::easy_setopt(handle, CURLOPT_WRITEFUNCTION, writeCallback));
332     handleError(curl::easy_setopt(handle, CURLOPT_WRITEDATA, this));
333     handleError(curl::easy_setopt(handle, CURLOPT_HEADERFUNCTION, headerCallback));
334     handleError(curl::easy_setopt(handle, CURLOPT_HEADERDATA, this));
335 #if LIBCURL_VERSION_NUM >= ((7) << 16 | (21) << 8 | 6) // Renamed in 7.21.6
336     handleError(curl::easy_setopt(handle, CURLOPT_ACCEPT_ENCODING, "gzip, deflate"));
337 #else
338     handleError(curl::easy_setopt(handle, CURLOPT_ENCODING, "gzip, deflate"));
339 #endif
340     handleError(curl::easy_setopt(handle, CURLOPT_USERAGENT, "MapboxGL/1.0"));
341     handleError(curl::easy_setopt(handle, CURLOPT_SHARE, context->share));
342 
343     // Start requesting the information.
344     handleError(curl::multi_add_handle(context->multi, handle));
345 }
346 
~HTTPRequest()347 HTTPRequest::~HTTPRequest() {
348     handleError(curl::multi_remove_handle(context->multi, handle));
349     context->returnHandle(handle);
350     handle = nullptr;
351 
352     if (headers) {
353         curl::slist_free_all(headers);
354         headers = nullptr;
355     }
356 }
357 
358 // This function is called when we have new data for a request. We just append it to the string
359 // containing the previous data.
writeCallback(void * const contents,const size_t size,const size_t nmemb,void * userp)360 size_t HTTPRequest::writeCallback(void *const contents, const size_t size, const size_t nmemb, void *userp) {
361     assert(userp);
362     auto impl = reinterpret_cast<HTTPRequest *>(userp);
363 
364     if (!impl->data) {
365         impl->data = std::make_shared<std::string>();
366     }
367 
368     impl->data->append((char *)contents, size * nmemb);
369     return size * nmemb;
370 }
371 
// Checks whether the (non-zero-terminated!) data buffer starts with the (zero-terminated!)
// header string, comparing ASCII-case-insensitively.
//
// header - zero-terminated prefix to look for, e.g. "etag: ".
// buffer - raw header line; only the first `length` bytes may be read.
// length - number of valid bytes in `buffer`.
//
// Returns the length of the header string (== index where the value begins) when the buffer
// starts with it, otherwise std::string::npos.
size_t headerMatches(const char *const header, const char *const buffer, const size_t length) {
    const size_t headerLength = strlen(header);
    if (length < headerLength) {
        return std::string::npos;
    }

    for (size_t i = 0; i < headerLength; ++i) {
        // std::tolower is only defined for values representable as unsigned char (or EOF), so
        // bytes with the high bit set must not be passed as (possibly negative) plain char.
        const auto received = static_cast<unsigned char>(buffer[i]);
        const auto expected = static_cast<unsigned char>(header[i]);
        if (std::tolower(received) != std::tolower(expected)) {
            return std::string::npos;
        }
    }

    return headerLength;
}
387 
headerCallback(char * const buffer,const size_t size,const size_t nmemb,void * userp)388 size_t HTTPRequest::headerCallback(char *const buffer, const size_t size, const size_t nmemb, void *userp) {
389     assert(userp);
390     auto baton = reinterpret_cast<HTTPRequest *>(userp);
391 
392     if (!baton->response) {
393         baton->response = std::make_unique<Response>();
394     }
395 
396     const size_t length = size * nmemb;
397     size_t begin = std::string::npos;
398     if ((begin = headerMatches("last-modified: ", buffer, length)) != std::string::npos) {
399         // Always overwrite the modification date; We might already have a value here from the
400         // Date header, but this one is more accurate.
401         const std::string value { buffer + begin, length - begin - 2 }; // remove \r\n
402         baton->response->modified = Timestamp{ Seconds(curl::getdate(value.c_str(), nullptr)) };
403     } else if ((begin = headerMatches("etag: ", buffer, length)) != std::string::npos) {
404         baton->response->etag = std::string(buffer + begin, length - begin - 2); // remove \r\n
405     } else if ((begin = headerMatches("cache-control: ", buffer, length)) != std::string::npos) {
406         const std::string value { buffer + begin, length - begin - 2 }; // remove \r\n
407         const auto cc = http::CacheControl::parse(value.c_str());
408         baton->response->expires = cc.toTimePoint();
409         baton->response->mustRevalidate = cc.mustRevalidate;
410     } else if ((begin = headerMatches("expires: ", buffer, length)) != std::string::npos) {
411         const std::string value { buffer + begin, length - begin - 2 }; // remove \r\n
412         baton->response->expires = Timestamp{ Seconds(curl::getdate(value.c_str(), nullptr)) };
413     } else if ((begin = headerMatches("retry-after: ", buffer, length)) != std::string::npos) {
414         baton->retryAfter = std::string(buffer + begin, length - begin - 2); // remove \r\n
415     } else if ((begin = headerMatches("x-rate-limit-reset: ", buffer, length)) != std::string::npos) {
416         baton->xRateLimitReset = std::string(buffer + begin, length - begin - 2); // remove \r\n
417     }
418 
419     return length;
420 }
421 
handleResult(CURLcode code)422 void HTTPRequest::handleResult(CURLcode code) {
423     // Make sure a response object exists in case we haven't got any headers or content.
424     if (!response) {
425         response = std::make_unique<Response>();
426     }
427 
428     using Error = Response::Error;
429 
430     // Add human-readable error code
431     if (code != CURLE_OK) {
432         switch (code) {
433         case CURLE_COULDNT_RESOLVE_PROXY:
434         case CURLE_COULDNT_RESOLVE_HOST:
435         case CURLE_COULDNT_CONNECT:
436         case CURLE_OPERATION_TIMEDOUT:
437 
438             response->error = std::make_unique<Error>(
439                 Error::Reason::Connection, std::string{ curl::easy_strerror(code) } + ": " + error);
440             break;
441 
442         default:
443             response->error = std::make_unique<Error>(
444                 Error::Reason::Other, std::string{ curl::easy_strerror(code) } + ": " + error);
445             break;
446         }
447     } else {
448         long responseCode = 0;
449         curl::easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &responseCode);
450 
451         if (responseCode == 200) {
452             if (data) {
453                 response->data = std::move(data);
454             } else {
455                 response->data = std::make_shared<std::string>();
456             }
457         } else if (responseCode == 204 || (responseCode == 404 && resource.kind == Resource::Kind::Tile)) {
458             response->noContent = true;
459         } else if (responseCode == 304) {
460             response->notModified = true;
461         } else if (responseCode == 404) {
462             response->error =
463                 std::make_unique<Error>(Error::Reason::NotFound, "HTTP status code 404");
464         } else if (responseCode == 429) {
465             response->error =
466                 std::make_unique<Error>(Error::Reason::RateLimit, "HTTP status code 429",
467                                         http::parseRetryHeaders(retryAfter, xRateLimitReset));
468         } else if (responseCode >= 500 && responseCode < 600) {
469             response->error =
470                 std::make_unique<Error>(Error::Reason::Server, std::string{ "HTTP status code " } +
471                                                                    util::toString(responseCode));
472         } else {
473             response->error =
474                 std::make_unique<Error>(Error::Reason::Other, std::string{ "HTTP status code " } +
475                                                                   util::toString(responseCode));
476         }
477     }
478 
479     // Calling `callback` may result in deleting `this`. Copy data to temporaries first.
480     auto callback_ = callback;
481     auto response_ = *response;
482     callback_(response_);
483 }
484 
HTTPFileSource()485 HTTPFileSource::HTTPFileSource()
486     : impl(std::make_unique<Impl>()) {
487 }
488 
489 HTTPFileSource::~HTTPFileSource() = default;
490 
request(const Resource & resource,Callback callback)491 std::unique_ptr<AsyncRequest> HTTPFileSource::request(const Resource& resource, Callback callback) {
492     return std::make_unique<HTTPRequest>(impl.get(), resource, callback);
493 }
494 
maximumConcurrentRequests()495 uint32_t HTTPFileSource::maximumConcurrentRequests() {
496     return 20;
497 }
498 
499 } // namespace mbgl
500