"""
BitBake 'Fetch' implementations

Classes for obtaining upstream sources for the
BitBake build tools.

"""

# Copyright (C) 2003, 2004 Chris Larson
#
# SPDX-License-Identifier: GPL-2.0-only
#
# Based on functions from the base bb module, Copyright 2003 Holger Schurig

import shlex
import re
import tempfile
import os
import errno
import bb
import bb.progress
import socket
import http.client
import urllib.request, urllib.parse, urllib.error
from bb.fetch2 import FetchMethod
from bb.fetch2 import FetchError
from bb.fetch2 import logger
from bb.fetch2 import runfetchcmd
from bb.utils import export_proxies
from bs4 import BeautifulSoup
from bs4 import SoupStrainer

class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
    """
    Extract progress information from wget output.
    Note: relies on --progress=dot (with -v or without -q/-nv) being
    specified on the wget command line.
    """
    def __init__(self, d):
        super(WgetProgressHandler, self).__init__(d)
        # Send an initial progress event so the bar gets shown
        self._fire_progress(0)

    def writeline(self, line):
        """Parse one line of wget output; report progress and swallow the line."""
        percs = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
        if percs:
            progress = int(percs[-1][0])
            rate = percs[-1][1] + '/s'
            self.update(progress, rate)
            # Returning False suppresses the matched progress line from the log
            return False
        return True


class Wget(FetchMethod):
    """Class to fetch urls via 'wget'"""

    # CDNs like CloudFlare may do a 'browser integrity test' which can fail
    # with the standard wget/urllib User-Agent, so pretend to be a modern
    # browser.
    user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"

    def check_certs(self, d):
        """
        Should certificates be checked?
        """
        return (d.getVar("BB_CHECK_SSL_CERTS") or "1") != "0"

    def supports(self, ud, d):
        """
        Check to see if a given url can be fetched with wget.
        """
        return ud.type in ['http', 'https', 'ftp', 'ftps']

    def recommends_checksum(self, urldata):
        return True

    def urldata_init(self, ud, d):
        """
        Initialize per-URL data: local filename and the base wget command.
        """
        if 'protocol' in ud.parm:
            if ud.parm['protocol'] == 'git':
                raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)

        if 'downloadfilename' in ud.parm:
            ud.basename = ud.parm['downloadfilename']
        else:
            ud.basename = os.path.basename(ud.path)

        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
        if not ud.localfile:
            # No basename (e.g. URL path ends in '/'): derive a name from
            # host + path with '/' flattened to '.'
            ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))

        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp"

        if not self.check_certs(d):
            self.basecmd += " --no-check-certificate"

    def _runwget(self, ud, d, command, quiet, workdir=None):
        """
        Run a wget command with progress reporting and network-access checks.
        """
        progresshandler = WgetProgressHandler(d)

        logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
        bb.fetch2.check_network_access(d, command, ud.url)
        # --progress=dot -v is required by WgetProgressHandler (see class note)
        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)

    def download(self, ud, d):
        """Fetch urls"""

        fetchcmd = self.basecmd

        if 'downloadfilename' in ud.parm:
            localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile)
            bb.utils.mkdirhier(os.path.dirname(localpath))
            fetchcmd += " -O %s" % shlex.quote(localpath)

        if ud.user and ud.pswd:
            fetchcmd += " --auth-no-challenge"
            if ud.parm.get("redirectauth", "1") == "1":
                # An undocumented feature of wget is that if the
                # username/password are specified on the URI, wget will only
                # send the Authorization header to the first host and not to
                # any hosts that it is redirected to. With the increasing
                # usage of temporary AWS URLs, this difference now matters as
                # AWS will reject any request that has authentication both in
                # the query parameters (from the redirect) and in the
                # Authorization header.
                fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd)

        uri = ud.url.split(";")[0]
        if os.path.exists(ud.localpath):
            # file exists, but we didnt complete it.. trying again..
            fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
        else:
            fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)

        self._runwget(ud, d, fetchcmd, False)

        # Sanity check since wget can pretend it succeed when it didn't
        # Also, this used to happen if sourceforge sent us to the mirror page
        if not os.path.exists(ud.localpath):
            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)

        if os.path.getsize(ud.localpath) == 0:
            os.remove(ud.localpath)
            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)

        return True

    def checkstatus(self, fetch, ud, d, try_again=True):
        """
        Check that a URL exists (HEAD request, falling back to GET on 405/403)
        without downloading it. Retries once on URLError/ConnectionResetError.
        """
        class HTTPConnectionCache(http.client.HTTPConnection):
            if fetch.connection_cache:
                def connect(self):
                    """Connect to the host and port specified in __init__."""

                    sock = fetch.connection_cache.get_connection(self.host, self.port)
                    if sock:
                        self.sock = sock
                    else:
                        self.sock = socket.create_connection((self.host, self.port),
                                    self.timeout, self.source_address)
                        fetch.connection_cache.add_connection(self.host, self.port, self.sock)

                    if self._tunnel_host:
                        self._tunnel()

        class CacheHTTPHandler(urllib.request.HTTPHandler):
            def http_open(self, req):
                return self.do_open(HTTPConnectionCache, req)

            def do_open(self, http_class, req):
                """Return an addinfourl object for the request, using http_class.

                http_class must implement the HTTPConnection API from httplib.
                The addinfourl return value is a file-like object.  It also
                has methods and attributes including:
                    - info(): return a mimetools.Message object for the headers
                    - geturl(): return the original request URL
                    - code: HTTP status code
                """
                host = req.host
                if not host:
                    raise urllib.error.URLError('no host given')

                h = http_class(host, timeout=req.timeout) # will parse host:port
                h.set_debuglevel(self._debuglevel)

                headers = dict(req.unredirected_hdrs)
                headers.update(dict((k, v) for k, v in list(req.headers.items())
                            if k not in headers))

                # We want to make an HTTP/1.1 request, but the addinfourl
                # class isn't prepared to deal with a persistent connection.
                # It will try to read all remaining data from the socket,
                # which will block while the server waits for the next request.
                # So make sure the connection gets closed after the (only)
                # request.

                # Don't close connection when connection_cache is enabled,
                if fetch.connection_cache is None:
                    headers["Connection"] = "close"
                else:
                    headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0

                headers = dict(
                    (name.title(), val) for name, val in list(headers.items()))

                if req._tunnel_host:
                    tunnel_headers = {}
                    proxy_auth_hdr = "Proxy-Authorization"
                    if proxy_auth_hdr in headers:
                        tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                        # Proxy-Authorization should not be sent to origin
                        # server.
                        del headers[proxy_auth_hdr]
                    h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

                try:
                    h.request(req.get_method(), req.selector, req.data, headers)
                except socket.error as err: # XXX what error?
                    # Don't close connection when cache is enabled.
                    # Instead, try to detect connections that are no longer
                    # usable (for example, closed unexpectedly) and remove
                    # them from the cache.
                    if fetch.connection_cache is None:
                        h.close()
                    elif isinstance(err, OSError) and err.errno == errno.EBADF:
                        # This happens when the server closes the connection despite the Keep-Alive.
                        # Apparently urllib then uses the file descriptor, expecting it to be
                        # connected, when in reality the connection is already gone.
                        # We let the request fail and expect it to be
                        # tried once more ("try_again" in check_status()),
                        # with the dead connection removed from the cache.
                        # If it still fails, we give up, which can happen for bad
                        # HTTP proxy settings.
                        fetch.connection_cache.remove_connection(h.host, h.port)
                    raise urllib.error.URLError(err)
                else:
                    r = h.getresponse()

                # Pick apart the HTTPResponse object to get the addinfourl
                # object initialized properly.

                # Wrap the HTTPResponse object in socket's file object adapter
                # for Windows.  That adapter calls recv(), so delegate recv()
                # to read().  This weird wrapping allows the returned object to
                # have readline() and readlines() methods.

                # XXX It might be better to extract the read buffering code
                # out of socket._fileobject() and into a base class.
                r.recv = r.read

                # no data, just have to read
                r.read()
                class fp_dummy(object):
                    def read(self):
                        return ""
                    def readline(self):
                        return ""
                    def close(self):
                        pass
                    closed = False

                resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url())
                resp.code = r.status
                resp.msg = r.reason

                # Close connection when server request it.
                if fetch.connection_cache is not None:
                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
                        fetch.connection_cache.remove_connection(h.host, h.port)

                return resp

        class HTTPMethodFallback(urllib.request.BaseHandler):
            """
            Fallback to GET if HEAD is not allowed (405 HTTP error)
            """
            def http_error_405(self, req, fp, code, msg, headers):
                fp.read()
                fp.close()

                if req.get_method() != 'GET':
                    newheaders = dict((k, v) for k, v in list(req.headers.items())
                                      if k.lower() not in ("content-length", "content-type"))
                    return self.parent.open(urllib.request.Request(req.get_full_url(),
                                                        headers=newheaders,
                                                        origin_req_host=req.origin_req_host,
                                                        unverifiable=True))

                raise urllib.request.HTTPError(req, code, msg, headers, None)

            # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
            # Forbidden when they actually mean 405 Method Not Allowed.
            http_error_403 = http_error_405


        class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
            """
            urllib2.HTTPRedirectHandler resets the method to GET on redirect,
            when we want to follow redirects using the original method.
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
                newreq.get_method = req.get_method
                return newreq

        # We need to update the environment here as both the proxy and HTTPS
        # handlers need variables set. The proxy needs http_proxy and friends to
        # be set, and HTTPSHandler ends up calling into openssl to load the
        # certificates. In buildtools configurations this will be looking at the
        # wrong place for certificates by default: we set SSL_CERT_FILE to the
        # right location in the buildtools environment script but as BitBake
        # prunes the environment this is lost. When binaries are executed
        # runfetchcmd ensures these values are in the environment, but this is
        # pure Python so we need to update the environment.
        #
        # Avoid tramping the environment too much by using bb.utils.environment
        # to scope the changes to the build_opener request, which is when the
        # environment lookups happen.
        newenv = bb.fetch2.get_fetcher_environment(d)

        with bb.utils.environment(**newenv):
            import ssl

            if self.check_certs(d):
                context = ssl.create_default_context()
            else:
                context = ssl._create_unverified_context()

            handlers = [FixedHTTPRedirectHandler,
                        HTTPMethodFallback,
                        urllib.request.ProxyHandler(),
                        CacheHTTPHandler(),
                        urllib.request.HTTPSHandler(context=context)]
            opener = urllib.request.build_opener(*handlers)

            try:
                uri = ud.url.split(";")[0]
                r = urllib.request.Request(uri)
                r.get_method = lambda: "HEAD"
                # Some servers (FusionForge, as used on Alioth) require that the
                # optional Accept header is set.
                r.add_header("Accept", "*/*")
                r.add_header("User-Agent", self.user_agent)
                def add_basic_auth(login_str, request):
                    '''Adds Basic auth to http request, pass in login:password as string'''
                    import base64
                    encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
                    authheader = "Basic %s" % encodeuser
                    r.add_header("Authorization", authheader)

                if ud.user and ud.pswd:
                    add_basic_auth(ud.user + ':' + ud.pswd, r)

                try:
                    import netrc
                    n = netrc.netrc()
                    login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
                    add_basic_auth("%s:%s" % (login, password), r)
                except (TypeError, ImportError, IOError, netrc.NetrcParseError):
                    pass

                with opener.open(r, timeout=30) as response:
                    pass
            except urllib.error.URLError as e:
                if try_again:
                    logger.debug2("checkstatus: trying again")
                    return self.checkstatus(fetch, ud, d, False)
                else:
                    # debug for now to avoid spamming the logs in e.g. remote sstate searches
                    logger.debug2("checkstatus() urlopen failed: %s" % e)
                    return False
            except ConnectionResetError as e:
                if try_again:
                    logger.debug2("checkstatus: trying again")
                    return self.checkstatus(fetch, ud, d, False)
                else:
                    # debug for now to avoid spamming the logs in e.g. remote sstate searches
                    logger.debug2("checkstatus() urlopen failed: %s" % e)
                    return False

        return True

    def _parse_path(self, regex, s):
        """
        Find and group name, version and archive type in the given string s
        """

        m = regex.search(s)
        if m:
            pname = ''
            pver = ''
            ptype = ''

            mdict = m.groupdict()
            if 'name' in mdict.keys():
                pname = mdict['name']
            if 'pver' in mdict.keys():
                pver = mdict['pver']
            if 'type' in mdict.keys():
                ptype = mdict['type']

            bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))

            return (pname, pver, ptype)

        return None

    def _modelate_version(self, version):
        # Normalize a version string so bb.utils.vercmp can compare it:
        # move a leading separator-digit pair, map separators to '.', and
        # rank pre-release tags (rc > beta > alpha) numerically.
        if version[0] in ['.', '-']:
            if version[1].isdigit():
                version = version[1] + version[0] + version[2:len(version)]
            else:
                version = version[1:len(version)]

        version = re.sub('-', '.', version)
        version = re.sub('_', '.', version)
        version = re.sub('(rc)+', '.1000.', version)
        version = re.sub('(beta)+', '.100.', version)
        version = re.sub('(alpha)+', '.10.', version)
        if version[0] == 'v':
            version = version[1:len(version)]
        return version

    def _vercmp(self, old, new):
        """
        Check whether 'new' is newer than 'old' version. We use existing vercmp() for the
        purpose. PE is cleared in comparison as it's not for build, and PR is cleared too
        for simplicity as it's somehow difficult to get from various upstream format
        """

        (oldpn, oldpv, oldsuffix) = old
        (newpn, newpv, newsuffix) = new

        # Check for a new suffix type that we have never heard of before
        if newsuffix:
            m = self.suffix_regex_comp.search(newsuffix)
            if not m:
                bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
                return False

        # Not our package so ignore it
        if oldpn != newpn:
            return False

        oldpv = self._modelate_version(oldpv)
        newpv = self._modelate_version(newpv)

        return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))

    def _fetch_index(self, uri, ud, d):
        """
        Run fetch checkstatus to get directory information
        """
        # Note: the listing file must live inside the temporary working
        # directory so everything is cleaned up when the 'with' exits.
        with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
            fetchcmd = self.basecmd
            fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'"
            try:
                self._runwget(ud, d, fetchcmd, True, workdir=workdir)
                fetchresult = f.read()
            except bb.fetch2.BBFetchException:
                fetchresult = ""

        return fetchresult

    def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
        """
        Return the latest version of a package inside a given directory path
        If error or no version, return ""
        """
        valid = 0
        version = ['', '', '']

        bb.debug(3, "VersionURL: %s" % (url))
        soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            bb.debug(3, "*** %s NO SOUP" % (url))
            return ""

        for line in soup.find_all('a', href=True):
            bb.debug(3, "line['href'] = '%s'" % (line['href']))
            bb.debug(3, "line = '%s'" % (str(line)))

            newver = self._parse_path(package_regex, line['href'])
            if not newver:
                newver = self._parse_path(package_regex, str(line))

            if newver:
                bb.debug(3, "Upstream version found: %s" % newver[1])
                if valid == 0:
                    version = newver
                    valid = 1
                elif self._vercmp(version, newver) < 0:
                    version = newver

        pupver = re.sub('_', '.', version[1])

        bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
                (package, pupver or "N/A", current_version[1]))

        if valid:
            return pupver

        return ""

    def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d):
        """
        Scan every directory in order to get upstream version.
        """
        version_dir = ['', '', '']
        version = ['', '', '']

        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])*(\d+))")
        s = dirver_regex.search(dirver)
        if s:
            version_dir[1] = s.group('ver')
        else:
            version_dir[1] = dirver

        dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
                ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))

        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            return version[1]

        for line in soup.find_all('a', href=True):
            s = dirver_regex.search(line['href'].strip("/"))
            if s:
                sver = s.group('ver')

                # When prefix is part of the version directory it need to
                # ensure that only version directory is used so remove previous
                # directories if exists.
                #
                # Example: pfx = '/dir1/dir2/v' and version = '2.5' the expected
                # result is v2.5.
                spfx = s.group('pfx').split('/')[-1]

                version_dir_new = ['', sver, '']
                if self._vercmp(version_dir, version_dir_new) <= 0:
                    dirver_new = spfx + sver
                    path = ud.path.replace(dirver, dirver_new, True) \
                        .split(package)[0]
                    uri = bb.fetch.encodeurl([ud.type, ud.host, path,
                        ud.user, ud.pswd, {}])

                    pupver = self._check_latest_version(uri,
                            package, package_regex, current_version, ud, d)
                    if pupver:
                        version[1] = pupver

                    version_dir = version_dir_new

        return version[1]

    def _init_regexes(self, package, ud, d):
        """
        Match as many patterns as possible such as:
                gnome-common-2.20.0.tar.gz (most common format)
                gtk+-2.90.1.tar.gz
                xf86-input-synaptics-12.6.9.tar.gz
                dri2proto-2.3.tar.gz
                blktool_4.orig.tar.gz
                libid3tag-0.15.1b.tar.gz
                unzip552.tar.gz
                icu4c-3_6-src.tgz
                genext2fs_1.3.orig.tar.gz
                gst-fluendo-mp3
        """
        # match most patterns which uses "-" as separator to version digits
        pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
        # a loose pattern such as for unzip552.tar.gz
        pn_prefix2 = r"[a-zA-Z]+"
        # a loose pattern such as for 80325-quicky-0.4.tar.gz
        pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+"
        # Save the Package Name (pn) Regex for use later
        pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)

        # match version
        pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"

        # match arch
        parch_regex = "-source|_all_"

        # src.rpm extension was added only for rpm package. Can be removed if the rpm
        # packaged will always be considered as having to be manually upgraded
        psuffix_regex = r"(tar\.\w+|tgz|zip|xz|rpm|bz2|orig\.tar\.\w+|src\.tar\.\w+|src\.tgz|svnr\d+\.tar\.\w+|stable\.tar\.\w+|src\.rpm)"

        # match name, version and archive type of a package
        package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
                                                    % (pn_regex, pver_regex, parch_regex, psuffix_regex))
        self.suffix_regex_comp = re.compile(psuffix_regex)

        # compile regex, can be specific by package or generic regex
        pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
        if pn_regex:
            package_custom_regex_comp = re.compile(pn_regex)
        else:
            version = self._parse_path(package_regex_comp, package)
            if version:
                package_custom_regex_comp = re.compile(
                    r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
                    (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
            else:
                package_custom_regex_comp = None

        return package_custom_regex_comp

    def latest_versionstring(self, ud, d):
        """
        Manipulate the URL and try to obtain the latest package version

        sanity check to ensure same name and type.
        """
        package = ud.path.split("/")[-1]
        current_version = ['', d.getVar('PV'), '']

        """possible to have no version in pkg name, such as spectrum-fw"""
        if not re.search(r"\d+", package):
            current_version[1] = re.sub('_', '.', current_version[1])
            current_version[1] = re.sub('-', '.', current_version[1])
            return (current_version[1], '')

        package_regex = self._init_regexes(package, ud, d)
        if package_regex is None:
            bb.warn("latest_versionstring: package %s don't match pattern" % (package))
            return ('', '')
        bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))

        uri = ""
        regex_uri = d.getVar("UPSTREAM_CHECK_URI")
        if not regex_uri:
            path = ud.path.split(package)[0]

            # search for version matches on folders inside the path, like:
            # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
            dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
            m = dirver_regex.search(path)
            if m:
                pn = d.getVar('PN')
                dirver = m.group('dirver')

                dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
                if not dirver_pn_regex.search(dirver):
                    return (self._check_latest_version_by_dir(dirver,
                        package, package_regex, current_version, ud, d), '')

            uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
        else:
            uri = regex_uri

        return (self._check_latest_version(uri, package, package_regex,
                current_version, ud, d), '')