xref: /OK3568_Linux_fs/yocto/bitbake/lib/bb/fetch2/wget.py (revision 4882a59341e53eb6f0b4789bf948001014eff981)
"""
BitBake 'Fetch' implementations

Classes for obtaining upstream sources for the
BitBake build tools.

"""

# Copyright (C) 2003, 2004  Chris Larson
#
# SPDX-License-Identifier: GPL-2.0-only
#
# Based on functions from the base bb module, Copyright 2003 Holger Schurig

import shlex
import re
import tempfile
import os
import errno
import bb
import bb.progress
import socket
import http.client
import urllib.request, urllib.parse, urllib.error
from   bb.fetch2 import FetchMethod
from   bb.fetch2 import FetchError
from   bb.fetch2 import logger
from   bb.fetch2 import runfetchcmd
from   bb.utils import export_proxies
from   bs4 import BeautifulSoup
from   bs4 import SoupStrainer

class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
    """
    Extract progress information from wget output.
    Note: relies on --progress=dot (with -v or without -q/-nv) being
    specified on the wget command line.
    """
    def __init__(self, d):
        super(WgetProgressHandler, self).__init__(d)
        # Send an initial progress event so the bar gets shown
        self._fire_progress(0)

    def writeline(self, line):
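        # wget's dot-style progress lines look roughly like
        # "  3100K .......... .......... 48%  1.21M 3s"; grab the last
        # "<percent>% <rate>" pair on the line and report it.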
        percs = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
        if percs:
            progress = int(percs[-1][0])
            rate = percs[-1][1] + '/s'
            self.update(progress, rate)
            return False
        return True


class Wget(FetchMethod):
    """Class to fetch urls via 'wget'"""

    # CDNs like CloudFlare may do a 'browser integrity test' which can fail
    # with the standard wget/urllib User-Agent, so pretend to be a modern
    # browser.
    user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"

    def check_certs(self, d):
        """
        Should certificates be checked?
        """
        return (d.getVar("BB_CHECK_SSL_CERTS") or "1") != "0"

    def supports(self, ud, d):
        """
        Check to see if a given url can be fetched with wget.
        """
        return ud.type in ['http', 'https', 'ftp', 'ftps']

    def recommends_checksum(self, urldata):
        return True

    def urldata_init(self, ud, d):
        if 'protocol' in ud.parm:
            if ud.parm['protocol'] == 'git':
                raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)

        if 'downloadfilename' in ud.parm:
            ud.basename = ud.parm['downloadfilename']
        else:
            ud.basename = os.path.basename(ud.path)

        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
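        # A URL ending in "/" has an empty basename; fall back to a filename
        # derived from host and path so the download still lands in DL_DIR.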
        if not ud.localfile:
            ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))

        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp"

        if not self.check_certs(d):
            self.basecmd += " --no-check-certificate"

    def _runwget(self, ud, d, command, quiet, workdir=None):

        progresshandler = WgetProgressHandler(d)

        logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
        bb.fetch2.check_network_access(d, command, ud.url)
        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)

    def download(self, ud, d):
        """Fetch urls"""

        fetchcmd = self.basecmd

        if 'downloadfilename' in ud.parm:
            localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile)
            bb.utils.mkdirhier(os.path.dirname(localpath))
            fetchcmd += " -O %s" % shlex.quote(localpath)

        if ud.user and ud.pswd:
            fetchcmd += " --auth-no-challenge"
            if ud.parm.get("redirectauth", "1") == "1":
                # An undocumented feature of wget is that if the
                # username/password are specified on the URI, wget will only
                # send the Authorization header to the first host and not to
                # any hosts that it is redirected to.  With the increasing
                # usage of temporary AWS URLs, this difference now matters as
                # AWS will reject any request that has authentication both in
                # the query parameters (from the redirect) and in the
                # Authorization header.
                fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd)

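        # ud.url may carry BitBake-specific ";name=value" parameters; strip
        # them so wget is handed only the plain URI.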
        uri = ud.url.split(";")[0]
        if os.path.exists(ud.localpath):
            # The file exists but may be incomplete; resume the download (-c)
            fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
        else:
            fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)

        self._runwget(ud, d, fetchcmd, False)

        # Sanity check since wget can pretend it succeeded when it didn't
        # Also, this used to happen if sourceforge sent us to the mirror page
        if not os.path.exists(ud.localpath):
            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)

        if os.path.getsize(ud.localpath) == 0:
            os.remove(ud.localpath)
            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)

        return True

    def checkstatus(self, fetch, ud, d, try_again=True):
        class HTTPConnectionCache(http.client.HTTPConnection):
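            # connect() is only overridden when BitBake's connection cache is
            # in use; otherwise the stock http.client.HTTPConnection
            # behaviour applies.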
            if fetch.connection_cache:
                def connect(self):
                    """Connect to the host and port specified in __init__."""

                    sock = fetch.connection_cache.get_connection(self.host, self.port)
                    if sock:
                        self.sock = sock
                    else:
                        self.sock = socket.create_connection((self.host, self.port),
                                    self.timeout, self.source_address)
                        fetch.connection_cache.add_connection(self.host, self.port, self.sock)

                    if self._tunnel_host:
                        self._tunnel()

        class CacheHTTPHandler(urllib.request.HTTPHandler):
            def http_open(self, req):
                return self.do_open(HTTPConnectionCache, req)

            def do_open(self, http_class, req):
                """Return an addinfourl object for the request, using http_class.

                http_class must implement the HTTPConnection API from http.client.
                The addinfourl return value is a file-like object.  It also
                has methods and attributes including:
                    - info(): return the response headers as a message object
                    - geturl(): return the original request URL
                    - code: HTTP status code
                """
                host = req.host
                if not host:
                    raise urllib.error.URLError('no host given')

                h = http_class(host, timeout=req.timeout) # will parse host:port
                h.set_debuglevel(self._debuglevel)

                headers = dict(req.unredirected_hdrs)
                headers.update(dict((k, v) for k, v in list(req.headers.items())
                            if k not in headers))

                # We want to make an HTTP/1.1 request, but the addinfourl
                # class isn't prepared to deal with a persistent connection.
                # It will try to read all remaining data from the socket,
                # which will block while the server waits for the next request.
                # So make sure the connection gets closed after the (only)
                # request.

                # Don't close the connection when connection_cache is enabled.
                if fetch.connection_cache is None:
                    headers["Connection"] = "close"
                else:
                    headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0

                headers = dict(
                    (name.title(), val) for name, val in list(headers.items()))

                if req._tunnel_host:
                    tunnel_headers = {}
                    proxy_auth_hdr = "Proxy-Authorization"
                    if proxy_auth_hdr in headers:
                        tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                        # Proxy-Authorization should not be sent to origin
                        # server.
                        del headers[proxy_auth_hdr]
                    h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

                try:
                    h.request(req.get_method(), req.selector, req.data, headers)
                except socket.error as err: # XXX what error?
                    # Don't close the connection when the cache is enabled.
                    # Instead, try to detect connections that are no longer
                    # usable (for example, closed unexpectedly) and remove
                    # them from the cache.
                    if fetch.connection_cache is None:
                        h.close()
                    elif isinstance(err, OSError) and err.errno == errno.EBADF:
                        # This happens when the server closes the connection despite the Keep-Alive.
                        # Apparently urllib then uses the file descriptor, expecting it to be
                        # connected, when in reality the connection is already gone.
                        # We let the request fail and expect it to be
                        # tried once more ("try_again" in checkstatus()),
                        # with the dead connection removed from the cache.
                        # If it still fails, we give up, which can happen for bad
                        # HTTP proxy settings.
                        fetch.connection_cache.remove_connection(h.host, h.port)
                    raise urllib.error.URLError(err)
                else:
                    r = h.getresponse()

                # Pick apart the HTTPResponse object to get the addinfourl
                # object initialized properly.

                # Wrap the HTTPResponse object in socket's file object adapter
                # for Windows.  That adapter calls recv(), so delegate recv()
                # to read().  This weird wrapping allows the returned object to
                # have readline() and readlines() methods.

                # XXX It might be better to extract the read buffering code
                # out of socket._fileobject() and into a base class.
                r.recv = r.read

                # no data, just have to read
                r.read()
                class fp_dummy(object):
                    def read(self):
                        return ""
                    def readline(self):
                        return ""
                    def close(self):
                        pass
                    closed = False

                resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url())
                resp.code = r.status
                resp.msg = r.reason

                # Close the connection when the server requests it.
                if fetch.connection_cache is not None:
                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
                        fetch.connection_cache.remove_connection(h.host, h.port)

                return resp

        class HTTPMethodFallback(urllib.request.BaseHandler):
            """
            Fallback to GET if HEAD is not allowed (405 HTTP error)
            """
            def http_error_405(self, req, fp, code, msg, headers):
                fp.read()
                fp.close()

                if req.get_method() != 'GET':
                    newheaders = dict((k, v) for k, v in list(req.headers.items())
                                      if k.lower() not in ("content-length", "content-type"))
                    return self.parent.open(urllib.request.Request(req.get_full_url(),
                                                            headers=newheaders,
                                                            origin_req_host=req.origin_req_host,
                                                            unverifiable=True))

                raise urllib.request.HTTPError(req, code, msg, headers, None)

            # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
            # Forbidden when they actually mean 405 Method Not Allowed.
            http_error_403 = http_error_405


        class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
            """
            urllib.request.HTTPRedirectHandler resets the method to GET on redirect,
            when we want to follow redirects using the original method.
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
                newreq.get_method = req.get_method
                return newreq

        # We need to update the environment here as both the proxy and HTTPS
        # handlers need variables set. The proxy needs http_proxy and friends to
        # be set, and HTTPSHandler ends up calling into openssl to load the
        # certificates. In buildtools configurations this will be looking at the
        # wrong place for certificates by default: we set SSL_CERT_FILE to the
        # right location in the buildtools environment script but as BitBake
        # prunes the environment this is lost. When binaries are executed
        # runfetchcmd ensures these values are in the environment, but this is
        # pure Python so we need to update the environment.
        #
        # Avoid trampling the environment too much by using bb.utils.environment
        # to scope the changes to the build_opener request, which is when the
        # environment lookups happen.
        newenv = bb.fetch2.get_fetcher_environment(d)

        with bb.utils.environment(**newenv):
            import ssl

            if self.check_certs(d):
                context = ssl.create_default_context()
            else:
                context = ssl._create_unverified_context()

            handlers = [FixedHTTPRedirectHandler,
                        HTTPMethodFallback,
                        urllib.request.ProxyHandler(),
                        CacheHTTPHandler(),
                        urllib.request.HTTPSHandler(context=context)]
            opener = urllib.request.build_opener(*handlers)

            try:
                uri = ud.url.split(";")[0]
                r = urllib.request.Request(uri)
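                # Issue a HEAD request so only headers are transferred; if the
                # server rejects HEAD, HTTPMethodFallback retries with GET.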
                r.get_method = lambda: "HEAD"
                # Some servers (FusionForge, as used on Alioth) require that the
                # optional Accept header is set.
                r.add_header("Accept", "*/*")
                r.add_header("User-Agent", self.user_agent)
                def add_basic_auth(login_str, request):
                    '''Adds Basic auth to the http request, pass in login:password as a string'''
                    import base64
                    encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
                    authheader = "Basic %s" % encodeuser
                    request.add_header("Authorization", authheader)

                if ud.user and ud.pswd:
                    add_basic_auth(ud.user + ':' + ud.pswd, r)

                try:
                    import netrc
                    n = netrc.netrc()
                    login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
                    add_basic_auth("%s:%s" % (login, password), r)
                except (TypeError, ImportError, IOError, netrc.NetrcParseError):
                    pass

                with opener.open(r, timeout=30) as response:
                    pass
            except urllib.error.URLError as e:
                if try_again:
                    logger.debug2("checkstatus: trying again")
                    return self.checkstatus(fetch, ud, d, False)
                else:
                    # debug for now to avoid spamming the logs in e.g. remote sstate searches
                    logger.debug2("checkstatus() urlopen failed: %s" % e)
                    return False
            except ConnectionResetError as e:
                if try_again:
                    logger.debug2("checkstatus: trying again")
                    return self.checkstatus(fetch, ud, d, False)
                else:
                    # debug for now to avoid spamming the logs in e.g. remote sstate searches
                    logger.debug2("checkstatus() urlopen failed: %s" % e)
                    return False

        return True

    def _parse_path(self, regex, s):
        """
        Find and group name, version and archive type in the given string s
        """

        m = regex.search(s)
        if m:
            pname = ''
            pver = ''
            ptype = ''

            mdict = m.groupdict()
            if 'name' in mdict.keys():
                pname = mdict['name']
            if 'pver' in mdict.keys():
                pver = mdict['pver']
            if 'type' in mdict.keys():
                ptype = mdict['type']

            bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))

            return (pname, pver, ptype)

        return None

    def _modelate_version(self, version):
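        # Normalise the version for vercmp(): turn '-'/'_' separators into
        # dots and map pre-release tags to numbers (rc -> .1000.,
        # beta -> .100., alpha -> .10.) so rc > beta > alpha when compared
        # numerically; a leading 'v' is dropped.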
        if version[0] in ['.', '-']:
            if version[1].isdigit():
                version = version[1] + version[0] + version[2:len(version)]
            else:
                version = version[1:len(version)]

        version = re.sub('-', '.', version)
        version = re.sub('_', '.', version)
        version = re.sub('(rc)+', '.1000.', version)
        version = re.sub('(beta)+', '.100.', version)
        version = re.sub('(alpha)+', '.10.', version)
        if version[0] == 'v':
            version = version[1:len(version)]
        return version

    def _vercmp(self, old, new):
        """
        Check whether 'new' is a newer version than 'old'. We use the existing
        vercmp() for this. PE is cleared in the comparison as it is not relevant
        here, and PR is cleared too for simplicity, since it is difficult to
        derive from the various upstream formats.
        """

        (oldpn, oldpv, oldsuffix) = old
        (newpn, newpv, newsuffix) = new

        # Check for a new suffix type that we have never heard of before
        if newsuffix:
            m = self.suffix_regex_comp.search(newsuffix)
            if not m:
                bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
                return False

        # Not our package so ignore it
        if oldpn != newpn:
            return False

        oldpv = self._modelate_version(oldpv)
        newpv = self._modelate_version(newpv)

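        # bb.utils.vercmp() returns <0, 0 or >0; callers compare the result
        # against 0, so a negative value means 'new' is newer than 'old'.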
        return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))

    def _fetch_index(self, uri, ud, d):
        """
        Fetch the directory index page so it can be parsed for version information
        """
        with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
            fetchcmd = self.basecmd
            fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'"
            try:
                self._runwget(ud, d, fetchcmd, True, workdir=workdir)
                fetchresult = f.read()
            except bb.fetch2.BBFetchException:
                fetchresult = ""

        return fetchresult

    def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
        """
        Return the latest version of a package inside a given directory path.
        On error, or if no version is found, return "".
        """
        valid = 0
        version = ['', '', '']

        bb.debug(3, "VersionURL: %s" % (url))
        soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            bb.debug(3, "*** %s NO SOUP" % (url))
            return ""

        for line in soup.find_all('a', href=True):
            bb.debug(3, "line['href'] = '%s'" % (line['href']))
            bb.debug(3, "line = '%s'" % (str(line)))

            newver = self._parse_path(package_regex, line['href'])
            if not newver:
                newver = self._parse_path(package_regex, str(line))

            if newver:
                bb.debug(3, "Upstream version found: %s" % newver[1])
                if valid == 0:
                    version = newver
                    valid = 1
                elif self._vercmp(version, newver) < 0:
                    version = newver

        pupver = re.sub('_', '.', version[1])

        bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
                (package, pupver or "N/A", current_version[1]))

        if valid:
            return pupver

        return ""

    def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d):
        """
        Scan every directory in order to get the upstream version.
        """
        version_dir = ['', '', '']
        version = ['', '', '']

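        # Split a directory name such as "v2.5" into its non-digit prefix
        # ("v") and the dotted/underscored version part ("2.5").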
        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])*(\d+))")
        s = dirver_regex.search(dirver)
        if s:
            version_dir[1] = s.group('ver')
        else:
            version_dir[1] = dirver

        dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
                ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))

        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            return version[1]

        for line in soup.find_all('a', href=True):
            s = dirver_regex.search(line['href'].strip("/"))
            if s:
                sver = s.group('ver')

                # When the prefix is part of the version directory, make sure
                # that only the version directory itself is used, so strip any
                # leading directories.
                #
                # Example: pfx = '/dir1/dir2/v' and version = '2.5'; the
                # expected result is v2.5.
                spfx = s.group('pfx').split('/')[-1]

                version_dir_new = ['', sver, '']
                if self._vercmp(version_dir, version_dir_new) <= 0:
                    dirver_new = spfx + sver
                    path = ud.path.replace(dirver, dirver_new, True) \
                        .split(package)[0]
                    uri = bb.fetch.encodeurl([ud.type, ud.host, path,
                        ud.user, ud.pswd, {}])

                    pupver = self._check_latest_version(uri,
                            package, package_regex, current_version, ud, d)
                    if pupver:
                        version[1] = pupver

                    version_dir = version_dir_new

        return version[1]

    def _init_regexes(self, package, ud, d):
        """
        Match as many patterns as possible such as:
                gnome-common-2.20.0.tar.gz (most common format)
                gtk+-2.90.1.tar.gz
                xf86-input-synaptics-12.6.9.tar.gz
                dri2proto-2.3.tar.gz
                blktool_4.orig.tar.gz
                libid3tag-0.15.1b.tar.gz
                unzip552.tar.gz
                icu4c-3_6-src.tgz
                genext2fs_1.3.orig.tar.gz
                gst-fluendo-mp3
        """
        # match most patterns, which use "-" as the separator before the version digits
        pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
        # a loose pattern such as for unzip552.tar.gz
        pn_prefix2 = r"[a-zA-Z]+"
        # a loose pattern such as for 80325-quicky-0.4.tar.gz
        pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+"
        # Save the Package Name (pn) Regex for use later
        pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)

        # match version
        pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"

        # match arch
        parch_regex = "-source|_all_"

        # The src.rpm extension was added only for rpm packages. It can be removed
        # if rpm packages will always be considered as having to be manually upgraded
        psuffix_regex = r"(tar\.\w+|tgz|zip|xz|rpm|bz2|orig\.tar\.\w+|src\.tar\.\w+|src\.tgz|svnr\d+\.tar\.\w+|stable\.tar\.\w+|src\.rpm)"

        # match name, version and archive type of a package
        package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
                                                    % (pn_regex, pver_regex, parch_regex, psuffix_regex))
        self.suffix_regex_comp = re.compile(psuffix_regex)

        # compile the regex; it can be package-specific (UPSTREAM_CHECK_REGEX) or the generic one above
        pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
        if pn_regex:
            package_custom_regex_comp = re.compile(pn_regex)
        else:
            version = self._parse_path(package_regex_comp, package)
            if version:
                package_custom_regex_comp = re.compile(
                    r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
                    (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
            else:
                package_custom_regex_comp = None

        return package_custom_regex_comp

    def latest_versionstring(self, ud, d):
        """
        Manipulate the URL and try to obtain the latest package version.

        Sanity checks ensure the name and type match.
        """
        package = ud.path.split("/")[-1]
        current_version = ['', d.getVar('PV'), '']

        # It is possible to have no version in the package name, e.g. spectrum-fw
        if not re.search(r"\d+", package):
            current_version[1] = re.sub('_', '.', current_version[1])
            current_version[1] = re.sub('-', '.', current_version[1])
            return (current_version[1], '')

        package_regex = self._init_regexes(package, ud, d)
        if package_regex is None:
            bb.warn("latest_versionstring: package %s doesn't match pattern" % (package))
            return ('', '')
        bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))

        uri = ""
        regex_uri = d.getVar("UPSTREAM_CHECK_URI")
        if not regex_uri:
            path = ud.path.split(package)[0]

            # search for version matches on folders inside the path, like:
            # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
            dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
            m = dirver_regex.search(path)
            if m:
                pn = d.getVar('PN')
                dirver = m.group('dirver')

                dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
                if not dirver_pn_regex.search(dirver):
                    return (self._check_latest_version_by_dir(dirver,
                        package, package_regex, current_version, ud, d), '')

            uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
        else:
            uri = regex_uri

        return (self._check_latest_version(uri, package, package_regex,
                current_version, ud, d), '')