xref: /OK3568_Linux_fs/yocto/poky/meta/classes/create-spdx.bbclass (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1#
2# SPDX-License-Identifier: GPL-2.0-only
3#
4
5DEPLOY_DIR_SPDX ??= "${DEPLOY_DIR}/spdx/${MACHINE}"
6
7# The product name that the CVE database uses.  Defaults to BPN, but may need to
# be overridden per recipe (for example tiff.bb sets CVE_PRODUCT=libtiff).
9CVE_PRODUCT ??= "${BPN}"
10CVE_VERSION ??= "${PV}"
11
12SPDXDIR ??= "${WORKDIR}/spdx"
13SPDXDEPLOY = "${SPDXDIR}/deploy"
14SPDXWORK = "${SPDXDIR}/work"
15
16SPDX_TOOL_NAME ??= "oe-spdx-creator"
17SPDX_TOOL_VERSION ??= "1.0"
18
19SPDXRUNTIMEDEPLOY = "${SPDXDIR}/runtime-deploy"
20
21SPDX_INCLUDE_SOURCES ??= "0"
22SPDX_ARCHIVE_SOURCES ??= "0"
23SPDX_ARCHIVE_PACKAGED ??= "0"
24
25SPDX_UUID_NAMESPACE ??= "sbom.openembedded.org"
26SPDX_NAMESPACE_PREFIX ??= "http://spdx.org/spdxdoc"
27SPDX_PRETTY ??= "0"
28
29SPDX_LICENSES ??= "${COREBASE}/meta/files/spdx-licenses.json"
30
31SPDX_ORG ??= "OpenEmbedded ()"
32SPDX_SUPPLIER ??= "Organization: ${SPDX_ORG}"
33SPDX_SUPPLIER[doc] = "The SPDX PackageSupplier field for SPDX packages created from \
34    this recipe. For SPDX documents create using this class during the build, this \
35    is the contact information for the person or organization who is doing the \
36    build."
37
def extract_licenses(filename):
    """Return the SPDX license identifiers declared near the top of a file.

    Only the first 15000 bytes of *filename* are scanned for
    ``SPDX-License-Identifier:`` tags.  Returns the list of identifiers (as
    ASCII strings) when at least one tag is found, otherwise None.  Any
    exception raised while reading is reported with bb.warn and also results
    in None.
    """
    import re

    tag_pattern = re.compile(rb'^\W*SPDX-License-Identifier:\s*([ \w\d.()+-]+?)(?:\s+\W*)?$', re.MULTILINE)

    try:
        with open(filename, 'rb') as src:
            # Scan window: the whole file, capped at 15000 bytes.
            window = min(15000, os.stat(filename).st_size)
            head = src.read(window)
            found = tag_pattern.findall(head)
            if found:
                return [raw.decode('ascii') for raw in found]
    except Exception as e:
        bb.warn(f"Exception reading {filename}: {e}")
    return None
54
def get_doc_namespace(d, doc):
    """Build the document namespace URI for an SPDX document.

    The URI is "<SPDX_NAMESPACE_PREFIX>/<doc.name>-<uuid>", where the UUID is
    a version 5 UUID of doc.name inside a namespace that is itself the
    version 5 (DNS) UUID of SPDX_UUID_NAMESPACE, so the result is
    deterministic for a given configuration and document name.
    """
    import uuid

    base_uuid = uuid.uuid5(uuid.NAMESPACE_DNS, d.getVar("SPDX_UUID_NAMESPACE"))
    doc_uuid = uuid.uuid5(base_uuid, doc.name)
    prefix = d.getVar("SPDX_NAMESPACE_PREFIX")
    return f"{prefix}/{doc.name}-{doc_uuid}"
59
def create_annotation(d, comment):
    """Create an SPDX annotation of type OTHER carrying *comment*.

    The annotation is stamped with the current UTC time and attributed to
    the tool named by SPDX_TOOL_NAME / SPDX_TOOL_VERSION.
    """
    from datetime import datetime, timezone

    stamp = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    tool_name = d.getVar("SPDX_TOOL_NAME")
    tool_version = d.getVar("SPDX_TOOL_VERSION")

    note = oe.spdx.SPDXAnnotation()
    note.annotationDate = stamp
    note.annotationType = "OTHER"
    note.annotator = "Tool: %s - %s" % (tool_name, tool_version)
    note.comment = comment
    return note
70
def recipe_spdx_is_native(d, recipe):
    """Tell whether an SPDX recipe package carries the "isNative" marker.

    The marker is an annotation of type OTHER whose annotator matches this
    tool (SPDX_TOOL_NAME / SPDX_TOOL_VERSION) and whose comment is
    "isNative".
    """
    tool_tag = "Tool: %s - %s" % (d.getVar("SPDX_TOOL_NAME"), d.getVar("SPDX_TOOL_VERSION"))
    for note in recipe.annotations:
        if note.annotationType != "OTHER":
            continue
        if note.annotator == tool_tag and note.comment == "isNative":
            return True
    return False
75
def is_work_shared_spdx(d):
    """Tell whether the recipe inherits the kernel class or has 'work-shared' in WORKDIR."""
    kernel = bb.data.inherits_class('kernel', d)
    if kernel:
        return kernel
    return 'work-shared' in d.getVar('WORKDIR')
78
def get_json_indent(d):
    """Return the JSON indent width: 2 when SPDX_PRETTY is "1", otherwise None."""
    return 2 if d.getVar("SPDX_PRETTY") == "1" else None
83
python() {
    import json

    # Load the SPDX license list only if SPDX_LICENSE_DATA is not set yet.
    if not d.getVar("SPDX_LICENSE_DATA"):
        with open(d.getVar("SPDX_LICENSES"), "r") as f:
            data = json.load(f)
            # Re-key the license array by licenseId so lookups are direct
            by_id = {}
            for entry in data["licenses"]:
                by_id[entry["licenseId"]] = entry
            data["licenses"] = by_id
            d.setVar("SPDX_LICENSE_DATA", data)
}
95
def convert_license_to_spdx(lic, document, d, existing=None):
    """Translate an OE LICENSE expression into an SPDX license expression.

    Parameters:
        lic: the LICENSE string (names combined with "&", "|" and parentheses).
        document: SPDX document that receives an extracted-licensing-info
            entry for every license that has to be emitted as "LicenseRef-*".
        d: the datastore; SPDX_LICENSE_DATA, SPDXLICENSEMAP flags,
            COMMON_LICENSE_DIR, LICENSE_PATH and NO_GENERIC_LICENSE are read.
        existing: optional mapping of license name to an already known
            reference; names found here are not extracted again.

    Returns the converted expression as a single space-separated string.
    """
    from pathlib import Path

    # A None default avoids sharing one mutable dict between calls.
    if existing is None:
        existing = {}

    license_data = d.getVar("SPDX_LICENSE_DATA")
    extracted = {}

    # Tokens whose SPDX spelling is fixed.
    fixed_tokens = {"(": "(", ")": ")", "&": "AND", "|": "OR", "CLOSED": "NONE"}

    def add_extracted_license(ident, name):
        # Imported here because only the extraction path needs oe.spdx.
        import oe.spdx

        if name in extracted:
            return

        extracted_info = oe.spdx.SPDXExtractedLicensingInfo()
        extracted_info.name = name
        extracted_info.licenseId = ident
        extracted_info.extractedText = None

        if name == "PD":
            # Special-case this.
            extracted_info.extractedText = "Software released to the public domain"
        else:
            # Search for the license in COMMON_LICENSE_DIR and LICENSE_PATH
            for directory in [d.getVar('COMMON_LICENSE_DIR')] + (d.getVar('LICENSE_PATH') or '').split():
                try:
                    with (Path(directory) / name).open(errors="replace") as f:
                        extracted_info.extractedText = f.read()
                        break
                except FileNotFoundError:
                    pass
            if extracted_info.extractedText is None:
                # If it's not SPDX or PD, then NO_GENERIC_LICENSE must be set
                filename = d.getVarFlag('NO_GENERIC_LICENSE', name)
                if filename:
                    filename = d.expand("${S}/" + filename)
                    with open(filename, errors="replace") as f:
                        extracted_info.extractedText = f.read()
                else:
                    bb.error("Cannot find any text for license %s" % name)

        extracted[name] = extracted_info
        document.hasExtractedLicensingInfos.append(extracted_info)

    def convert(l):
        if l in fixed_tokens:
            return fixed_tokens[l]

        # Prefer the mapped SPDX identifier when it is on the SPDX license list
        spdx_license = d.getVarFlag("SPDXLICENSEMAP", l) or l
        if spdx_license in license_data["licenses"]:
            return spdx_license

        try:
            return existing[l]
        except KeyError:
            spdx_license = "LicenseRef-" + l
            add_extracted_license(spdx_license, l)
            return spdx_license

    # Pad parentheses so they become separate tokens
    lic_split = lic.replace("(", " ( ").replace(")", " ) ").split()

    return ' '.join(convert(l) for l in lic_split)
167
def process_sources(d):
    """Decide whether the sources of the current recipe should be scanned.

    Returns False for glibc-locale*, libtool-cross, libgcc-initial,
    shadow-sysroot and for recipes whose BPN is gcc or libgcc; True otherwise.
    """
    recipe_name = d.getVar('PN')
    pn = recipe_name

    # When PN itself is assumed provided, fall back to the first other
    # name listed in PROVIDES (if any).
    if pn in (d.getVar("ASSUME_PROVIDED") or "").split():
        pn = next((p for p in d.getVar("PROVIDES").split() if p != pn), pn)

    # glibc-locale: do_fetch, do_unpack and do_patch tasks have been deleted,
    # so avoid archiving source here.
    if pn.startswith('glibc-locale'):
        return False

    if recipe_name in ("libtool-cross", "libgcc-initial", "shadow-sysroot"):
        return False

    # We just archive gcc-source for all the gcc related recipes
    if d.getVar('BPN') in ['gcc', 'libgcc']:
        bb.debug(1, 'spdx: There is bug in scan of %s is, do nothing' % pn)
        return False

    return True
194
195
def add_package_files(d, doc, spdx_pkg, topdir, get_spdxid, get_types, *, archive=None, ignore_dirs=(), ignore_top_level_dirs=()):
    """Add every regular file below *topdir* to an SPDX document and package.

    Parameters:
        d: the datastore (SOURCE_DATE_EPOCH is read).
        doc: SPDX document receiving the file entries and CONTAINS relationships.
        spdx_pkg: SPDX package that owns the files; its hasFiles list and
            package verification code are updated.
        topdir: directory to walk.
        get_spdxid: callable mapping a 1-based file counter to an SPDXID.
        get_types: callable mapping a file path to an iterable of file types.
        archive: optional open tarfile; when given, every file is also added
            to it with root ownership and an mtime clamped to SOURCE_DATE_EPOCH.
        ignore_dirs: directory names skipped at any depth.
        ignore_top_level_dirs: directory names skipped directly below topdir.

    Returns the list of SPDX file objects that were created.
    """
    from pathlib import Path
    import oe.spdx
    import hashlib

    # An unset or empty SOURCE_DATE_EPOCH means "do not clamp mtimes".
    source_date_epoch = d.getVar("SOURCE_DATE_EPOCH")
    source_date_epoch = int(source_date_epoch) if source_date_epoch else None

    sha1s = []
    spdx_files = []

    file_counter = 1
    for subdir, dirs, files in os.walk(topdir):
        # Prune in place so os.walk does not descend into ignored directories
        dirs[:] = [name for name in dirs if name not in ignore_dirs]
        if subdir == str(topdir):
            dirs[:] = [name for name in dirs if name not in ignore_top_level_dirs]

        # os.walk yields entries in arbitrary order; sort so that SPDXIDs and
        # archive member order are reproducible between builds.
        dirs.sort()
        files.sort()

        for file in files:
            filepath = Path(subdir) / file
            filename = str(filepath.relative_to(topdir))

            # Only regular files are recorded; symlinks are skipped
            if filepath.is_symlink() or not filepath.is_file():
                continue

            spdx_file = oe.spdx.SPDXFile()
            spdx_file.SPDXID = get_spdxid(file_counter)
            for t in get_types(filepath):
                spdx_file.fileTypes.append(t)
            spdx_file.fileName = filename

            if archive is not None:
                with filepath.open("rb") as f:
                    info = archive.gettarinfo(fileobj=f)
                    info.name = filename
                    info.uid = 0
                    info.gid = 0
                    info.uname = "root"
                    info.gname = "root"

                    if source_date_epoch is not None and info.mtime > source_date_epoch:
                        info.mtime = source_date_epoch

                    archive.addfile(info, f)

            sha1 = bb.utils.sha1_file(filepath)
            sha1s.append(sha1)
            spdx_file.checksums.append(oe.spdx.SPDXChecksum(
                    algorithm="SHA1",
                    checksumValue=sha1,
                ))
            spdx_file.checksums.append(oe.spdx.SPDXChecksum(
                    algorithm="SHA256",
                    checksumValue=bb.utils.sha256_file(filepath),
                ))

            if "SOURCE" in spdx_file.fileTypes:
                extracted_lics = extract_licenses(filepath)
                if extracted_lics:
                    spdx_file.licenseInfoInFiles = extracted_lics

            doc.files.append(spdx_file)
            doc.add_relationship(spdx_pkg, "CONTAINS", spdx_file)
            spdx_pkg.hasFiles.append(spdx_file.SPDXID)

            spdx_files.append(spdx_file)

            file_counter += 1

    # The package verification code is the SHA1 over the sorted file SHA1s
    sha1s.sort()
    verifier = hashlib.sha1()
    for v in sha1s:
        verifier.update(v.encode("utf-8"))
    spdx_pkg.packageVerificationCode.packageVerificationCodeValue = verifier.hexdigest()

    return spdx_files
270
271
def add_package_sources_from_debug(d, package_doc, spdx_package, package, package_files, sources):
    """Link packaged files to the source files recorded in their debug info.

    For every file of *package* whose package data lists "debugsrc" entries,
    a GENERATED_FROM relationship is added to *package_doc* for each debug
    source.  The target is the matching file from *sources* (keyed by
    SHA256) when one is found, otherwise "NOASSERTION".
    """
    from pathlib import Path
    import hashlib
    import oe.packagedata
    import oe.spdx

    # Locations probed, in this order, for each debug source path
    search_roots = [
        Path(d.getVar(var))
        for var in ('PKGD', 'STAGING_DIR_TARGET', 'STAGING_DIR_NATIVE', 'STAGING_KERNEL_DIR')
    ]

    pkg_data = oe.packagedata.read_subpkgdata_extended(package, d)

    if pkg_data is None:
        return

    for file_path, file_data in pkg_data["files_info"].items():
        if "debugsrc" not in file_data:
            continue

        # Find the SPDX file entry for this packaged file
        wanted = file_path.lstrip("/")
        for pkg_file in package_files:
            if wanted == pkg_file.fileName.lstrip("/"):
                break
        else:
            bb.fatal("No package file found for %s" % str(file_path))
            continue

        for debugsrc in file_data["debugsrc"]:
            ref_id = "NOASSERTION"

            if debugsrc.startswith("/usr/src/kernel"):
                relative = debugsrc.replace('/usr/src/kernel/', '')
            else:
                relative = debugsrc.lstrip("/")

            for root in search_roots:
                candidate = root / relative
                if not candidate.exists():
                    continue

                digest = bb.utils.sha256_file(candidate)

                if digest not in sources:
                    bb.debug(1, "Debug source %s with SHA256 %s not found in any dependency" % (str(candidate), digest))
                    break

                source_file = sources[digest]

                doc_ref = package_doc.find_external_document_ref(source_file.doc.documentNamespace)
                if doc_ref is None:
                    doc_ref = oe.spdx.SPDXExternalDocumentRef()
                    doc_ref.externalDocumentId = "DocumentRef-dependency-" + source_file.doc.name
                    doc_ref.spdxDocument = source_file.doc.documentNamespace
                    doc_ref.checksum.algorithm = "SHA1"
                    doc_ref.checksum.checksumValue = source_file.doc_sha1
                    package_doc.externalDocumentRefs.append(doc_ref)

                ref_id = "%s:%s" % (doc_ref.externalDocumentId, source_file.file.SPDXID)
                break
            else:
                bb.debug(1, "Debug source %s not found" % debugsrc)

            package_doc.add_relationship(pkg_file, "GENERATED_FROM", ref_id, comment=debugsrc)
333
def collect_dep_recipes(d, doc, spdx_recipe):
    """Reference the SPDX documents of the recipes this recipe depends on.

    The dependencies are the recipes, other than PN, that have a
    do_create_spdx entry in BB_TASKDEPDATA.  For each one whose recipe
    document contains a package named after it, an external document
    reference and a BUILD_DEPENDENCY_OF relationship are added to *doc*.

    Returns the list of oe.sbom.DepRecipe entries that were collected.
    """
    from pathlib import Path
    import oe.sbom
    import oe.spdx

    recipes_dir = Path(d.getVar("DEPLOY_DIR_SPDX")) / "recipes"
    own_pn = d.getVar("PN")

    taskdepdata = d.getVar("BB_TASKDEPDATA", False)
    dep_names = {
        entry[0] for entry in taskdepdata.values()
        if entry[1] == "do_create_spdx" and entry[0] != own_pn
    }

    dep_recipes = []
    for dep_pn in sorted(dep_names):
        dep_doc, dep_sha1 = oe.sbom.read_doc(recipes_dir / ("recipe-%s.spdx.json" % dep_pn))

        # Skip documents that do not describe a package named like the recipe
        dep_pkg = next((pkg for pkg in dep_doc.packages if pkg.name == dep_pn), None)
        if dep_pkg is None:
            continue

        dep_recipes.append(oe.sbom.DepRecipe(dep_doc, dep_sha1, dep_pkg))

        ref = oe.spdx.SPDXExternalDocumentRef()
        ref.externalDocumentId = "DocumentRef-dependency-" + dep_doc.name
        ref.spdxDocument = dep_doc.documentNamespace
        ref.checksum.algorithm = "SHA1"
        ref.checksum.checksumValue = dep_sha1
        doc.externalDocumentRefs.append(ref)

        dep_spdxid = "%s:%s" % (ref.externalDocumentId, dep_pkg.SPDXID)
        doc.add_relationship(dep_spdxid, "BUILD_DEPENDENCY_OF", spdx_recipe)

    return dep_recipes
376
377collect_dep_recipes[vardepsexclude] += "BB_TASKDEPDATA"
378
379
def collect_dep_sources(d, dep_recipes):
    """Index the source files of non-native dependency recipes by SHA256.

    Returns a dict mapping a SHA256 checksum value to an oe.sbom.DepSource
    for every file of type SOURCE that belongs to a dependency's recipe
    package.
    """
    import oe.sbom

    sources = {}
    for dep in dep_recipes:
        # Don't collect sources from native recipes as they
        # match non-native sources also.
        if recipe_spdx_is_native(d, dep.recipe):
            continue

        owned_ids = set(dep.recipe.hasFiles)

        for spdx_file in dep.doc.files:
            if spdx_file.SPDXID not in owned_ids or "SOURCE" not in spdx_file.fileTypes:
                continue

            # Only the first SHA256 checksum of a file is used
            for checksum in spdx_file.checksums:
                if checksum.algorithm != "SHA256":
                    continue
                sources[checksum.checksumValue] = oe.sbom.DepSource(dep.doc, dep.doc_sha1, dep.recipe, spdx_file)
                break

    return sources
402
403
# Task: write the SPDX document for the recipe ("recipe-<PN>") and, for
# non-native recipes, one SPDX document per packaged package.
python do_create_spdx() {
    from datetime import datetime, timezone
    import oe.sbom
    import oe.spdx
    import uuid
    from pathlib import Path
    from contextlib import contextmanager
    import oe.cve_check

    # Context manager yielding a zstd-compressed streaming tar archive at
    # `name` when `guard` is true, and None otherwise, so callers can use a
    # single `with` statement whether or not archiving is enabled.
    @contextmanager
    def optional_tarfile(name, guard, mode="w"):
        import tarfile
        import bb.compress.zstd

        num_threads = int(d.getVar("BB_NUMBER_THREADS"))

        if guard:
            name.parent.mkdir(parents=True, exist_ok=True)
            with bb.compress.zstd.open(name, mode=mode + "b", num_threads=num_threads) as f:
                with tarfile.open(fileobj=f, mode=mode + "|") as tf:
                    yield tf
        else:
            yield None


    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
    spdx_workdir = Path(d.getVar("SPDXWORK"))
    include_sources = d.getVar("SPDX_INCLUDE_SOURCES") == "1"
    archive_sources = d.getVar("SPDX_ARCHIVE_SOURCES") == "1"
    archive_packaged = d.getVar("SPDX_ARCHIVE_PACKAGED") == "1"

    # One timestamp is shared by the recipe document and all package documents
    creation_time = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # --- Recipe document -------------------------------------------------
    doc = oe.spdx.SPDXDocument()

    doc.name = "recipe-" + d.getVar("PN")
    doc.documentNamespace = get_doc_namespace(d, doc)
    doc.creationInfo.created = creation_time
    doc.creationInfo.comment = "This document was created by analyzing recipe files during the build."
    doc.creationInfo.licenseListVersion = d.getVar("SPDX_LICENSE_DATA")["licenseListVersion"]
    doc.creationInfo.creators.append("Tool: OpenEmbedded Core create-spdx.bbclass")
    doc.creationInfo.creators.append("Organization: %s" % d.getVar("SPDX_ORG"))
    doc.creationInfo.creators.append("Person: N/A ()")

    recipe = oe.spdx.SPDXPackage()
    recipe.name = d.getVar("PN")
    recipe.versionInfo = d.getVar("PV")
    recipe.SPDXID = oe.sbom.get_recipe_spdxid(d)
    recipe.supplier = d.getVar("SPDX_SUPPLIER")
    # Native and cross recipes are tagged so that recipe_spdx_is_native()
    # can recognise them later from the written document.
    if bb.data.inherits_class("native", d) or bb.data.inherits_class("cross", d):
        recipe.annotations.append(create_annotation(d, "isNative"))

    # The download location is the first SRC_URI entry that is not a local
    # file, with its ';' parameters stripped; NOASSERTION if there is none.
    for s in d.getVar('SRC_URI').split():
        if not s.startswith("file://"):
            s = s.split(';')[0]
            recipe.downloadLocation = s
            break
    else:
        recipe.downloadLocation = "NOASSERTION"

    homepage = d.getVar("HOMEPAGE")
    if homepage:
        recipe.homepage = homepage

    license = d.getVar("LICENSE")
    if license:
        recipe.licenseDeclared = convert_license_to_spdx(license, doc, d)

    summary = d.getVar("SUMMARY")
    if summary:
        recipe.summary = summary

    description = d.getVar("DESCRIPTION")
    if description:
        recipe.description = description

    # Some CVEs may be patched during the build process without incrementing the version number,
    # so querying for CVEs based on the CPE id can lead to false positives. To account for this,
    # save the CVEs fixed by patches to source information field in the SPDX.
    patched_cves = oe.cve_check.get_patched_cves(d)
    patched_cves = list(patched_cves)
    patched_cves = ' '.join(patched_cves)
    if patched_cves:
        recipe.sourceInfo = "CVEs fixed: " + patched_cves

    # Record each CPE id as a SECURITY external reference on the recipe
    cpe_ids = oe.cve_check.get_cpe_ids(d.getVar("CVE_PRODUCT"), d.getVar("CVE_VERSION"))
    if cpe_ids:
        for cpe_id in cpe_ids:
            cpe = oe.spdx.SPDXExternalReference()
            cpe.referenceCategory = "SECURITY"
            cpe.referenceType = "http://spdx.org/rdf/references/cpe23Type"
            cpe.referenceLocator = cpe_id
            recipe.externalRefs.append(cpe)

    doc.packages.append(recipe)
    doc.add_relationship(doc, "DESCRIBES", recipe)

    # Source files are only listed when SPDX_INCLUDE_SOURCES is "1" and the
    # recipe is not excluded by process_sources(); they are additionally
    # archived when SPDX_ARCHIVE_SOURCES is "1".
    if process_sources(d) and include_sources:
        recipe_archive = deploy_dir_spdx / "recipes" / (doc.name + ".tar.zst")
        with optional_tarfile(recipe_archive, archive_sources) as archive:
            # Populate SPDXWORK with the recipe sources before scanning it
            spdx_get_src(d)

            add_package_files(
                d,
                doc,
                recipe,
                spdx_workdir,
                lambda file_counter: "SPDXRef-SourceFile-%s-%d" % (d.getVar("PN"), file_counter),
                lambda filepath: ["SOURCE"],
                ignore_dirs=[".git"],
                ignore_top_level_dirs=["temp"],
                archive=archive,
            )

            if archive is not None:
                recipe.packageFileName = str(recipe_archive.name)

    dep_recipes = collect_dep_recipes(d, doc, recipe)

    # The SHA1 returned for the written recipe document is reused below in
    # the external reference that every package document carries.
    doc_sha1 = oe.sbom.write_doc(d, doc, "recipes", indent=get_json_indent(d))
    dep_recipes.append(oe.sbom.DepRecipe(doc, doc_sha1, recipe))

    recipe_ref = oe.spdx.SPDXExternalDocumentRef()
    recipe_ref.externalDocumentId = "DocumentRef-recipe-" + recipe.name
    recipe_ref.spdxDocument = doc.documentNamespace
    recipe_ref.checksum.algorithm = "SHA1"
    recipe_ref.checksum.checksumValue = doc_sha1

    sources = collect_dep_sources(d, dep_recipes)
    # Licenses already extracted into the recipe document are referenced
    # from package documents instead of being extracted again.
    found_licenses = {license.name:recipe_ref.externalDocumentId + ":" + license.licenseId for license in doc.hasExtractedLicensingInfos}

    # --- Package documents (skipped for native/cross recipes) -------------
    if not recipe_spdx_is_native(d, recipe):
        bb.build.exec_func("read_subpackage_metadata", d)

        pkgdest = Path(d.getVar("PKGDEST"))
        for package in d.getVar("PACKAGES").split():
            if not oe.packagedata.packaged(package, d):
                continue

            package_doc = oe.spdx.SPDXDocument()
            # PKG:<package> holds the final package name when it was renamed
            pkg_name = d.getVar("PKG:%s" % package) or package
            package_doc.name = pkg_name
            package_doc.documentNamespace = get_doc_namespace(d, package_doc)
            package_doc.creationInfo.created = creation_time
            package_doc.creationInfo.comment = "This document was created by analyzing packages created during the build."
            package_doc.creationInfo.licenseListVersion = d.getVar("SPDX_LICENSE_DATA")["licenseListVersion"]
            package_doc.creationInfo.creators.append("Tool: OpenEmbedded Core create-spdx.bbclass")
            package_doc.creationInfo.creators.append("Organization: %s" % d.getVar("SPDX_ORG"))
            package_doc.creationInfo.creators.append("Person: N/A ()")
            package_doc.externalDocumentRefs.append(recipe_ref)

            # A per-package LICENSE takes precedence over the recipe LICENSE
            package_license = d.getVar("LICENSE:%s" % package) or d.getVar("LICENSE")

            spdx_package = oe.spdx.SPDXPackage()

            spdx_package.SPDXID = oe.sbom.get_package_spdxid(pkg_name)
            spdx_package.name = pkg_name
            spdx_package.versionInfo = d.getVar("PV")
            spdx_package.licenseDeclared = convert_license_to_spdx(package_license, package_doc, d, found_licenses)
            spdx_package.supplier = d.getVar("SPDX_SUPPLIER")

            package_doc.packages.append(spdx_package)

            package_doc.add_relationship(spdx_package, "GENERATED_FROM", "%s:%s" % (recipe_ref.externalDocumentId, recipe.SPDXID))
            package_doc.add_relationship(package_doc, "DESCRIBES", spdx_package)

            # Packaged files are always listed; they are archived only when
            # SPDX_ARCHIVE_PACKAGED is "1".
            package_archive = deploy_dir_spdx / "packages" / (package_doc.name + ".tar.zst")
            with optional_tarfile(package_archive, archive_packaged) as archive:
                package_files = add_package_files(
                    d,
                    package_doc,
                    spdx_package,
                    pkgdest / package,
                    lambda file_counter: oe.sbom.get_packaged_file_spdxid(pkg_name, file_counter),
                    lambda filepath: ["BINARY"],
                    ignore_top_level_dirs=['CONTROL', 'DEBIAN'],
                    archive=archive,
                )

                if archive is not None:
                    spdx_package.packageFileName = str(package_archive.name)

            add_package_sources_from_debug(d, package_doc, spdx_package, package, package_files, sources)

            oe.sbom.write_doc(d, package_doc, "packages", indent=get_json_indent(d))
}
# NOTE: depending on do_unpack is a hack that is necessary to get its dependencies for archiving the source
591addtask do_create_spdx after do_package do_packagedata do_unpack before do_populate_sdk do_build do_rm_work
592
593SSTATETASKS += "do_create_spdx"
594do_create_spdx[sstate-inputdirs] = "${SPDXDEPLOY}"
595do_create_spdx[sstate-outputdirs] = "${DEPLOY_DIR_SPDX}"
596
# Setscene variant of do_create_spdx; it only calls sstate_setscene(),
# which is defined outside this class.
python do_create_spdx_setscene () {
    sstate_setscene(d)
}
600addtask do_create_spdx_setscene
601
602do_create_spdx[dirs] = "${SPDXWORK}"
603do_create_spdx[cleandirs] = "${SPDXDEPLOY} ${SPDXWORK}"
604do_create_spdx[depends] += "${PATCHDEPENDENCY}"
605do_create_spdx[deptask] = "do_create_spdx"
606
def collect_package_providers(d):
    """Map every runtime-provided name to the package that provides it.

    Packages are taken from the package data of every recipe named in
    BB_TASKDEPDATA, followed by the current recipe (PN).  Each package
    provides its own name plus the names in its RPROVIDES.

    Returns a dict of provided name -> providing package name; when several
    packages provide the same name, the one processed last wins.
    """
    from pathlib import Path
    import oe.sbom
    import oe.spdx
    import json

    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))

    own_pn = d.getVar("PN")
    taskdepdata = d.getVar("BB_TASKDEPDATA", False)

    # Dependencies first (sorted), the recipe itself last
    recipe_names = sorted({entry[0] for entry in taskdepdata.values() if entry[0] != own_pn})
    recipe_names.append(own_pn)

    providers = {}
    for dep_pn in recipe_names:
        recipe_data = oe.packagedata.read_pkgdata(dep_pn, d)

        for pkg in recipe_data.get("PACKAGES", "").split():
            pkg_data = oe.packagedata.read_subpkgdata_dict(pkg, d)

            provided = set(bb.utils.explode_dep_versions2(pkg_data.get("RPROVIDES", "")))
            provided.add(pkg)

            providers.update((name, pkg) for name in provided)

    return providers
636
637collect_package_providers[vardepsexclude] += "BB_TASKDEPDATA"
638
# Task: for every packaged package of a non-native recipe, write a
# "runtime-<pkg>" SPDX document that amends the package document with
# RUNTIME_DEPENDENCY_OF relationships derived from RDEPENDS.
python do_create_runtime_spdx() {
    from datetime import datetime, timezone
    import oe.sbom
    import oe.spdx
    import oe.packagedata
    from pathlib import Path

    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
    spdx_deploy = Path(d.getVar("SPDXRUNTIMEDEPLOY"))
    is_native = bb.data.inherits_class("native", d) or bb.data.inherits_class("cross", d)

    creation_time = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Mapping of runtime-provided name -> providing package
    providers = collect_package_providers(d)

    if not is_native:
        bb.build.exec_func("read_subpackage_metadata", d)

        # Cache of dependency package -> (SPDX package, external document
        # ref), shared across all packages of this recipe.
        dep_package_cache = {}

        pkgdest = Path(d.getVar("PKGDEST"))
        for package in d.getVar("PACKAGES").split():
            # Work on a copy of the datastore with the package name appended
            # to OVERRIDES, so the RDEPENDS lookup below goes through
            # package-specific overrides.
            localdata = bb.data.createCopy(d)
            pkg_name = d.getVar("PKG:%s" % package) or package
            localdata.setVar("PKG", pkg_name)
            localdata.setVar('OVERRIDES', d.getVar("OVERRIDES", False) + ":" + package)

            if not oe.packagedata.packaged(package, localdata):
                continue

            # The package document is read from DEPLOY_DIR_SPDX/packages
            pkg_spdx_path = deploy_dir_spdx / "packages" / (pkg_name + ".spdx.json")

            package_doc, package_doc_sha1 = oe.sbom.read_doc(pkg_spdx_path)

            for p in package_doc.packages:
                if p.name == pkg_name:
                    spdx_package = p
                    break
            else:
                bb.fatal("Package '%s' not found in %s" % (pkg_name, pkg_spdx_path))

            runtime_doc = oe.spdx.SPDXDocument()
            runtime_doc.name = "runtime-" + pkg_name
            runtime_doc.documentNamespace = get_doc_namespace(localdata, runtime_doc)
            runtime_doc.creationInfo.created = creation_time
            runtime_doc.creationInfo.comment = "This document was created by analyzing package runtime dependencies."
            runtime_doc.creationInfo.licenseListVersion = d.getVar("SPDX_LICENSE_DATA")["licenseListVersion"]
            runtime_doc.creationInfo.creators.append("Tool: OpenEmbedded Core create-spdx.bbclass")
            runtime_doc.creationInfo.creators.append("Organization: %s" % d.getVar("SPDX_ORG"))
            runtime_doc.creationInfo.creators.append("Person: N/A ()")

            package_ref = oe.spdx.SPDXExternalDocumentRef()
            package_ref.externalDocumentId = "DocumentRef-package-" + package
            package_ref.spdxDocument = package_doc.documentNamespace
            package_ref.checksum.algorithm = "SHA1"
            package_ref.checksum.checksumValue = package_doc_sha1

            runtime_doc.externalDocumentRefs.append(package_ref)

            # The runtime document amends the package document it refers to
            runtime_doc.add_relationship(
                runtime_doc.SPDXID,
                "AMENDS",
                "%s:%s" % (package_ref.externalDocumentId, package_doc.SPDXID)
            )

            deps = bb.utils.explode_dep_versions2(localdata.getVar("RDEPENDS") or "")
            # NOTE(review): seen_deps is tested with the RDEPENDS name but is
            # filled with the provider package name (dep is rebound below),
            # so two names resolving to the same provider are not
            # deduplicated by this check - confirm whether that is intended.
            seen_deps = set()
            for dep, _ in deps.items():
                if dep in seen_deps:
                    continue

                # Dependencies without a known provider are skipped
                if dep not in providers:
                    continue

                dep = providers[dep]

                if not oe.packagedata.packaged(dep, localdata):
                    continue

                dep_pkg_data = oe.packagedata.read_subpkgdata_dict(dep, d)
                dep_pkg = dep_pkg_data["PKG"]

                if dep in dep_package_cache:
                    (dep_spdx_package, dep_package_ref) = dep_package_cache[dep]
                else:
                    dep_path = deploy_dir_spdx / "packages" / ("%s.spdx.json" % dep_pkg)

                    spdx_dep_doc, spdx_dep_sha1 = oe.sbom.read_doc(dep_path)

                    for pkg in spdx_dep_doc.packages:
                        if pkg.name == dep_pkg:
                            dep_spdx_package = pkg
                            break
                    else:
                        bb.fatal("Package '%s' not found in %s" % (dep_pkg, dep_path))

                    dep_package_ref = oe.spdx.SPDXExternalDocumentRef()
                    dep_package_ref.externalDocumentId = "DocumentRef-runtime-dependency-" + spdx_dep_doc.name
                    dep_package_ref.spdxDocument = spdx_dep_doc.documentNamespace
                    dep_package_ref.checksum.algorithm = "SHA1"
                    dep_package_ref.checksum.checksumValue = spdx_dep_sha1

                    dep_package_cache[dep] = (dep_spdx_package, dep_package_ref)

                runtime_doc.externalDocumentRefs.append(dep_package_ref)

                runtime_doc.add_relationship(
                    "%s:%s" % (dep_package_ref.externalDocumentId, dep_spdx_package.SPDXID),
                    "RUNTIME_DEPENDENCY_OF",
                    "%s:%s" % (package_ref.externalDocumentId, spdx_package.SPDXID)
                )
                seen_deps.add(dep)

            # Written below SPDXRUNTIMEDEPLOY (passed as spdx_deploy)
            oe.sbom.write_doc(d, runtime_doc, "runtime", spdx_deploy, indent=get_json_indent(d))
}
754
755addtask do_create_runtime_spdx after do_create_spdx before do_build do_rm_work
756SSTATETASKS += "do_create_runtime_spdx"
757do_create_runtime_spdx[sstate-inputdirs] = "${SPDXRUNTIMEDEPLOY}"
758do_create_runtime_spdx[sstate-outputdirs] = "${DEPLOY_DIR_SPDX}"
759
# Setscene variant of do_create_runtime_spdx; it only calls
# sstate_setscene(), which is defined outside this class.
python do_create_runtime_spdx_setscene () {
    sstate_setscene(d)
}
763addtask do_create_runtime_spdx_setscene
764
765do_create_runtime_spdx[dirs] = "${SPDXRUNTIMEDEPLOY}"
766do_create_runtime_spdx[cleandirs] = "${SPDXRUNTIMEDEPLOY}"
767do_create_runtime_spdx[rdeptask] = "do_create_spdx"
768
def spdx_get_src(d):
    """
    Save the patched source of the recipe in SPDXWORK.

    For ordinary recipes WORKDIR is temporarily pointed at SPDXWORK and
    do_unpack (then do_patch, unless SRC_URI is empty) is executed there.
    For work-shared recipes (see is_work_shared_spdx) the existing source
    tree is copied into SPDXWORK/<PN>-<PV>-<PR> instead, its .git directory
    is removed, and do_patch is not run.  WORKDIR is always restored before
    returning.
    """
    import shutil

    spdx_workdir = d.getVar('SPDXWORK')
    spdx_sysroot_native = d.getVar('STAGING_DIR_NATIVE')

    workdir = d.getVar("WORKDIR")

    # Decide once, while WORKDIR still has its original value: the check
    # looks at WORKDIR, which is redirected to SPDXWORK below.
    work_shared = is_work_shared_spdx(d)

    try:
        if not work_shared:
            # The kernel class functions require it to be on work-shared, so
            # WORKDIR is only changed for ordinary recipes.
            # Change the WORKDIR to make do_unpack do_patch run in another dir.
            d.setVar('WORKDIR', spdx_workdir)
            # Restore the original path to recipe's native sysroot (it's relative to WORKDIR).
            d.setVar('STAGING_DIR_NATIVE', spdx_sysroot_native)

            # The changed 'WORKDIR' also changes 'B'; create it because some
            # recipes' do_patch requires 'B' to exist.
            bb.utils.mkdirhier(d.getVar('B'))

            bb.build.exec_func('do_unpack', d)
        else:
            # Copy the shared source tree to spdx_workdir
            share_src = d.getVar('WORKDIR')
            d.setVar('WORKDIR', spdx_workdir)
            d.setVar('STAGING_DIR_NATIVE', spdx_sysroot_native)
            src_dir = spdx_workdir + "/" + d.getVar('PN')+ "-" + d.getVar('PV') + "-" + d.getVar('PR')
            bb.utils.mkdirhier(src_dir)
            if bb.data.inherits_class('kernel',d):
                share_src = d.getVar('STAGING_KERNEL_DIR')
            cmd_copy_share = "cp -rf " + share_src + "/* " + src_dir + "/"
            cmd_copy_shared_res = os.popen(cmd_copy_share).read()
            bb.note("cmd_copy_shared_result = " + cmd_copy_shared_res)

            # The copied tree must not carry its git metadata
            git_path = src_dir + "/.git"
            if os.path.exists(git_path):
                shutil.rmtree(git_path)

        # Make sure gcc and kernel sources are patched only once
        if not (d.getVar('SRC_URI') == "" or work_shared):
            bb.build.exec_func('do_patch', d)

        # Some userland has no source.
        if not os.path.exists(spdx_workdir):
            bb.utils.mkdirhier(spdx_workdir)
    finally:
        d.setVar("WORKDIR", workdir)
820
# do_rootfs lists both SPDX tasks in its [recrdeptask] flag.
do_rootfs[recrdeptask] += "do_create_spdx do_create_runtime_spdx"

# image_combine_spdx is prepended (=+) to ROOTFS_POSTUNINSTALL_COMMAND.
ROOTFS_POSTUNINSTALL_COMMAND =+ "image_combine_spdx ; "

# do_populate_sdk lists the same two SPDX tasks in [recrdeptask]. The host and
# target combine functions are appended to the matching SDK post commands,
# with the append restricted by the task-populate-sdk override.
do_populate_sdk[recrdeptask] += "do_create_spdx do_create_runtime_spdx"
POPULATE_SDK_POST_HOST_COMMAND:append:task-populate-sdk = " sdk_host_combine_spdx; "
POPULATE_SDK_POST_TARGET_COMMAND:append:task-populate-sdk = " sdk_target_combine_spdx; "
828
python image_combine_spdx() {
    # Build the combined SPDX outputs for the image via combine_spdx(), then,
    # when IMAGE_LINK_NAME is set, create a relative symlink in IMGDEPLOYDIR
    # for each of the three output files.
    import os
    import oe.sbom
    from pathlib import Path
    from oe.rootfs import image_list_installed_packages

    name = d.getVar("IMAGE_NAME")
    link_name = d.getVar("IMAGE_LINK_NAME")
    deploy_dir = Path(d.getVar("IMGDEPLOYDIR"))

    combine_spdx(
        d,
        name,
        deploy_dir,
        oe.sbom.get_image_spdxid(name),
        image_list_installed_packages(d),
    )

    if link_name:
        # Same order as the files are described: document, archive, index.
        for suffix in (".spdx.json", ".spdx.tar.zst", ".spdx.index.json"):
            target = deploy_dir / (name + suffix)
            link = deploy_dir / (link_name + suffix)
            # Skip the link when it would have the same path as its target.
            if link != target:
                link.symlink_to(os.path.relpath(target, link.parent))
}
856
# Thin task-level wrapper: runs sdk_combine_spdx() with sdk_type "host".
python sdk_host_combine_spdx() {
    sdk_combine_spdx(d, "host")
}
860
# Thin task-level wrapper: runs sdk_combine_spdx() with sdk_type "target".
python sdk_target_combine_spdx() {
    sdk_combine_spdx(d, "target")
}
864
def sdk_combine_spdx(d, sdk_type):
    """
    Produce the combined SPDX outputs for one part of the SDK.

    The output name is SDK_NAME joined to sdk_type with "-", and the files
    are written under SDKDEPLOYDIR by combine_spdx(). The installed package
    list is requested with a flag that is true only when sdk_type is
    "target".
    """
    import oe.sbom
    from pathlib import Path
    from oe.sdk import sdk_list_installed_packages

    name = "-".join([d.getVar("SDK_NAME"), sdk_type])
    wants_target = sdk_type == "target"

    combine_spdx(
        d,
        name,
        Path(d.getVar("SDKDEPLOYDIR")),
        oe.sbom.get_sdk_spdxid(name),
        sdk_list_installed_packages(d, wants_target),
    )
875
def combine_spdx(d, rootfs_name, rootfs_deploydir, rootfs_spdxid, packages):
    """
    Write the top-level SPDX document, document archive and index.

    Three files are created in rootfs_deploydir, each prefixed by
    rootfs_name:

      <rootfs_name>.spdx.json        document with one package (named PN,
                                     versioned PV, SPDXID rootfs_spdxid)
                                     that CONTAINS each entry of packages
                                     and points at its runtime document
      <rootfs_name>.spdx.tar.zst     zstd-compressed tar stream holding that
                                     document, every document reached through
                                     externalDocumentRefs (each added once),
                                     and an "index.json" member
      <rootfs_name>.spdx.index.json  the same index as a standalone file

    d is the BitBake datastore, rootfs_deploydir is a pathlib.Path, and
    packages is a mapping of which only the keys (package names) are read.
    bb.fatal() is called when a package's SPDX file contains no package of
    that name.
    """
    import os
    import oe.spdx
    import oe.sbom
    import io
    import json
    from datetime import timezone, datetime
    from pathlib import Path
    import tarfile
    import bb.compress.zstd

    created_at = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    spdx_root = Path(d.getVar("DEPLOY_DIR_SPDX"))
    source_date_epoch = d.getVar("SOURCE_DATE_EPOCH")

    def external_ref(ref_doc, ref_sha1):
        # Describe ref_doc as an external document reference carrying its SHA1.
        ext = oe.spdx.SPDXExternalDocumentRef()
        ext.externalDocumentId = "DocumentRef-%s" % ref_doc.name
        ext.spdxDocument = ref_doc.documentNamespace
        ext.checksum.algorithm = "SHA1"
        ext.checksum.checksumValue = ref_sha1
        return ext

    # --- Top-level document and the package that represents the rootfs ---
    doc = oe.spdx.SPDXDocument()
    doc.name = rootfs_name
    doc.documentNamespace = get_doc_namespace(d, doc)
    doc.creationInfo.created = created_at
    doc.creationInfo.comment = "This document was created by analyzing the source of the Yocto recipe during the build."
    doc.creationInfo.licenseListVersion = d.getVar("SPDX_LICENSE_DATA")["licenseListVersion"]
    for creator in (
        "Tool: OpenEmbedded Core create-spdx.bbclass",
        "Organization: %s" % d.getVar("SPDX_ORG"),
        "Person: N/A ()",
    ):
        doc.creationInfo.creators.append(creator)

    image = oe.spdx.SPDXPackage()
    image.name = d.getVar("PN")
    image.versionInfo = d.getVar("PV")
    image.SPDXID = rootfs_spdxid
    image.supplier = d.getVar("SPDX_SUPPLIER")

    doc.packages.append(image)

    # --- One CONTAINS and one OTHER relationship per installed package ---
    for pkg_name in sorted(packages.keys()):
        pkg_path = spdx_root / "packages" / (pkg_name + ".spdx.json")
        pkg_doc, pkg_sha1 = oe.sbom.read_doc(pkg_path)

        # First package in the document whose name matches, if any.
        matched = next((p for p in pkg_doc.packages if p.name == pkg_name), None)
        if matched is None:
            bb.fatal("Unable to find package with name '%s' in SPDX file %s" % (pkg_name, pkg_path))

        pkg_ref = external_ref(pkg_doc, pkg_sha1)
        doc.externalDocumentRefs.append(pkg_ref)
        doc.add_relationship(image, "CONTAINS", "%s:%s" % (pkg_ref.externalDocumentId, matched.SPDXID))

        runtime_path = spdx_root / "runtime" / ("runtime-" + pkg_name + ".spdx.json")
        runtime_doc, runtime_sha1 = oe.sbom.read_doc(runtime_path)
        runtime_ref = external_ref(runtime_doc, runtime_sha1)

        # "OTHER" isn't ideal here, but I can't find a relationship that makes sense
        doc.externalDocumentRefs.append(runtime_ref)
        doc.add_relationship(
            image,
            "OTHER",
            "%s:%s" % (runtime_ref.externalDocumentId, runtime_doc.SPDXID),
            comment="Runtime dependencies for %s" % pkg_name
        )

    image_spdx_path = rootfs_deploydir / (rootfs_name + ".spdx.json")

    with image_spdx_path.open("wb") as out:
        doc.to_json(out, sort_keys=True, indent=get_json_indent(d))

    # --- Archive of every reachable document, plus an index of them ---
    num_threads = int(d.getVar("BB_NUMBER_THREADS"))

    # Holds both file paths and document namespaces that were already handled.
    seen = set()

    index = {"documents": []}

    def add_document(tar, path):
        # Add the document at path to tar, then recurse into the documents it
        # references. Paths and namespaces are each processed at most once.
        if path in seen:
            return

        seen.add(path)

        with path.open("rb") as doc_file:
            spdx_doc, doc_sha1 = oe.sbom.read_doc(doc_file)
            # Rewind so the tar member gets the file from its first byte.
            doc_file.seek(0)

            namespace = spdx_doc.documentNamespace
            if namespace in seen:
                return

            bb.note("Adding SPDX document %s" % path)
            seen.add(namespace)

            member = tar.gettarinfo(fileobj=doc_file)
            member.name = spdx_doc.name + ".spdx.json"
            member.uid = 0
            member.gid = 0
            member.uname = "root"
            member.gname = "root"

            # Cap the member mtime at SOURCE_DATE_EPOCH when that is set.
            if source_date_epoch is not None:
                epoch = int(source_date_epoch)
                if member.mtime > epoch:
                    member.mtime = epoch

            tar.addfile(member, doc_file)

            index["documents"].append({
                "filename": member.name,
                "documentNamespace": namespace,
                "sha1": doc_sha1,
            })

        for ref in spdx_doc.externalDocumentRefs:
            add_document(tar, spdx_root / "by-namespace" / ref.spdxDocument.replace("/", "_"))

    spdx_tar_path = rootfs_deploydir / (rootfs_name + ".spdx.tar.zst")
    with bb.compress.zstd.open(spdx_tar_path, "w", num_threads=num_threads) as zst_file, \
            tarfile.open(fileobj=zst_file, mode="w|") as tar:
        add_document(tar, image_spdx_path)

        index["documents"].sort(key=lambda entry: entry["filename"])

        index_payload = json.dumps(
            index,
            sort_keys=True,
            indent=get_json_indent(d),
        ).encode("utf-8")

        index_member = tarfile.TarInfo("index.json")
        index_member.size = len(index_payload)
        index_member.uid = 0
        index_member.gid = 0
        index_member.uname = "root"
        index_member.gname = "root"

        tar.addfile(index_member, fileobj=io.BytesIO(index_payload))

    spdx_index_path = rootfs_deploydir / (rootfs_name + ".spdx.index.json")
    with spdx_index_path.open("w") as out:
        json.dump(index, out, sort_keys=True, indent=get_json_indent(d))
1025