Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add SPDX generation using spdx-tools #1233

Open
wants to merge 24 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
710af18
Add SPDX generation using spdx-tools
armintaenzertng Jun 21, 2023
9f27150
Add generator plugins for all SPDX formats
armintaenzertng Jun 23, 2023
d5ed02f
add SPDX generation test
armintaenzertng Jun 29, 2023
afdf71a
add SPDX versioning
armintaenzertng Jul 11, 2023
8b21218
add CLI version parameter
armintaenzertng Jul 13, 2023
aaa3333
update copyright year for new SPDX implementation
armintaenzertng Jul 17, 2023
2350559
update get_purl() according to #1237
armintaenzertng Jul 17, 2023
b43ac47
add SPDX-2.3 features
armintaenzertng Jul 17, 2023
dbacd29
rename some SPDX generation functions
armintaenzertng Jul 17, 2023
f466eab
rename format_version to spdx_version
armintaenzertng Jul 18, 2023
29f3411
address linting issues
armintaenzertng Jul 18, 2023
0b76e03
update and add doc strings
armintaenzertng Jul 18, 2023
9fde993
fix SPDX licensing
armintaenzertng Jul 19, 2023
58db89e
switch SPDX entrypoints to new implementation
armintaenzertng Jul 19, 2023
dee02f6
upgrade spdx-tools to 0.8.0rc3
armintaenzertng Jul 20, 2023
ecdc78e
fix SPDX file handling
armintaenzertng Jul 20, 2023
6e6eea5
workaround for SPDX output with files without checksums
armintaenzertng Jul 20, 2023
aedb5a6
remove temporary SPDX generation test
armintaenzertng Jul 20, 2023
22b3edd
upgrade spdx-tools to 0.8.0
armintaenzertng Jul 25, 2023
bc7cd6a
fix default SPDX version value
armintaenzertng Jul 25, 2023
a0cc74e
fix linting issues
armintaenzertng Jul 26, 2023
4483794
add new SPDX formats to tests
armintaenzertng Jul 26, 2023
65d4e00
change version parameter name to spdx_version in some generate_layer …
armintaenzertng Jul 28, 2023
b2df558
update spdx-tools dependency to 0.8.1
armintaenzertng Aug 24, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 17 additions & 5 deletions ci/test_files_touched.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,17 +98,29 @@
'tern report -f json -i photon:3.0',
'tern report -f spdxtagvalue -i photon:3.0',
'tern report -f spdxjson -i photon:3.0',
'tern report -f spdxyaml -i photon:3.0',
'tern report -f spdxxml -i photon:3.0',
'tern report -f spdxrdf -i photon:3.0',
'tern report -d samples/alpine_python/Dockerfile',
'tern report -f html -i photon:3.0',
'tern report -f cyclonedxjson -i photon:3.0'],
# tern/formats/spdx
re.compile('tern/formats/spdx'): [
'tern report -f spdxtagvalue -i photon:3.0 -o spdx.spdx && ' \
'java -jar tools-java/target/tools-java-*-jar-with-dependencies.jar '\
'tern report -f spdxtagvalue -i photon:3.0 -o spdx.spdx && '
'java -jar tools-java/target/tools-java-*-jar-with-dependencies.jar '
'Verify spdx.spdx',
'tern report -f spdxjson -i photon:3.0 -o spdx.json && ' \
'java -jar tools-java/target/tools-java-*-jar-with-dependencies.jar '\
'Verify spdx.json'],
'tern report -f spdxjson -i photon:3.0 -o spdx.json && '
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@armintaenzertng Do these lines need the continuation like the lines that were removed?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The continuation lines were not required in the old code, so I removed them in the new one.
See for example here.

'java -jar tools-java/target/tools-java-*-jar-with-dependencies.jar '
'Verify spdx.json',
'tern report -f spdxyaml -i photon:3.0 -o spdx.yaml && '
'java -jar tools-java/target/tools-java-*-jar-with-dependencies.jar '
'Verify spdx.yaml',
'tern report -f spdxxml -i photon:3.0 -o spdx.xml && '
'java -jar tools-java/target/tools-java-*-jar-with-dependencies.jar '
'Verify spdx.xml',
'tern report -f spdxrdf -i photon:3.0 -o spdx.rdf.xml && '
'java -jar tools-java/target/tools-java-*-jar-with-dependencies.jar '
'Verify spdx.rdf.xml'],
# tern/tools
re.compile('tern/tools'):
['tern report -i golang:alpine'],
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,5 @@ GitPython~=3.1
prettytable~=3.8
packageurl-python>=0.11.1
license-expression>=30.1
spdx-tools>=0.8.1

11 changes: 8 additions & 3 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,19 @@ include_package_data = True
[entry_points]
tern.formats =
default = tern.formats.default.generator:Default
spdxtagvalue = tern.formats.spdx.spdxtagvalue.generator:SpdxTagValue
spdxjson = tern.formats.spdx.spdxjson.generator:SpdxJSON
spdxjsonc = tern.formats.spdx.spdxjson.consumer:SpdxJSON
json = tern.formats.json.generator:JSON
jsonc = tern.formats.json.consumer:JSON
yaml = tern.formats.yaml.generator:YAML
html = tern.formats.html.generator:HTML
cyclonedxjson = tern.formats.cyclonedx.cyclonedxjson.generator:CycloneDXJSON
spdxjson = tern.formats.spdx.spdxjson.generator:SpdxJSON
spdxyaml = tern.formats.spdx.spdxyaml.generator:SpdxYAML
spdxxml = tern.formats.spdx.spdxxml.generator:SpdxXML
spdxrdf = tern.formats.spdx.spdxrdf.generator:SpdxRDF
spdxtagvalue = tern.formats.spdx.spdxtagvalue.generator:SpdxTagValue
spdxtagvalue_legacy = tern.formats.spdx_legacy.spdxtagvalue.generator:SpdxTagValue
spdxjson_legacy = tern.formats.spdx_legacy.spdxjson.generator:SpdxJSON
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will look into this today... but I think we may need to rename the SpdxJSON class here to SpdxJSONLegacy in the actual file at tern/formats/spdx_legacy/spdxjson/generator.py. Same with the SpdxTagValue class in the legacy code.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you say that?
I just tried running tern report -f spdxjson_legacy -i photon:3.0 and it worked without errors for me.

We can of course rename them to make it clearer that this code will be deprecated.

spdxjsonc = tern.formats.spdx_legacy.spdxjson.consumer:SpdxJSON
rnjudge marked this conversation as resolved.
Show resolved Hide resolved
tern.extensions =
cve_bin_tool = tern.extensions.cve_bin_tool.executor:CveBinTool
scancode = tern.extensions.scancode.executor:Scancode
Expand Down
9 changes: 7 additions & 2 deletions tern/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,8 +214,13 @@ def main():
metavar='REPORT_FORMAT',
help="Format the report using one of the "
"available formats: "
"spdxtagvalue, spdxjson, cyclonedxjson, json, "
"yaml, html")
"spdxtagvalue, spdxjson, spdxyaml, spdxxml, spdxrdf, "
"cyclonedxjson, json, yaml, html")
# Optional SPDX spec version selector. The help text is built from adjacent
# string literals, so each fragment must end with a space to keep the
# sentences separated in the rendered --help output.
parser_report.add_argument('-sv', '--spdx-version',
                           metavar='SPDX_VERSION',
                           help="Specify the version of the SPDX report format. "
                                "This will be ignored for all non-SPDX formats. "
                                "Possible values are '2.2' (default) or '2.3'.")
parser_report.add_argument('-o', '--output-file', default=None,
metavar='FILE',
help="Write the report to a file. "
Expand Down
4 changes: 3 additions & 1 deletion tern/formats/cyclonedx/cyclonedxjson/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,14 @@ def get_document_dict(image_obj_list):


class CycloneDXJSON(generator.Generate):
def generate(self, image_obj_list, print_inclusive=False):
def generate(self, image_obj_list, spdx_version, print_inclusive=False):
''' Generate a CycloneDX document
The whole document should be stored in a dictionary which can be
converted to JSON and dumped to a file using the write_report function
in report.py. '''
logger.debug('Generating CycloneDX JSON document...')
if spdx_version is not None:
logger.warning("The SPDX version parameter is not supported for CycloneDX JSON.")

report = get_document_dict(image_obj_list)

Expand Down
10 changes: 8 additions & 2 deletions tern/formats/default/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,11 @@ def print_licenses_only(image_obj_list):


class Default(generator.Generate):
def generate(self, image_obj_list, print_inclusive=False):
def generate(self, image_obj_list, spdx_version: str, print_inclusive=False):
'''Generate a default report'''
if spdx_version is not None:
logger.warning("The SPDX version parameter is not supported for the default format.")

report = formats.disclaimer.format(
version_info=content.get_tool_version())
logger.debug('Creating a detailed report of components in image...')
Expand All @@ -154,8 +157,11 @@ def generate(self, image_obj_list, print_inclusive=False):
return report
return report + print_licenses_only(image_obj_list)

def generate_layer(self, layer):
def generate_layer(self, layer, spdx_version: str):
"""Generate a default report for one layer object"""
if spdx_version is not None:
logger.warning("The SPDX version parameter is not supported for the default format.")

report = formats.disclaimer.format(
version_info=content.get_tool_version())
logger.debug("Generating summary report for layer...")
Expand Down
2 changes: 1 addition & 1 deletion tern/formats/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@
class Generate(metaclass=ABCMeta):
'''Base class for report plugins'''
@abstractmethod
def generate(self, image_obj_list, print_inclusive=False):
def generate(self, image_obj_list, spdx_version, print_inclusive=False):
'''Format the report according to the plugin style.
Each subclass is responsible for their own formatting.'''
10 changes: 8 additions & 2 deletions tern/formats/html/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,16 +321,22 @@ def get_report_dict(image_obj_list):


class HTML(generator.Generate):
def generate(self, image_obj_list, print_inclusive=False):
def generate(self, image_obj_list, spdx_version: str, print_inclusive=False):
'''Given a list of image objects, create a html report
for the images'''
if spdx_version is not None:
logger.warning("The SPDX version parameter is not supported for HTML.")

report_dict = get_report_dict(image_obj_list)
report = create_html_report(report_dict, image_obj_list)
return report

def generate_layer(self, layer):
def generate_layer(self, layer, spdx_version: str):
"""Given a layer object, create a html report for the layer"""
logger.debug("Creating HTML report...")
if spdx_version is not None:
logger.warning("The SPDX version parameter is not supported for HTML.")

report = ""
report = report + '\n' + head_layer % (css, get_tool_version())
report = report + '\n' + report_dict_to_html(layer.to_dict())
Expand Down
14 changes: 12 additions & 2 deletions tern/formats/json/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,28 @@
"""

import json
import logging

from tern.formats import generator
from tern.utils import constants

logger = logging.getLogger(constants.logger_name)

class JSON(generator.Generate):
def generate(self, image_obj_list, print_inclusive=False):
def generate(self, image_obj_list, spdx_version: str, print_inclusive=False):
'''Given a list of image objects, create a json object string'''
image_list = []
if spdx_version is not None:
logger.warning("The SPDX version parameter is not supported for JSON.")

for image in image_obj_list:
image_list.append({'image': image.to_dict()})
image_dict = {'images': image_list}
return json.dumps(image_dict)

def generate_layer(self, layer):
def generate_layer(self, layer, spdx_version: str):
"""Create a json object for one layer"""
if spdx_version is not None:
logger.warning("The SPDX version parameter is not supported for JSON.")

return json.dumps(layer.to_dict())
4 changes: 0 additions & 4 deletions tern/formats/spdx/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2019 VMware, Inc. All Rights Reserved.
# SPDX-License-Identifier: BSD-2-Clause
15 changes: 15 additions & 0 deletions tern/formats/spdx/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from spdx_tools.spdx.model import Version

# Identifier string used for the SPDX document element
DOCUMENT_ID = 'SPDXRef-DOCUMENT'
# Document name template; the {image_name} placeholder is filled by the caller
DOCUMENT_NAME = 'Tern report for {image_name}'
# SPDX version string; NOTE(review): presumably the default when the user
# does not request a specific version - confirm against the callers
SPDX_VERSION = 'SPDX-2.2'
# License identifier for the data contained in the SPDX document itself
DATA_LICENSE = 'CC0-1.0'
# Free-text comment attached to generated documents
DOCUMENT_COMMENT = 'This document was generated by ' \
'the Tern Project: https://github.com/tern-tools/tern'
# Namespace URI template with {version}, {image} and {uuid} placeholders
DOCUMENT_NAMESPACE = 'https://spdx.org/spdxdocs/tern-' \
'report-{version}-{image}-{uuid}'
# SPDX license list version (major 3, minor 20) recorded in the document
LICENSE_LIST_VERSION = Version(3, 20)
# Creator name template; {version} is a placeholder filled by the caller
CREATOR_NAME = 'tern-{version}'
# Name and namespace variants that carry no image placeholder; the namespace
# uses {timestamp} and {uuid} instead.
# NOTE(review): presumably used for reports not tied to one image - confirm
DOCUMENT_NAME_SNAPSHOT = 'Tern SPDX SBoM'
DOCUMENT_NAMESPACE_SNAPSHOT = 'https://spdx.org/spdxdocs/tern-report-' \
'{timestamp}-{uuid}'
134 changes: 134 additions & 0 deletions tern/formats/spdx/file_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2023 VMware, Inc. All Rights Reserved.
# SPDX-License-Identifier: BSD-2-Clause

"""
File level helpers for SPDX document generator
"""
import logging
from datetime import datetime
from typing import List

from spdx_tools.spdx.model import File as SpdxFile, SpdxNone, SpdxNoAssertion, Checksum, ChecksumAlgorithm, FileType

from tern.classes.file_data import FileData
from tern.classes.image import Image
from tern.classes.image_layer import ImageLayer
from tern.classes.template import Template
from tern.formats.spdx.layer_helpers import get_layer_checksum
from tern.formats.spdx.general_helpers import get_package_license_declared, get_file_spdxref
from tern.utils import constants

logger = logging.getLogger(constants.logger_name)


def get_spdx_file_list_from_layer(layer_obj: ImageLayer, template: Template, timestamp: datetime, spdx_version: str) -> List[SpdxFile]:
    """Build the SPDX File entries for the files of a single layer.

    Each entry of ``layer_obj.files`` is converted with
    ``get_spdx_file_from_filedata``. A file whose SPDX reference was already
    emitted is skipped, so every reference appears at most once in the
    returned list."""
    # the layer's id is not known here, so the stringified timestamp
    # stands in for it
    layer_id = str(timestamp)
    seen_refs = set()
    result: List[SpdxFile] = []
    for file_obj in layer_obj.files:
        ref = get_file_spdxref(file_obj, layer_id)
        if ref in seen_refs:
            continue
        result.append(get_spdx_file_from_filedata(file_obj, template, layer_id, spdx_version))
        seen_refs.add(ref)
    return result


def get_spdx_file_list_from_image(image_obj: Image, template: Template, spdx_version: str) -> List[SpdxFile]:
    """Build the SPDX File entries for every analyzed layer of an image.

    Layers whose ``files_analyzed`` flag is falsy are skipped. Within the
    remaining layers each file is converted with
    ``get_spdx_file_from_filedata``, using the layer checksum value as the
    layer id."""
    result: List[SpdxFile] = []

    # SPDX references already emitted; the same file may show up in
    # several places in the filesystem and must only be listed once
    seen_refs = set()
    for layer in image_obj.layers:
        if not layer.files_analyzed:
            continue
        # the layer checksum doubles as the layer id
        layer_id = get_layer_checksum(layer).value
        for file_obj in layer.files:
            ref = get_file_spdxref(file_obj, layer_id)
            if ref in seen_refs:
                continue
            result.append(get_spdx_file_from_filedata(file_obj, template, layer_id, spdx_version))
            seen_refs.add(ref)
    return result


def get_spdx_file_from_filedata(filedata: FileData, template: Template, layer_id: str, spdx_version: str) -> SpdxFile:
    """Given a FileData object and its SPDX template mapping, return an
    SPDX representation of the file.

    filedata: the file to convert
    template: the SPDX template passed to ``filedata.to_dict``
    layer_id: distinguishes copies of the same file occurring in different
        places in the image; it is passed on to ``get_file_spdxref``
    spdx_version: version string; ``license_concluded`` and
        ``copyright_text`` are only filled (with NOASSERTION) when this is
        exactly "SPDX-2.2", otherwise they are left unset

    The licenses found in the file are deduplicated in first-seen order so
    that the generated document is identical from one run to the next."""
    mapping = filedata.to_dict(template)

    if filedata.licenses:
        # Add the license expression to the list if it is a valid SPDX
        # identifier; otherwise, add the LicenseRef.
        # dict.fromkeys removes duplicates like set() does, but keeps the
        # input order instead of a hash-dependent one
        license_info_in_file = [get_package_license_declared(lic) for lic in dict.fromkeys(filedata.licenses)]
    else:
        license_info_in_file = [SpdxNone()]

    file_notice = get_file_notice(filedata)
    file_comment = get_file_comment(filedata)
    file_contributors = get_file_contributors(filedata)

    file_types = None
    if mapping['FileType']:
        # the mapping holds the type as text; FileType members are looked
        # up by their upper-case name
        file_types = [FileType[mapping['FileType'].upper()]]

    # NOTE(review): the two fields below are presumably mandatory in
    # SPDX 2.2 and optional afterwards - confirm against the SPDX spec
    return SpdxFile(
        spdx_id=get_file_spdxref(filedata, layer_id),
        name=mapping['FileName'],
        checksums=[get_file_checksum(filedata)],
        license_concluded=SpdxNoAssertion() if spdx_version == "SPDX-2.2" else None,  # we don't provide this
        copyright_text=SpdxNoAssertion() if spdx_version == "SPDX-2.2" else None,  # we don't know this
        file_types=file_types,
        license_info_in_file=license_info_in_file,
        # empty strings / lists are passed as None
        notice=file_notice or None,
        comment=file_comment or None,
        contributors=file_contributors or None,
    )


def get_file_checksum(filedata: FileData) -> Checksum:
    """Return the SHA1 checksum of a FileData object as an SPDX Checksum.

    When the file carries no SHA1 checksum, an error is logged and the
    hard-coded fallback value (described by the log message as the empty
    file checksum) is used instead."""
    sha1_value = filedata.get_checksum('sha1')
    if sha1_value:
        return Checksum(ChecksumAlgorithm.SHA1, sha1_value)

    logger.error("No SHA1 checksum found in file. Resorting to empty file checksum.")
    return Checksum(ChecksumAlgorithm.SHA1, "da39a3ee5e6b4b0d3255bfef95601890afd80709")


def get_file_notice(filedata: FileData) -> str:
    """Return a formatted string with all copyrights found in a file, one
    per line (each followed by a newline). Return an empty string if there
    are no copyrights"""
    # join once instead of growing the string inside a loop
    return ''.join(cp + '\n' for cp in filedata.copyrights)


def get_file_comment(filedata: FileData) -> str:
    """Return a formatted comment string with all file level notices.

    For every origin in ``filedata.origins.origins`` the result contains a
    line "<origin_str>:" followed by one "<level>: <message>" line per
    notice of that origin. Return an empty string if there are no
    origins"""
    # collect the pieces and join once instead of growing the string
    # inside nested loops
    parts = []
    for origin in filedata.origins.origins:
        parts.append(f'{origin.origin_str}:' + '\n')
        parts.extend(
            f'{notice.level}: {notice.message}' + '\n'
            for notice in origin.notices
        )
    return ''.join(parts)


def get_file_contributors(filedata: FileData) -> List[str]:
    """The SPDX spec allows for an optional list of file contributors.
    If there are any authors found in the file, return a list of authors.
    If empty, return an empty list.

    The result is always a new list, so callers may modify it without
    touching ``filedata.authors``"""
    return list(filedata.authors)
Loading
Loading