From 0565925c18ed58e425d9bf63de3791421e08b6a6 Mon Sep 17 00:00:00 2001 From: Tony Aiuto Date: Tue, 8 Nov 2022 12:39:22 -0500 Subject: [PATCH 1/3] Explore ways to have more package metadata providers. Licenses are still special because they need to go down to the license_kind, but other metadata (self-contained) providers can be gathered generically. This PR has two parallel implementations so we can explore the ramifications of each. The early attempt was to make PackageInfo a first-class element, equal to LicensesInfo in the way we recursively gather it. The problem with that approach is that it requires you to scale licenses_core.bzl with each new type. A different approach is to have a generic TransitiveMetadataInfo collector that can hold many different types of provider in it, as long as they all obey the same protocol for identifying their type. We pass a provider list to gather_licenses_info_common() to select what to include. This way, a user adding a private metadata type only needs to add a new write-json capability for their custom provider. That is still a little ugly, but at least we can share gather_licenses_info_common(). 
--- rules/gather_licenses_info.bzl | 2 +- rules/gather_metadata.bzl | 298 +++++++++++++++++++++++++++++++++ rules/licenses_core.bzl | 43 ++++- rules/package_info.bzl | 117 +++++++++++++ rules/providers.bzl | 71 ++++++-- rules/sbom.bzl | 159 ++++++++++++++++++ tools/BUILD | 19 ++- tools/checker_demo.py | 2 - tools/write_sbom.py | 95 +++++++++++ 9 files changed, 777 insertions(+), 29 deletions(-) create mode 100644 rules/gather_metadata.bzl create mode 100644 rules/package_info.bzl create mode 100644 rules/sbom.bzl create mode 100644 tools/write_sbom.py diff --git a/rules/gather_licenses_info.bzl b/rules/gather_licenses_info.bzl index a5f1a41..997fe03 100644 --- a/rules/gather_licenses_info.bzl +++ b/rules/gather_licenses_info.bzl @@ -41,7 +41,7 @@ def _strip_null_repo(label): return s def _gather_licenses_info_impl(target, ctx): - return gather_licenses_info_common(target, ctx, TransitiveLicensesInfo, NAMESPACES, should_traverse) + return gather_licenses_info_common(target, ctx, TransitiveLicensesInfo, NAMESPACES, [], should_traverse) gather_licenses_info = aspect( doc = """Collects LicenseInfo providers into a single TransitiveLicensesInfo provider.""", diff --git a/rules/gather_metadata.bzl b/rules/gather_metadata.bzl new file mode 100644 index 0000000..8a6a67e --- /dev/null +++ b/rules/gather_metadata.bzl @@ -0,0 +1,298 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Rules and macros for collecting LicenseInfo providers.""" + +load( + "@rules_license//rules:licenses_core.bzl", + "TraceInfo", + "gather_licenses_info_common", + "should_traverse", +) +load( + "@rules_license//rules:providers.bzl", + "MetadataInfo", + "PackageInfo", + "TransitiveMetadataInfo", +) + +# Definition for compliance namespace, used for filtering licenses +# based on the namespace to which they belong. +NAMESPACES = ["compliance"] + +def _strip_null_repo(label): + """Removes the null repo name (e.g. @//) from a string. + + The is to make str(label) compatible between bazel 5.x and 6.x + """ + s = str(label) + if s.startswith('@//'): + return s[1:] + elif s.startswith('@@//'): + return s[2:] + return s + +def _gather_metadata_info_impl(target, ctx): + return gather_licenses_info_common(target, ctx, TransitiveMetadataInfo, NAMESPACES, [MetadataInfo, PackageInfo], should_traverse) + +gather_metadata_info = aspect( + doc = """Collects LicenseInfo providers into a single TransitiveMetadataInfo provider.""", + implementation = _gather_metadata_info_impl, + attr_aspects = ["*"], + attrs = { + "_trace": attr.label(default = "@rules_license//rules:trace_target"), + }, + provides = [TransitiveMetadataInfo], + apply_to_generating_rules = True, +) + +def _write_metadata_info_impl(target, ctx): + """Write transitive license info into a JSON file + + Args: + target: The target of the aspect. + ctx: The aspect evaluation context. 
+ + Returns: + OutputGroupInfo + """ + + if not TransitiveMetadataInfo in target: + return [OutputGroupInfo(licenses = depset())] + info = target[TransitiveMetadataInfo] + outs = [] + + # If the result doesn't contain licenses, we simply return the provider + if not hasattr(info, "target_under_license"): + return [OutputGroupInfo(licenses = depset())] + + # Write the output file for the target + name = "%s_metadata_info.json" % ctx.label.name + content = "[\n%s\n]\n" % ",\n".join(metadata_info_to_json(info)) + out = ctx.actions.declare_file(name) + ctx.actions.write( + output = out, + content = content, + ) + outs.append(out) + + if ctx.attr._trace[TraceInfo].trace: + trace = ctx.actions.declare_file("%s_trace_info.json" % ctx.label.name) + ctx.actions.write(output = trace, content = "\n".join(info.traces)) + outs.append(trace) + + return [OutputGroupInfo(licenses = depset(outs))] + +gather_metadata_info_and_write = aspect( + doc = """Collects TransitiveMetadataInfo providers and writes JSON representation to a file. + + Usage: + blaze build //some:target \ + --aspects=@rules_license//rules:gather_metadata_info.bzl%gather_metadata_info_and_write + --output_groups=licenses + """, + implementation = _write_metadata_info_impl, + attr_aspects = ["*"], + attrs = { + "_trace": attr.label(default = "@rules_license//rules:trace_target"), + }, + provides = [OutputGroupInfo], + requires = [gather_metadata_info], + apply_to_generating_rules = True, +) + +def write_metadata_info(ctx, deps, json_out): + """Writes TransitiveMetadataInfo providers for a set of targets as JSON. + + TODO(aiuto): Document JSON schema. But it is under development, so the current + best place to look is at tests/hello_licenses.golden. + + Usage: + write_metadata_info must be called from a rule implementation, where the + rule has run the gather_metadata_info aspect on its deps to + collect the transitive closure of LicenseInfo providers into a + LicenseInfo provider. 
+ + foo = rule( + implementation = _foo_impl, + attrs = { + "deps": attr.label_list(aspects = [gather_metadata_info]) + } + ) + + def _foo_impl(ctx): + ... + out = ctx.actions.declare_file("%s_licenses.json" % ctx.label.name) + write_metadata_info(ctx, ctx.attr.deps, metadata_file) + + Args: + ctx: context of the caller + deps: a list of deps which should have TransitiveMetadataInfo providers. + This requires that you have run the gather_metadata_info + aspect over them + json_out: output handle to write the JSON info + """ + licenses = [] + for dep in deps: + if TransitiveMetadataInfo in dep: + licenses.extend(metadata_info_to_json(dep[TransitiveMetadataInfo])) + ctx.actions.write( + output = json_out, + content = "[\n%s\n]\n" % ",\n".join(licenses), + ) + +def metadata_info_to_json(metadata_info): + """Render a single LicenseInfo provider to JSON + + Args: + metadata_info: A LicenseInfo. + + Returns: + [(str)] list of LicenseInfo values rendered as JSON. + """ + + main_template = """ {{ + "top_level_target": "{top_level_target}", + "dependencies": [{dependencies} + ], + "licenses": [{licenses} + ], + "packages": [{packages} + ]\n }}""" + + dep_template = """ + {{ + "target_under_license": "{target_under_license}", + "licenses": [ + {licenses} + ] + }}""" + + # TODO(aiuto): 'rule' is a duplicate of 'label' until old users are transitioned + license_template = """ + {{ + "label": "{label}", + "rule": "{label}", + "license_kinds": [{kinds} + ], + "copyright_notice": "{copyright_notice}", + "package_name": "{package_name}", + "package_url": "{package_url}", + "package_version": "{package_version}", + "license_text": "{license_text}", + "used_by": [ + {used_by} + ] + }}""" + + kind_template = """ + {{ + "target": "{kind_path}", + "name": "{kind_name}", + "conditions": {kind_conditions} + }}""" + + package_info_template = """ + {{ + "target": "{label}", + "copyright_notice": "{copyright_notice}", + "package_name": "{package_name}", + "package_url": "{package_url}", + 
"package_version": "{package_version}" + }}""" + + # Build reverse map of license to user + used_by = {} + for dep in metadata_info.deps.to_list(): + # Undo the concatenation applied when stored in the provider. + dep_licenses = dep.licenses.split(",") + for license in dep_licenses: + if license not in used_by: + used_by[license] = [] + used_by[license].append(_strip_null_repo(dep.target_under_license)) + + all_licenses = [] + for license in sorted(metadata_info.licenses.to_list(), key = lambda x: x.label): + kinds = [] + for kind in sorted(license.license_kinds, key = lambda x: x.name): + kinds.append(kind_template.format( + kind_name = kind.name, + kind_path = kind.label, + kind_conditions = kind.conditions, + )) + + if license.license_text: + # Special handling for synthetic LicenseInfo + text_path = (license.license_text.package + "/" + license.license_text.name if type(license.license_text) == "Label" else license.license_text.path) + all_licenses.append(license_template.format( + copyright_notice = license.copyright_notice, + kinds = ",".join(kinds), + license_text = text_path, + package_name = license.package_name, + package_url = license.package_url, + package_version = license.package_version, + label = _strip_null_repo(license.label), + used_by = ",\n ".join(sorted(['"%s"' % x for x in used_by[str(license.label)]])), + )) + + all_deps = [] + for dep in sorted(metadata_info.deps.to_list(), key = lambda x: x.target_under_license): + metadata_used = [] + + # Undo the concatenation applied when stored in the provider. + dep_licenses = dep.licenses.split(",") + all_deps.append(dep_template.format( + target_under_license = _strip_null_repo(dep.target_under_license), + licenses = ",\n ".join(sorted(['"%s"' % _strip_null_repo(x) for x in dep_licenses])), + )) + + all_packages = [] + # We would use this if we had distinct depsets for every provider type. 
+ #for package in sorted(metadata_info.package_info.to_list(), key = lambda x: x.label): + # all_packages.append(package_info_template.format( + # label = _strip_null_repo(package.label), + # copyright_notice = package.copyright_notice, + # package_name = package.package_name, + # package_url = package.package_url, + # package_version = package.package_version, + # )) + + for mi in sorted(metadata_info.other_metadata.to_list(), key = lambda x: x.label): + # Maybe use a map of provider class to formatter. A generic dict->json function + # in starlark would help + + # This format is for using distinct providers. I like the compile time safety. + if mi.type == "package_info": + all_packages.append(package_info_template.format( + label = _strip_null_repo(mi.label), + copyright_notice = mi.copyright_notice, + package_name = mi.package_name, + package_url = mi.package_url, + package_version = mi.package_version, + )) + # This format is if use data as plail old dict. sort of ugly. + if mi.type == "package_info2": + all_packages.append(package_info_template.format( + label = _strip_null_repo(mi.label), + copyright_notice = mi.data.get("copyright_notice") or "", + package_name = mi.data.get("package_name") or "", + package_url = mi.data.get("package_url") or "", + package_version = mi.data.get("package_version") or "", + )) + + return [main_template.format( + top_level_target = _strip_null_repo(metadata_info.target_under_license), + dependencies = ",".join(all_deps), + licenses = ",".join(all_licenses), + packages = ",".join(all_packages), + )] diff --git a/rules/licenses_core.bzl b/rules/licenses_core.bzl index 42702bd..9891ab1 100644 --- a/rules/licenses_core.bzl +++ b/rules/licenses_core.bzl @@ -66,7 +66,7 @@ def should_traverse(ctx, attr): return True -def _get_transitive_licenses(ctx, trans_licenses, trans_deps, traces, provider, filter_func): +def _get_transitive_licenses(ctx, trans_licenses, trans_other_metadata, trans_package_info, trans_deps, traces, provider, 
filter_func): attrs = [a for a in dir(ctx.rule.attr)] for name in attrs: if not filter_func(ctx, name): @@ -96,7 +96,20 @@ def _get_transitive_licenses(ctx, trans_licenses, trans_deps, traces, provider, for trace in info.traces: traces.append("(" + ", ".join([str(ctx.label), ctx.rule.kind, name]) + ") -> " + trace) -def gather_licenses_info_common(target, ctx, provider_factory, namespaces, filter_func): + # We only need one or the other of these stanzas. + # If we use a polymorphic approach to metadata providers, then + # this works. + if hasattr(info, "other_metadata"): + if info.other_metadata: + trans_other_metadata.append(info.other_metadata) + # But if we want more precise type safety, we would have a + # trans_* for each type of metadata. That is not user + # extensibile. + if hasattr(info, "package_info"): + if info.package_info: + trans_package_info.append(info.package_info) + +def gather_licenses_info_common(target, ctx, provider, namespaces, metadata_providers, filter_func): """Collect license info from myself and my deps. Any single target might directly depend on a license, or depend on @@ -114,8 +127,9 @@ def gather_licenses_info_common(target, ctx, provider_factory, namespaces, filte Args: target: The target of the aspect. ctx: The aspect evaluation context. - provider_factory: abstracts the provider returned by this aspect + provider: abstracts the provider returned by this aspect namespaces: a list of namespaces licenses must match to be included + metadata_providers: a list of other providers of interest filter_func: a function that returns true iff the dep edge should be ignored Returns: @@ -124,6 +138,8 @@ def gather_licenses_info_common(target, ctx, provider_factory, namespaces, filte # First we gather my direct license attachments licenses = [] + other_metadata = [] + package_info = [] if ctx.rule.kind == "_license": # Don't try to gather licenses from the license rule itself. 
We'll just # blunder into the text file of the license and pick up the default @@ -144,17 +160,24 @@ def gather_licenses_info_common(target, ctx, provider_factory, namespaces, filte licenses.append(lic) else: fail("should have a namespace") - + for m_p in metadata_providers: + if m_p in dep: + other_metadata.append(dep[m_p]) + # Remove: Only here to show fanout from explict providers. + # if PackageInfo in dep: + # package_info.append(dep[PackageInfo]) # Now gather transitive collection of providers from the targets # this target depends upon. trans_licenses = [] + trans_other_metadata = [] + trans_package_info = [] trans_deps = [] traces = [] - _get_transitive_licenses(ctx, trans_licenses, trans_deps, traces, provider_factory, filter_func) + _get_transitive_licenses(ctx, trans_licenses, trans_other_metadata, trans_package_info, trans_deps, traces, provider, filter_func) if not licenses and not trans_licenses: - return [provider_factory(deps = depset(), licenses = depset(), traces = [])] + return [provider(deps = depset(), licenses = depset(), traces = [])] # If this is the target, start the sequence of traces. if ctx.attr._trace[TraceInfo].trace and ctx.attr._trace[TraceInfo].trace in str(ctx.label): @@ -179,9 +202,15 @@ def gather_licenses_info_common(target, ctx, provider_factory, namespaces, filte else: direct_license_uses = None - return [provider_factory( + return [provider( target_under_license = target.label, licenses = depset(tuple(licenses), transitive = trans_licenses), + # Note: The TransitiveLicensesInfo initializer drops this. + # A less memory intensive solution would be to never collect it. + other_metadata = depset(tuple(other_metadata), transitive = trans_other_metadata), + # Remove: Only here to show the fanout from being explicit about + # each provider type. 
+ # package_info = depset(tuple(package_info), transitive = trans_package_info), deps = depset(direct = direct_license_uses, transitive = trans_deps), traces = traces, )] diff --git a/rules/package_info.bzl b/rules/package_info.bzl new file mode 100644 index 0000000..4acf3c6 --- /dev/null +++ b/rules/package_info.bzl @@ -0,0 +1,117 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Rules for declaring metadata about a package.""" + +load( + "@rules_license//rules:providers.bzl", + "MetadataInfo", + "PackageInfo", +) + +# Debugging verbosity +_VERBOSITY = 0 + +def _debug(loglevel, msg): + if _VERBOSITY > loglevel: + print(msg) # buildifier: disable=print + +# +# package_info() +# + +def _package_info_impl(ctx): + provider = PackageInfo( + type = "package_info", + label = ctx.label, + copyright_notice = ctx.attr.copyright_notice, + package_name = ctx.attr.package_name or ctx.build_file_path.rstrip("/BUILD"), + package_url = ctx.attr.package_url, + package_version = ctx.attr.package_version, + ) + generic_provider = MetadataInfo( + type = "package_info2", + label = ctx.label, + # Alternate design. data should be a package_info. That may be easier + # to work with. It probably uses more memory though. While we have a + # map here instead of an object pointer, the provider is basically + # a name/value map anyway, so there is no space savings. 
+ data = { + # DNS: Remove Ahoy: + "copyright_notice": "Ahoy: " + ctx.attr.copyright_notice, + "package_name": ctx.attr.package_name or ctx.build_file_path.rstrip("/BUILD"), + "package_url": ctx.attr.package_url, + # DNS: remove ++ + "package_version": ctx.attr.package_version + "++" + } + ) + _debug(0, provider) + return [provider, generic_provider] + +_package_info = rule( + implementation = _package_info_impl, + attrs = { + "copyright_notice": attr.string( + doc = "Copyright notice.", + ), + "package_name": attr.string( + doc = "A human readable name identifying this package." + + " This may be used to produce an index of OSS packages used by" + + " an applicatation.", + ), + "package_url": attr.string( + doc = "The URL this instance of the package was download from." + + " This may be used to produce an index of OSS packages used by" + + " an applicatation.", + ), + "package_version": attr.string( + doc = "A human readable version string identifying this package." + + " This may be used to produce an index of OSS packages used" + + " by an applicatation. It should be a value that" + + " increases over time, rather than a commit hash." + ), + }, +) + +# buildifier: disable=function-docstring-args +def package_info( + name, + copyright_notice = None, + package_name = None, + package_url = None, + package_version = None, + visibility = ["//visibility:public"]): + """Wrapper for package_info rule. + + Args: + name: str target name. + license_kind: label a single license_kind. Only one of license_kind or license_kinds may + be specified + license_kinds: list(label) list of license_kind targets. + copyright_notice: str Copyright notice associated with this package. + package_name : str A human readable name identifying this package. This + may be used to produce an index of OSS packages used by + an application. 
+ tags: list(str) tags applied to the rule + """ + _package_info( + name = name, + copyright_notice = copyright_notice, + package_name = package_name, + package_url = package_url, + package_version = package_version, + applicable_licenses = [], + visibility = visibility, + tags = [], + testonly = 0, + ) diff --git a/rules/providers.bzl b/rules/providers.bzl index 8778fd7..0f1d74e 100644 --- a/rules/providers.bzl +++ b/rules/providers.bzl @@ -46,16 +46,61 @@ LicensedTargetInfo = provider( }, ) -def licenses_info(): - return provider( - doc = """The transitive set of licenses used by a target.""", - fields = { - "target_under_license": "Label: The top level target label.", - "deps": "depset(LicensedTargetInfo): The transitive list of dependencies that have licenses.", - "licenses": "depset(LicenseInfo)", - "traces": "list(string) - diagnostic for tracing a dependency relationship to a target.", - }, - ) - -# This provider is used by the aspect that is used by manifest() rules. -TransitiveLicensesInfo = licenses_info() +# Constructor to reduce larger set of gathered data to what we want. 
+def TransitiveLicensesInfoInit(target_under_license=None, licenses=None, deps=None, traces=None, **kwargs): + return { + "target_under_license": target_under_license, + "deps": deps, + "licenses": licenses, + "traces": traces, + } + +TransitiveLicensesInfo, _raw_TransitiveLicensesInfo = provider( + doc = """The transitive set of licenses used by a target.""", + fields = { + "target_under_license": "Label: The top level target label.", + "deps": "depset(LicensedTargetInfo): The transitive list of dependencies that have licenses.", + "licenses": "depset(LicenseInfo)", + "traces": "list(string) - diagnostic for tracing a dependency relationship to a target.", + }, + init = TransitiveLicensesInfoInit, +) + +# This is one way to do specify data +PackageInfo = provider( + doc = """Provides information about a package.""", + fields = { + "type": "string: How to interpret data", + "label": "Label: label of the package_info rule", + "copyright_notice": "string: Human readable short copyright notice", + "package_name": "string: Human readable package name", + "package_url": "URL from which this package was downloaded.", + "package_version": "Human readable version string", + }, +) + +# This is more extensible. Because of the provider implementation, having a big +# dict of values rather than named fields is not much more costly. +# Design choice. 
Replace data with actual providers, such as PackageInfo +MetadataInfo = provider( + doc = """Generic bag of metadata.""", + fields = { + "type": "string: How to interpret data", + "label": "Label: label of the metadata rule", + "data": "String->any: Map of names to values", + } +) + +TransitiveMetadataInfo = provider( + doc = """The transitive set of licenses used by a target.""", + fields = { + "top_level_target": "Label: The top level target label.", + "other_metadata": "depset(MetatdataInfo)", + "licenses": "depset(LicenseInfo)", + "package_info": "depset(PackageInfo)", + + "target_under_license": "Label: The top level target label.", + "deps": "depset(LicensedTargetInfo): The transitive list of dependencies that have licenses.", + "traces": "list(string) - diagnostic for tracing a dependency relationship to a target.", + }, +) diff --git a/rules/sbom.bzl b/rules/sbom.bzl new file mode 100644 index 0000000..fb17adc --- /dev/null +++ b/rules/sbom.bzl @@ -0,0 +1,159 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""SBOM generation""" + +load( + "@rules_license//rules:gather_metadata.bzl", + "gather_metadata_info", + "gather_metadata_info_and_write", + "write_metadata_info", +) +load( + "@rules_license//rules:providers.bzl", + "TransitiveLicensesInfo", +) + +# This rule is proof of concept, and may not represent the final +# form of a rule for compliance validation. 
+def _generate_sbom_impl(ctx): + # Gather all licenses and write information to one place + + licenses_file = ctx.actions.declare_file("_%s_licenses_info.json" % ctx.label.name) + write_metadata_info(ctx, ctx.attr.deps, licenses_file) + + license_files = [] + # if ctx.outputs.license_texts: + # license_files = get_licenses_mapping(ctx.attr.deps).keys() + + # Now turn the big blob of data into something consumable. + inputs = [licenses_file] + outputs = [ctx.outputs.out] + args = ctx.actions.args() + args.add("--licenses_info", licenses_file.path) + args.add("--out", ctx.outputs.out.path) + ctx.actions.run( + mnemonic = "CreateSBOM", + progress_message = "Creating SBOM for %s" % ctx.label, + inputs = inputs, + outputs = outputs, + executable = ctx.executable._sbom_generator, + arguments = [args], + ) + outputs.append(licenses_file) # also make the json file available. + return [DefaultInfo(files = depset(outputs))] + +_generate_sbom = rule( + implementation = _generate_sbom_impl, + attrs = { + "deps": attr.label_list( + aspects = [gather_metadata_info], + ), + "out": attr.output(mandatory = True), + "_sbom_generator": attr.label( + default = Label("@rules_license//tools:write_sbom"), + executable = True, + allow_files = True, + cfg = "exec", + ), + }, +) + +def generate_sbom(**kwargs): + _generate_sbom(**kwargs) + +def _manifest_impl(ctx): + # Gather all licenses and make it available as deps for downstream rules + # Additionally write the list of license filenames to a file that can + # also be used as an input to downstream rules. 
+ licenses_file = ctx.actions.declare_file(ctx.attr.out.name) + mappings = get_licenses_mapping(ctx.attr.deps, ctx.attr.warn_on_legacy_licenses) + ctx.actions.write( + output = licenses_file, + content = "\n".join([",".join([f.path, p]) for (f, p) in mappings.items()]), + ) + return [DefaultInfo(files = depset(mappings.keys()))] + +_manifest = rule( + implementation = _manifest_impl, + doc = """Internal tmplementation method for manifest().""", + attrs = { + "deps": attr.label_list( + doc = """List of targets to collect license files for.""", + aspects = [gather_metadata_info], + ), + "out": attr.output( + doc = """Output file.""", + mandatory = True, + ), + "warn_on_legacy_licenses": attr.bool(default = False), + }, +) + +def manifest(name, deps, out = None, **kwargs): + if not out: + out = name + ".manifest" + + _manifest(name = name, deps = deps, out = out, **kwargs) + +def _licenses_used_impl(ctx): + # Gather all licenses and make it available as JSON + write_metadata_info(ctx, ctx.attr.deps, ctx.outputs.out) + return [DefaultInfo(files = depset([ctx.outputs.out]))] + +_licenses_used = rule( + implementation = _licenses_used_impl, + doc = """Internal tmplementation method for licenses_used().""", + attrs = { + "deps": attr.label_list( + doc = """List of targets to collect LicenseInfo for.""", + aspects = [gather_metadata_info_and_write], + ), + "out": attr.output( + doc = """Output file.""", + mandatory = True, + ), + }, +) + +def get_licenses_mapping(deps, warn = False): + """Creates list of entries representing all licenses for the deps. + + Args: + + deps: a list of deps which should have TransitiveLicensesInfo providers. 
+ This requires that you have run the gather_licenses_info + aspect over them + + warn: boolean, if true, display output about legacy targets that need + update + + Returns: + {File:package_name} + """ + tls = [] + for dep in deps: + lds = dep[TransitiveLicensesInfo].licenses + tls.append(lds) + + ds = depset(transitive = tls) + + # Ignore any legacy licenses that may be in the report + mappings = {} + for lic in ds.to_list(): + if type(lic.license_text) == "File": + mappings[lic.license_text] = lic.package_name + elif warn: + print("Legacy license %s not included, rule needs updating" % lic.license_text) + + return mappings diff --git a/tools/BUILD b/tools/BUILD index 9be1c2d..bc4005f 100644 --- a/tools/BUILD +++ b/tools/BUILD @@ -21,6 +21,14 @@ package( licenses(["notice"]) +filegroup( + name = "standard_package", + srcs = glob(["**"]), + visibility = ["//distro:__pkg__"], +) + +exports_files(["diff_test.sh"]) + py_binary( name = "checker_demo", srcs = ["checker_demo.py"], @@ -28,10 +36,9 @@ py_binary( visibility = ["//visibility:public"], ) -exports_files(["diff_test.sh"]) - -filegroup( - name = "standard_package", - srcs = glob(["**"]), - visibility = ["//distro:__pkg__"], +py_binary( + name = "write_sbom", + srcs = ["write_sbom.py"], + python_version = "PY3", + visibility = ["//visibility:public"], ) diff --git a/tools/checker_demo.py b/tools/checker_demo.py index 1075621..6cdf07f 100644 --- a/tools/checker_demo.py +++ b/tools/checker_demo.py @@ -90,7 +90,6 @@ def _do_copyright_notices(out, licenses): if l.get('package_version'): name = name + "/" + l['package_version'] # IGNORE_COPYRIGHT: Not a copyright notice. It is a variable holding one. 
- print(l) out.write('package(%s), copyright(%s)\n' % (name, l['copyright_notice'])) @@ -122,7 +121,6 @@ def main(): top_level_target = target['top_level_target'] dependencies = target['dependencies'] licenses = target['licenses'] - print(licenses) err = 0 with codecs.open(args.report, mode='w', encoding='utf-8') as rpt: diff --git a/tools/write_sbom.py b/tools/write_sbom.py new file mode 100644 index 0000000..c7c56ba --- /dev/null +++ b/tools/write_sbom.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Proof of concept license checker. + +This is only a demonstration. It will be replaced with other tools. +""" + +import argparse +import codecs +import json + + +def _load_package_data(package_info): + with codecs.open(package_info, encoding='utf-8') as inp: + return json.loads(inp.read()) + + +def unique_licenses(licenses): + for target in licenses: + for lic in target.get('licenses') or []: + yield lic + +def _write_sbom(out, packages): + """Produce a basic SBOM + + Args: + out: file object to write to + packages: package metadata. A big blob of JSON. + """ + for p in packages: + name = p.get('package_name') or '' + if p.get('package_version'): + name = name + "/" + p['package_version'] + out.write('# %s\n' % name) + # IGNORE_COPYRIGHT: Not a copyright notice. It is a variable holding one. 
+ cn = p.get('copyright_notice') + if cn: + out.write(' copyright: %s\n' % cn) + kinds = p.get('license_kinds') + if kinds: + out.write(' license(s): "%s"\n' % + ','.join([k['name'] for k in kinds])) + url = p.get('package_url') + if url: + out.write(' package URL: %s\n' % url) + + """ + for target in unique_licenses(licenses): + for lic in target.get('licenses') or []: + print("lic:", lic) + rule = lic['rule'] + for kind in lic['license_kinds']: + out.write('= %s\n kind: %s\n' % (rule, kind['target'])) + out.write(' conditions: %s\n' % kind['conditions']) + """ + +def main(): + parser = argparse.ArgumentParser( + description='Demonstraton license compliance checker') + + parser.add_argument('--licenses_info', + help='path to JSON file containing all license data') + parser.add_argument('--out', default='sbom.out', help='SBOM output') + args = parser.parse_args() + + license_data = _load_package_data(args.licenses_info) + target = license_data[0] # we assume only one target for the demo + + top_level_target = target['top_level_target'] + dependencies = target['dependencies'] + # It's not really packages, but this is close proxy for now + licenses = target['licenses'] + packages = target['packages'] + + err = 0 + with codecs.open(args.out, mode='w', encoding='utf-8') as rpt: + _write_sbom(rpt, licenses + packages) + return err + + +if __name__ == '__main__': + main() From bbff1f8db6e940b5c31ced69c41de21b781bcc04 Mon Sep 17 00:00:00 2001 From: Tony Aiuto Date: Thu, 10 Nov 2022 16:54:51 -0500 Subject: [PATCH 2/3] more spdx-y --- rules/gather_metadata.bzl | 9 ++++++ tools/write_sbom.py | 64 ++++++++++++++++++++++++++------------- 2 files changed, 52 insertions(+), 21 deletions(-) diff --git a/rules/gather_metadata.bzl b/rules/gather_metadata.bzl index 8a6a67e..ed302b6 100644 --- a/rules/gather_metadata.bzl +++ b/rules/gather_metadata.bzl @@ -42,6 +42,10 @@ def _strip_null_repo(label): return s[2:] return s +def _bazel_package(label): + l = _strip_null_repo(label) + 
return l[0:-(len(label.name) + 1)] + def _gather_metadata_info_impl(target, ctx): return gather_licenses_info_common(target, ctx, TransitiveMetadataInfo, NAMESPACES, [MetadataInfo, PackageInfo], should_traverse) @@ -183,6 +187,7 @@ def metadata_info_to_json(metadata_info): {{ "label": "{label}", "rule": "{label}", + "bazel_package": "{bazel_package}", "license_kinds": [{kinds} ], "copyright_notice": "{copyright_notice}", @@ -205,6 +210,7 @@ def metadata_info_to_json(metadata_info): package_info_template = """ {{ "target": "{label}", + "bazel_package": "{bazel_package}", "copyright_notice": "{copyright_notice}", "package_name": "{package_name}", "package_url": "{package_url}", @@ -242,6 +248,7 @@ def metadata_info_to_json(metadata_info): package_url = license.package_url, package_version = license.package_version, label = _strip_null_repo(license.label), + bazel_package = _bazel_package(license.label), used_by = ",\n ".join(sorted(['"%s"' % x for x in used_by[str(license.label)]])), )) @@ -275,6 +282,7 @@ def metadata_info_to_json(metadata_info): if mi.type == "package_info": all_packages.append(package_info_template.format( label = _strip_null_repo(mi.label), + bazel_package = _bazel_package(mi.label), copyright_notice = mi.copyright_notice, package_name = mi.package_name, package_url = mi.package_url, @@ -284,6 +292,7 @@ def metadata_info_to_json(metadata_info): if mi.type == "package_info2": all_packages.append(package_info_template.format( label = _strip_null_repo(mi.label), + bazel_package = _bazel_package(mi.label), copyright_notice = mi.data.get("copyright_notice") or "", package_name = mi.data.get("package_name") or "", package_url = mi.data.get("package_url") or "", diff --git a/tools/write_sbom.py b/tools/write_sbom.py index c7c56ba..18286ab 100644 --- a/tools/write_sbom.py +++ b/tools/write_sbom.py @@ -20,18 +20,34 @@ import argparse import codecs +import datetime import json +import os +TOOL = 'https//github.com/bazelbuild/rules_license/tools:write_sbom' 
+ def _load_package_data(package_info): with codecs.open(package_info, encoding='utf-8') as inp: return json.loads(inp.read()) +def _write_sbom_header(out, package): + header = [ + 'SPDXVersion: SPDX-2.2', + 'DataLicense: CC0-1.0', + 'SPDXID: SPDXRef-DOCUMENT', + 'DocumentName: %s' % package, + # TBD + # 'DocumentNamespace: https://swinslow.net/spdx-examples/example1/hello-v3 + 'Creator: Person: %s' % os.getlogin(), + 'Creator: Tool: %s' % TOOL, + datetime.datetime.utcnow().strftime('Created: %Y-%m-%d-%H:%M:%SZ'), + '', + '##### Package: %s' % package, + ] + out.write('\n'.join(header)) + -def unique_licenses(licenses): - for target in licenses: - for lic in target.get('licenses') or []: - yield lic def _write_sbom(out, packages): """Produce a basic SBOM @@ -42,30 +58,23 @@ def _write_sbom(out, packages): """ for p in packages: name = p.get('package_name') or '' + out.write('\n') + out.write('SPDXID: "%s"\n' % name) + out.write(' name: "%s"\n' % name) if p.get('package_version'): - name = name + "/" + p['package_version'] - out.write('# %s\n' % name) + out.write(' versionInfo: "%s"\n' % p['package_version']) # IGNORE_COPYRIGHT: Not a copyright notice. It is a variable holding one. 
cn = p.get('copyright_notice') if cn: - out.write(' copyright: %s\n' % cn) + out.write(' copyrightText: "%s"\n' % cn) kinds = p.get('license_kinds') if kinds: - out.write(' license(s): "%s"\n' % + out.write(' licenseDeclared: "%s"\n' % ','.join([k['name'] for k in kinds])) url = p.get('package_url') if url: - out.write(' package URL: %s\n' % url) + out.write(' downloadLocation: %s\n' % url) - """ - for target in unique_licenses(licenses): - for lic in target.get('licenses') or []: - print("lic:", lic) - rule = lic['rule'] - for kind in lic['license_kinds']: - out.write('= %s\n kind: %s\n' % (rule, kind['target'])) - out.write(' conditions: %s\n' % kind['conditions']) - """ def main(): parser = argparse.ArgumentParser( @@ -83,11 +92,24 @@ def main(): dependencies = target['dependencies'] # It's not really packages, but this is close proxy for now licenses = target['licenses'] - packages = target['packages'] + package_infos = target['packages'] + + # These are similar dicts, so merge them by package. This is not + # strictly true, as different licenses can appear in the same + # package, but it is good enough for demonstrating the sbom.
+ + all = {x['bazel_package']: x for x in licenses} + for pi in package_infos: + p = all.get(pi['bazel_package']) + if p: + p.update(pi) + else: + all[pi['bazel_package']] = pi err = 0 - with codecs.open(args.out, mode='w', encoding='utf-8') as rpt: - _write_sbom(rpt, licenses + packages) + with codecs.open(args.out, mode='w', encoding='utf-8') as out: + _write_sbom_header(out, package=top_level_target) + _write_sbom(out, all.values()) return err From 4a60c99f5b4b2f72a01a24fe7f134e69f184208d Mon Sep 17 00:00:00 2001 From: Tony Aiuto Date: Thu, 15 Dec 2022 11:46:29 -0500 Subject: [PATCH 3/3] merge from main --- rules/licenses_core.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rules/licenses_core.bzl b/rules/licenses_core.bzl index 8aa77a1..9a5148b 100644 --- a/rules/licenses_core.bzl +++ b/rules/licenses_core.bzl @@ -128,7 +128,7 @@ def gather_metadata_info_common(target, ctx, provider_factory, namespaces, metad Args: target: The target of the aspect. ctx: The aspect evaluation context. - provider: abstracts the provider returned by this aspect + provider_factory: abstracts the provider returned by this aspect namespaces: a list of namespaces licenses must match to be included metadata_providers: a list of other providers of interest filter_func: a function that returns true iff the dep edge should be ignored @@ -178,7 +178,7 @@ def gather_metadata_info_common(target, ctx, provider_factory, namespaces, metad _get_transitive_metadata(ctx, trans_licenses, trans_other_metadata, trans_package_info, trans_deps, traces, provider_factory, filter_func) if not licenses and not trans_licenses: - return [provider(deps = depset(), licenses = depset(), traces = [])] + return [provider_factory(deps = depset(), licenses = depset(), traces = [])] # If this is the target, start the sequence of traces. if ctx.attr._trace[TraceInfo].trace and ctx.attr._trace[TraceInfo].trace in str(ctx.label):