[OE-core] [PATCH v2 1/2] spdx.bbclass: Create the spdx file which is compliant with SPDX 1.2 Specification
Lei Maohui
leimaohui at cn.fujitsu.com
Fri May 29 09:45:11 UTC 2015
The main changes are:
1. Use the "curl" command instead of "wget" when getting the spdx file from the FOSSologySPDX instance server.
Before applying these patches, the command is:
wget -qO - --no-check-certificate --timeout=0 --post-file=xxx/yyy/zzz.tar.gz http://localhost//?mod=spdx_license_once&noCopyright=${FOSS_COPYRIGHT}&recursiveUnpack=${FOSS_RECURSIVE_UNPACK}
After applying these patches, the command is:
curl http://127.0.0.1/repo/ --noproxy 127.0.0.1 -k -F "mod=spdx_license_once" -F "noCopyright=false" -F "jsonOutput=false" -F "fullSPDXFlag=true" -F "file=@ xxx/yyy/zzz.tar.gz" -o xxx/yyy/zzz.spdx
Because if the "wget" command is used, the mandatory fields of the SPDX Specification such as the following can't be obtained:
1) PackageLicenseInfoFromFiles(Package Information)
2) PackageLicenseDeclared(Package Information)
3) LicenseID(License Information)
4) ExtractedText(License Information)
5) LicenseName(License Information)
2. In order to avoid SPDX_S being polluted during a rebuild, create ${WORKDIR}/${SPDX_TEMP_DIR} to save the source.
3. Add mandatory fields to be compliant with the SPDX 1.2 Specification.
Signed-off-by: Lei Maohui <leimaohui at cn.fujitsu.com>
---
meta/classes/spdx.bbclass | 425 +++++++++++++++++-----------------------------
1 file changed, 155 insertions(+), 270 deletions(-)
diff --git a/meta/classes/spdx.bbclass b/meta/classes/spdx.bbclass
index 454c53e..09584af 100644
--- a/meta/classes/spdx.bbclass
+++ b/meta/classes/spdx.bbclass
@@ -15,178 +15,191 @@
# SPDX file will be output to the path which is defined as[SPDX_MANIFEST_DIR]
# in ./meta/conf/licenses.conf.
+SPDXOUTPUTDIR = "${WORKDIR}/spdx_output_dir"
SPDXSSTATEDIR = "${WORKDIR}/spdx_sstate_dir"
# If ${S} isn't actually the top-level source directory, set SPDX_S to point at
# the real top-level directory.
+
SPDX_S ?= "${S}"
python do_spdx () {
import os, sys
- import json, shutil
+ import json
+
+ # The source of gcc is too large to get its spdx, so give up.
+ bpn = d.getVar('BPN', True)
+ if ((bpn == "gcc") or (bpn == "libgcc")):
+ return None
info = {}
info['workdir'] = d.getVar('WORKDIR', True)
- info['sourcedir'] = d.getVar('SPDX_S', True)
info['pn'] = d.getVar('PN', True)
info['pv'] = d.getVar('PV', True)
+ info['package_download_location'] = d.getVar('SRC_URI', True)
+ if info['package_download_location'] != "":
+ info['package_download_location'] = info['package_download_location'].split()[0]
info['spdx_version'] = d.getVar('SPDX_VERSION', True)
info['data_license'] = d.getVar('DATA_LICENSE', True)
+ info['creator'] = {}
+ info['creator']['Tool'] = d.getVar('CREATOR_TOOL', True)
+ info['license_list_version'] = d.getVar('LICENSELISTVERSION', True)
+ info['package_homepage'] = d.getVar('HOMEPAGE', True)
+ info['package_summary'] = d.getVar('SUMMARY', True)
- sstatedir = d.getVar('SPDXSSTATEDIR', True)
- sstatefile = os.path.join(sstatedir, info['pn'] + info['pv'] + ".spdx")
-
+ spdx_sstate_dir = d.getVar('SPDXSSTATEDIR', True)
manifest_dir = d.getVar('SPDX_MANIFEST_DIR', True)
- info['outfile'] = os.path.join(manifest_dir, info['pn'] + ".spdx" )
-
- info['spdx_temp_dir'] = d.getVar('SPDX_TEMP_DIR', True)
- info['tar_file'] = os.path.join(info['workdir'], info['pn'] + ".tar.gz" )
-
+ info['outfile'] = os.path.join(manifest_dir, info['pn'] + "-" + info['pv'] + ".spdx")
+ sstatefile = os.path.join(spdx_sstate_dir,
+ info['pn'] + "-" + info['pv'] + ".spdx" )
+ info['tar_file'] = os.path.join(info['workdir'], info['pn'] + ".tar.gz")
+
# Make sure important dirs exist
try:
bb.utils.mkdirhier(manifest_dir)
- bb.utils.mkdirhier(sstatedir)
- bb.utils.mkdirhier(info['spdx_temp_dir'])
+ bb.utils.mkdirhier(spdx_sstate_dir)
except OSError as e:
bb.error("SPDX: Could not set up required directories: " + str(e))
return
## get everything from cache. use it to decide if
- ## something needs to be rerun
- cur_ver_code = get_ver_code(info['sourcedir'])
+ ## something needs to be rerun
+ d.setVar('WORKDIR', d.getVar('SPDX_TEMP_DIR', True))
+ info['sourcedir'] = d.getVar('SPDX_S', True)
+ cur_ver_code = get_ver_code(info['sourcedir']).split()[0]
cache_cur = False
if os.path.exists(sstatefile):
## cache for this package exists. read it in
cached_spdx = get_cached_spdx(sstatefile)
-
- if cached_spdx['PackageVerificationCode'] == cur_ver_code:
- bb.warn("SPDX: Verification code for " + info['pn']
- + "is same as cache's. do nothing")
+ if cached_spdx:
+ cached_spdx = cached_spdx.split()[0]
+ if (cached_spdx == cur_ver_code):
+ bb.warn(info['pn'] + "'s ver code same as cache's. do nothing")
cache_cur = True
- else:
- local_file_info = setup_foss_scan(info, True, cached_spdx['Files'])
- else:
- local_file_info = setup_foss_scan(info, False, None)
-
- if cache_cur:
- spdx_file_info = cached_spdx['Files']
- foss_package_info = cached_spdx['Package']
- foss_license_info = cached_spdx['Licenses']
- else:
+ create_manifest(info,sstatefile)
+ if not cache_cur:
## setup fossology command
foss_server = d.getVar('FOSS_SERVER', True)
- foss_flags = d.getVar('FOSS_WGET_FLAGS', True)
- foss_full_spdx = d.getVar('FOSS_FULL_SPDX', True) == "true" or False
- foss_command = "wget %s --post-file=%s %s"\
- % (foss_flags, info['tar_file'], foss_server)
-
- foss_result = run_fossology(foss_command, foss_full_spdx)
- if foss_result is not None:
- (foss_package_info, foss_file_info, foss_license_info) = foss_result
- spdx_file_info = create_spdx_doc(local_file_info, foss_file_info)
- ## write to cache
- write_cached_spdx(sstatefile, cur_ver_code, foss_package_info,
- spdx_file_info, foss_license_info)
+ foss_flags = d.getVar('FOSS_CURL_FLAGS', True)
+ foss_command = "curl %s -k %s -F \"file=@%s\" -o %s"\
+ % (foss_server,foss_flags,info['tar_file'],sstatefile)
+
+ #get the source tarball for fossy_scan
+ setup_foss_scan(info)
+ # Get the spdx file from the fossology server
+ run_fossology(foss_command)
+ if get_cached_spdx(sstatefile) != None:
+ write_cached_spdx(info,sstatefile,cur_ver_code)
+ ## CREATE MANIFEST(write to outfile )
+ create_manifest(info,sstatefile)
else:
- bb.error("SPDX: Could not communicate with FOSSology server. Command was: " + foss_command)
- return
-
- ## Get document and package level information
- spdx_header_info = get_header_info(info, cur_ver_code, foss_package_info)
-
- ## CREATE MANIFEST
- create_manifest(info, spdx_header_info, spdx_file_info, foss_license_info)
-
- ## clean up the temp stuff
- shutil.rmtree(info['spdx_temp_dir'], ignore_errors=True)
+ bb.warn('Can\'t get the spdx file' + info['pn'] + '. Please check your fossylogy server.')
if os.path.exists(info['tar_file']):
remove_file(info['tar_file'])
+ d.setVar('WORKDIR', info['workdir'])
+}
+#Get the src after do_patch.
+python do_get_spdx_s() {
+ import shutil
+ # The source of gcc is too large to get its spdx, so give up.
+ bpn = d.getVar('BPN', True)
+ if ((bpn == "gcc") or (bpn == "libgcc")):
+ return None
+ # Change the WORKDIR to make do_unpack do_patch run in another dir.
+ d.setVar('WORKDIR', d.getVar('SPDX_TEMP_DIR', True))
+ # The changed 'WORKDIR' also caused 'B' to change; create dir 'B' for
+ # the following tasks that may require it (such as some recipes whose
+ # do_patch requires 'B' to exist).
+ bb.utils.mkdirhier(d.getVar('B', True))
+
+ # The kernel source is ready after do_validate_branches
+ if bb.data.inherits_class('kernel-yocto', d):
+ shutil.copytree(d.getVar('S', True), d.getVar('WORKDIR', True) + "/kernel-source")
+ return None
+ else:
+ bb.build.exec_func('do_unpack', d)
+ # The S of the gcc source is work-share
+ if ((bpn == "gcc") or (bpn == "libgcc")):
+ d.setVar('S', d.getVar('WORKDIR', True) + "/gcc-" + d.getVar('PV', True))
+ bb.build.exec_func('do_patch', d)
}
-addtask spdx after do_patch before do_configure
-
-def create_manifest(info, header, files, licenses):
- import codecs
- with codecs.open(info['outfile'], mode='w', encoding='utf-8') as f:
- # Write header
- f.write(header + '\n')
- # Write file data
- for chksum, block in files.iteritems():
- f.write("FileName: " + block['FileName'] + '\n')
- for key, value in block.iteritems():
- if not key == 'FileName':
- f.write(key + ": " + value + '\n')
- f.write('\n')
+addtask get_spdx_s after do_patch before do_configure
+addtask spdx after do_get_spdx_s before do_configure
- # Write license data
- for id, block in licenses.iteritems():
- f.write("LicenseID: " + id + '\n')
- for key, value in block.iteritems():
- f.write(key + ": " + value + '\n')
- f.write('\n')
+def create_manifest(info,sstatefile):
+ import shutil
+ shutil.copyfile(sstatefile,info['outfile'])
def get_cached_spdx(sstatefile):
- import json
- import codecs
- cached_spdx_info = {}
- with codecs.open(sstatefile, mode='r', encoding='utf-8') as f:
- try:
- cached_spdx_info = json.load(f)
- except ValueError as e:
- cached_spdx_info = None
- return cached_spdx_info
+ import subprocess
+ if not os.path.exists(sstatefile):
+ return None
+
+ try:
+ output = subprocess.check_output(['grep', "PackageVerificationCode", sstatefile])
+ except subprocess.CalledProcessError as e:
+ return None
+ cached_spdx_info=output.split(': ')
+ return cached_spdx_info[1]
-def write_cached_spdx(sstatefile, ver_code, package_info, files, license_info):
- import json
- import codecs
- spdx_doc = {}
- spdx_doc['PackageVerificationCode'] = ver_code
- spdx_doc['Files'] = {}
- spdx_doc['Files'] = files
- spdx_doc['Package'] = {}
- spdx_doc['Package'] = package_info
- spdx_doc['Licenses'] = {}
- spdx_doc['Licenses'] = license_info
- with codecs.open(sstatefile, mode='w', encoding='utf-8') as f:
- f.write(json.dumps(spdx_doc))
+#add necessary information into spdx file
+def write_cached_spdx(info,sstatefile, ver_code):
+ import subprocess
-def setup_foss_scan(info, cache, cached_files):
- import errno, shutil
- import tarfile
- file_info = {}
- cache_dict = {}
+ def sed_replace(dest_sed_cmd,key_word,replace_info):
+ dest_sed_cmd = dest_sed_cmd + "-e 's#^" + key_word + ".*#" + \
+ key_word + replace_info + "#' "
+ return dest_sed_cmd
- for f_dir, f in list_files(info['sourcedir']):
- full_path = os.path.join(f_dir, f)
- abs_path = os.path.join(info['sourcedir'], full_path)
- dest_dir = os.path.join(info['spdx_temp_dir'], f_dir)
- dest_path = os.path.join(info['spdx_temp_dir'], full_path)
+ def sed_insert(dest_sed_cmd,key_word,new_line):
+ dest_sed_cmd = dest_sed_cmd + "-e '/^" + key_word \
+ + r"/a\\" + new_line + "' "
+ return dest_sed_cmd
- checksum = hash_file(abs_path)
- if not checksum is None:
- file_info[checksum] = {}
- ## retain cache information if it exists
- if cache and checksum in cached_files:
- file_info[checksum] = cached_files[checksum]
- ## have the file included in what's sent to the FOSSology server
- else:
- file_info[checksum]['FileName'] = full_path
- try:
- bb.utils.mkdirhier(dest_dir)
- shutil.copyfile(abs_path, dest_path)
- except OSError as e:
- bb.warn("SPDX: mkdirhier failed: " + str(e))
- except shutil.Error as e:
- bb.warn("SPDX: copyfile failed: " + str(e))
- except IOError as e:
- bb.warn("SPDX: copyfile failed: " + str(e))
- else:
- bb.warn("SPDX: Could not get checksum for file: " + f)
+ ## document level information
+ sed_cmd = r"sed -i -e 's#\r$##g' "
+ sed_cmd = sed_replace(sed_cmd,"SPDXVersion: ",info['spdx_version'])
+ spdx_DocumentComment = "<text>SPDX for " + info['pn'] + " version " \
+ + info['pv'] + "</text>"
+ sed_cmd = sed_replace(sed_cmd,"DocumentComment",spdx_DocumentComment)
- with tarfile.open(info['tar_file'], "w:gz") as tar:
- tar.add(info['spdx_temp_dir'], arcname=os.path.basename(info['spdx_temp_dir']))
+ ## Creator information
+ sed_cmd = sed_replace(sed_cmd,"Creator: Tool: ",info['creator']['Tool'])
+ sed_cmd = sed_insert(sed_cmd,"CreatorComment: ","LicenseListVersion: " + info['license_list_version'])
+
+ ## package level information
+ sed_cmd = sed_replace(sed_cmd,"PackageName: ",info['pn'])
+ sed_cmd = sed_replace(sed_cmd,"PackageVersion: ",info['pv'])
+ sed_cmd = sed_replace(sed_cmd,"PackageDownloadLocation: ",info['package_download_location'])
+ sed_cmd = sed_insert(sed_cmd,"PackageChecksum: ","PackageHomePage: " + info['package_homepage'])
+ sed_cmd = sed_replace(sed_cmd,"PackageSummary: ","<text>" + info['package_summary'] + "</text>")
+ sed_cmd = sed_replace(sed_cmd,"PackageFileName: ",os.path.basename(info['tar_file']))
+ sed_cmd = sed_replace(sed_cmd,"PackageVerificationCode: ",ver_code)
+ sed_cmd = sed_replace(sed_cmd,"PackageDescription: ",
+ "<text>" + info['pn'] + " version " + info['pv'] + "</text>")
+ sed_cmd = sed_cmd + sstatefile
+
+ subprocess.call("%s" % sed_cmd, shell=True)
+
+# Archive the SPDX_S source in order to get the spdx file from the fossology server
+def setup_foss_scan(info):
+ import tarfile,os
+ srcdir = info['sourcedir'].rstrip('/')
+ dirname = os.path.dirname(srcdir)
+ basename = os.path.basename(srcdir)
+ os.chdir(dirname)
+ tar = tarfile.open(info['tar_file'], 'w:gz')
+ tar.add(basename)
+ tar.close()
+
- return file_info
+def remove_dir_tree(dir_name):
+ import shutil
+ try:
+ shutil.rmtree(dir_name)
+ except:
+ pass
def remove_file(file_name):
try:
@@ -203,12 +216,14 @@ def list_files(dir):
def hash_file(file_name):
try:
- with open(file_name, 'rb') as f:
- data_string = f.read()
- sha1 = hash_string(data_string)
- return sha1
+ f = open(file_name, 'rb')
+ data_string = f.read()
except:
- return None
+ return None
+ finally:
+ f.close()
+ sha1 = hash_string(data_string)
+ return sha1
def hash_string(data):
import hashlib
@@ -216,150 +231,20 @@ def hash_string(data):
sha1.update(data)
return sha1.hexdigest()
-def run_fossology(foss_command, full_spdx):
- import string, re
- import subprocess
-
- p = subprocess.Popen(foss_command.split(),
- stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- foss_output, foss_error = p.communicate()
- if p.returncode != 0:
- return None
-
- foss_output = unicode(foss_output, "utf-8")
- foss_output = string.replace(foss_output, '\r', '')
-
- # Package info
- package_info = {}
- if full_spdx:
- # All mandatory, only one occurance
- package_info['PackageCopyrightText'] = re.findall('PackageCopyrightText: (.*?</text>)', foss_output, re.S)[0]
- package_info['PackageLicenseDeclared'] = re.findall('PackageLicenseDeclared: (.*)', foss_output)[0]
- package_info['PackageLicenseConcluded'] = re.findall('PackageLicenseConcluded: (.*)', foss_output)[0]
- # These may be more than one
- package_info['PackageLicenseInfoFromFiles'] = re.findall('PackageLicenseInfoFromFiles: (.*)', foss_output)
- else:
- DEFAULT = "NOASSERTION"
- package_info['PackageCopyrightText'] = "<text>" + DEFAULT + "</text>"
- package_info['PackageLicenseDeclared'] = DEFAULT
- package_info['PackageLicenseConcluded'] = DEFAULT
- package_info['PackageLicenseInfoFromFiles'] = []
-
- # File info
- file_info = {}
- records = []
- # FileName is also in PackageFileName, so we match on FileType as well.
- records = re.findall('FileName:.*?FileType:.*?</text>', foss_output, re.S)
- for rec in records:
- chksum = re.findall('FileChecksum: SHA1: (.*)\n', rec)[0]
- file_info[chksum] = {}
- file_info[chksum]['FileCopyrightText'] = re.findall('FileCopyrightText: '
- + '(.*?</text>)', rec, re.S )[0]
- fields = ['FileName', 'FileType', 'LicenseConcluded', 'LicenseInfoInFile']
- for field in fields:
- file_info[chksum][field] = re.findall(field + ': (.*)', rec)[0]
-
- # Licenses
- license_info = {}
- licenses = []
- licenses = re.findall('LicenseID:.*?LicenseName:.*?\n', foss_output, re.S)
- for lic in licenses:
- license_id = re.findall('LicenseID: (.*)\n', lic)[0]
- license_info[license_id] = {}
- license_info[license_id]['ExtractedText'] = re.findall('ExtractedText: (.*?</text>)', lic, re.S)[0]
- license_info[license_id]['LicenseName'] = re.findall('LicenseName: (.*)', lic)[0]
-
- return (package_info, file_info, license_info)
-
-def create_spdx_doc(file_info, scanned_files):
- import json
- ## push foss changes back into cache
- for chksum, lic_info in scanned_files.iteritems():
- if chksum in file_info:
- file_info[chksum]['FileType'] = lic_info['FileType']
- file_info[chksum]['FileChecksum: SHA1'] = chksum
- file_info[chksum]['LicenseInfoInFile'] = lic_info['LicenseInfoInFile']
- file_info[chksum]['LicenseConcluded'] = lic_info['LicenseConcluded']
- file_info[chksum]['FileCopyrightText'] = lic_info['FileCopyrightText']
- else:
- bb.warn("SPDX: " + lic_info['FileName'] + " : " + chksum
- + " : is not in the local file info: "
- + json.dumps(lic_info, indent=1))
- return file_info
+def run_fossology(foss_command):
+ import subprocess
+ subprocess.call(foss_command, shell=True)
def get_ver_code(dirname):
chksums = []
for f_dir, f in list_files(dirname):
- hash = hash_file(os.path.join(dirname, f_dir, f))
- if not hash is None:
- chksums.append(hash)
- else:
- bb.warn("SPDX: Could not hash file: " + path)
+ try:
+ stats = os.stat(os.path.join(dirname,f_dir,f))
+ except OSError as e:
+ bb.warn("Stat failed" + str(e) + "\n")
+ continue
+ chksums.append(hash_file(os.path.join(dirname,f_dir,f)))
ver_code_string = ''.join(chksums).lower()
ver_code = hash_string(ver_code_string)
return ver_code
-def get_header_info(info, spdx_verification_code, package_info):
- """
- Put together the header SPDX information.
- Eventually this needs to become a lot less
- of a hardcoded thing.
- """
- from datetime import datetime
- import os
- head = []
- DEFAULT = "NOASSERTION"
-
- package_checksum = hash_file(info['tar_file'])
- if package_checksum is None:
- package_checksum = DEFAULT
-
- ## document level information
- head.append("## SPDX Document Information")
- head.append("SPDXVersion: " + info['spdx_version'])
- head.append("DataLicense: " + info['data_license'])
- head.append("DocumentComment: <text>SPDX for "
- + info['pn'] + " version " + info['pv'] + "</text>")
- head.append("")
-
- ## Creator information
- ## Note that this does not give time in UTC.
- now = datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ')
- head.append("## Creation Information")
- ## Tools are supposed to have a version, but FOSSology+SPDX provides none.
- head.append("Creator: Tool: FOSSology+SPDX")
- head.append("Created: " + now)
- head.append("CreatorComment: <text>UNO</text>")
- head.append("")
-
- ## package level information
- head.append("## Package Information")
- head.append("PackageName: " + info['pn'])
- head.append("PackageVersion: " + info['pv'])
- head.append("PackageFileName: " + os.path.basename(info['tar_file']))
- head.append("PackageSupplier: Person:" + DEFAULT)
- head.append("PackageDownloadLocation: " + DEFAULT)
- head.append("PackageSummary: <text></text>")
- head.append("PackageOriginator: Person:" + DEFAULT)
- head.append("PackageChecksum: SHA1: " + package_checksum)
- head.append("PackageVerificationCode: " + spdx_verification_code)
- head.append("PackageDescription: <text>" + info['pn']
- + " version " + info['pv'] + "</text>")
- head.append("")
- head.append("PackageCopyrightText: "
- + package_info['PackageCopyrightText'])
- head.append("")
- head.append("PackageLicenseDeclared: "
- + package_info['PackageLicenseDeclared'])
- head.append("PackageLicenseConcluded: "
- + package_info['PackageLicenseConcluded'])
-
- for licref in package_info['PackageLicenseInfoFromFiles']:
- head.append("PackageLicenseInfoFromFiles: " + licref)
- head.append("")
-
- ## header for file level
- head.append("## File Information")
- head.append("")
-
- return '\n'.join(head)
--
1.8.4.2
More information about the Openembedded-core
mailing list