62 lines
1.9 KiB
Python
62 lines
1.9 KiB
Python
# Copyright (c) 2020, 2021 The Linux Foundation
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
import re
|
|
|
|
def getSPDXIDSafeCharacter(c):
    """
    Converts a character to an SPDX-ID-safe character.

    Only ASCII letters, ASCII digits, '-' and '.' are treated as safe.
    Non-ASCII letters and digits (for example accented letters or
    superscript digits) are replaced as well, even though str.isalpha()
    and str.isdigit() report them as alphabetic / numeric.

    Arguments:
        - c: character to test
    Returns: c if it is SPDX-ID-safe (ASCII letter, ASCII digit, '-' or '.');
             '-' otherwise
    """
    if c == "-" or c == ".":
        return c

    # str.isalpha() / str.isdigit() are Unicode-aware, so on their own they
    # would let characters outside [A-Za-z0-9] through; additionally require
    # every code point to be in the ASCII range.
    isAscii = all(ord(ch) < 128 for ch in c)
    if isAscii and (c.isalpha() or c.isdigit()):
        return c

    return "-"
|
|
|
|
def convertToSPDXIDSafe(s):
    """
    Builds an SPDX-ID-safe version of a filename or other string by
    passing each character through getSPDXIDSafeCharacter.

    The result is not guaranteed to be unique: distinct inputs can map to
    the same output, so callers needing a unique identifier must apply a
    separate check afterwards (such as the one in getUniqueFileID).

    Arguments:
        - s: string to be converted.
    Returns: string with all non-safe characters replaced with dashes.
    """
    safeChars = map(getSPDXIDSafeCharacter, s)
    return "".join(safeChars)
|
|
|
|
def getUniqueFileID(filenameOnly, timesSeen):
    """
    Produce an SPDX ID for a file that does not clash with any ID
    previously produced using the same timesSeen dict.

    Arguments:
        - filenameOnly: filename only (directories omitted) seeking ID.
        - timesSeen: dict mapping each SPDX-ID-safe converted filename to
                     the number of times it has been seen so far.
    Returns: unique SPDX ID; updates timesSeen to record this occurrence.
    """
    safeName = convertToSPDXIDSafe(filenameOnly)

    # record this occurrence of the converted name
    occurrence = timesSeen.get(safeName, 0) + 1
    timesSeen[safeName] = occurrence

    if occurrence > 1:
        # repeated name: disambiguate with the occurrence count
        suffix = f"-{occurrence}"
    elif re.search(r"-\d+$", safeName):
        # first occurrence, but the name itself already ends in
        # "-{number}"; append "-1" so it cannot overlap with the counter
        # suffix appended for a similarly-named file.
        suffix = "-1"
    else:
        # first occurrence of an ordinary name: no suffix needed
        suffix = ""

    return f"SPDXRef-File-{safeName}{suffix}"
|