incubator-nuttx/tools/licensing/check.py

325 lines
8.4 KiB
Python
Raw Normal View History

2020-09-18 02:55:15 +08:00
#!/usr/bin/env python3
############################################################################
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership. The
# ASF licenses this file to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance with the
# License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
############################################################################
2021-04-10 00:35:47 +08:00
import getopt
2020-09-18 02:55:15 +08:00
import json
2021-04-10 00:35:47 +08:00
import os
2020-09-18 02:55:15 +08:00
import re
import subprocess
2021-04-10 00:35:47 +08:00
import sys
2020-09-18 02:55:15 +08:00
import termcolor
# CLA databases and author-name mappings. They start out empty and are
# filled in by the JSON-loading code at module level further down.
committers_json = None
non_committers_json = None
author_mappings_json = None

# Output settings; changed by the -v and -n command-line options.
verbose_level = 0
color = True
2021-04-05 08:09:12 +08:00
def colored(s, c):
    """Return *s* colored with *c*, or *s* unchanged when color is disabled.

    Delegates to ``termcolor.colored`` while the module-level ``color`` flag
    is true; otherwise the text is passed through untouched.
    """
    return termcolor.colored(s, c) if color else s
2021-04-05 08:09:12 +08:00
2020-09-18 02:55:15 +08:00
def commit_attributions(c):
    """Extract attribution strings from a commit's subject and body.

    A match is one of the keywords "by", "from", "author" or
    "Co-authored-by" (case-insensitive, optional trailing colon) followed by
    at least one space; the rest of that line is captured. The keywords are
    not anchored to word boundaries, so this is a heuristic.

    Args:
        c: Commit dict with "message" and "body" entries. A missing or
            ``None`` entry is treated as empty text.

    Returns:
        List of captured strings: matches from the message first, then
        matches from the body.
    """
    regex = re.compile(r"(?i)(?:by|from|author|Co-authored-by):? +(.+)")
    # Either field may be absent or null in the JSON; treat that as "no
    # text" instead of letting re raise TypeError on None.
    texts = (c.get("message") or "", c.get("body") or "")
    return [found for text in texts for found in regex.findall(text)]
2020-09-18 02:55:15 +08:00
def get_headers(s):
    """Return the license-header comment blocks found in *s*.

    A match starts at ``/***``, must contain the word "Copyright" (any
    case) and ends at the next run of three or more ``*`` followed by ``/``.
    Matching is lazy and spans newlines.

    Args:
        s: File contents as text.

    Returns:
        List of matched header strings, in order of appearance; empty when
        nothing matches.
    """
    # Raw string: "\*" in a normal string literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    return re.findall(r"(?i)/\*\*\*.+?(?:Copyright).+?\*\*\*+/", s, re.DOTALL)
2020-09-18 02:55:15 +08:00
def get_file(blob):
    """Return the text of git object *blob*, or ``None`` if git cannot read it.

    Runs ``git cat-file -p <blob>`` with stderr discarded.

    Args:
        blob: Object name (hash) to print.

    Returns:
        The object's contents decoded as UTF-8, or ``None`` when git exits
        with a non-zero status.
    """
    try:
        raw = subprocess.check_output(
            ["git", "cat-file", "-p", blob], stderr=subprocess.DEVNULL
        )
    except subprocess.CalledProcessError:
        return None
    # Blobs are not guaranteed to be valid UTF-8; substitute undecodable
    # bytes rather than aborting the whole analysis with UnicodeDecodeError.
    return raw.decode("utf-8", errors="replace")
2021-04-05 08:09:12 +08:00
2020-09-18 02:55:15 +08:00
def header_authors(header):
    """Extract author entries from a license header.

    Takes the text between "Author:"/"Authors:" and the word
    "Redistribution", splits it at comment line breaks (a newline followed
    by spaces and/or ``*``), strips a leading "Copyright (C) <year>" prefix
    from each entry and drops entries that end up empty.

    Args:
        header: One header comment block as text.

    Returns:
        List of author strings; empty when the header has no author section.
    """
    sections = re.findall(
        r"[Aa]uthors?: +(.+?) *(?:Redistribution)", header, re.DOTALL
    )
    # Split and flatten in one pass (avoids the quadratic sum(lists, [])).
    entries = [
        entry for section in sections for entry in re.split(r"\n[ *]+", section)
    ]
    cleaned = [
        re.sub(r"[Cc]opyright:?( ?.[Cc].)? *([12][0-9]{3}[,-]? ?)", "", entry)
        for entry in entries
    ]
    return [entry for entry in cleaned if entry != ""]
2021-04-05 08:09:12 +08:00
2021-02-25 20:48:46 +08:00
# Search for an author name in Apache's committers/non-committers
2020-09-18 02:55:15 +08:00
# database. It will return (apacheID,name) if there's a match or
# None if not. apacheID might be None if there's no Apache ID
# for author
2021-04-05 08:09:12 +08:00
2020-09-18 02:55:15 +08:00
def search_for_cla(name):
    """Look up *name* in the loaded CLA databases.

    Returns:
        ``(apache_id, name)`` for the first entry of
        ``committers_json["committers"]`` whose value equals *name*;
        ``(None, name)`` when *name* is only present in
        ``non_committers_json["non_committers"]``; ``None`` otherwise.
    """
    committer_hit = next(
        (
            (apache_id, full_name)
            for apache_id, full_name in committers_json["committers"].items()
            if full_name == name
        ),
        None,
    )
    if committer_hit is not None:
        return committer_hit

    # No Apache ID on record: fall back to the list of signers without one.
    listed_without_id = name in non_committers_json["non_committers"]
    return (None, name) if listed_without_id else None
2021-04-05 08:09:12 +08:00
2020-09-18 02:55:15 +08:00
# Returns the same as above, but this takes an author string
# (which may include an email address; the email is used
# to look for alternative author names for this person)
2021-04-05 08:09:12 +08:00
2020-09-18 02:55:15 +08:00
def author_has_cla(author):
    """Check whether *author* has a CLA on record.

    Args:
        author: Author text, either just a name or "Name email" where the
            email may be wrapped in angle brackets.

    Returns:
        Whatever ``search_for_cla`` returns for the name (or, failing that,
        for the ``author_mappings_json`` entry of the email), or ``None``
        when nothing matches or the text contains an '@' but does not end
        in something shaped like an email.
    """
    if "@" in author:
        matches = re.match(r"^(.+?)(?: +([^ ]+@[^ ]+ *))$", author)
        if not matches:
            # found an '@' but it wasn't an email, so this is most likely
            # not really an author
            return None

        name = matches.group(1)
        # The pattern lets trailing spaces into the email group; drop them
        # first, otherwise rstrip(">") never reaches the closing bracket
        # and the mapping lookup below misses.
        email = matches.group(2).strip().lstrip("<").rstrip(">")
    else:
        name = author.strip()
        email = None

    vvvprint("name: %s email: %s" % (name, email if email else "?"))

    # first look for name directly
    result = search_for_cla(name)
    if result:
        return result

    # otherwise, look up the alternative name registered for this email
    # NOTE(review): the mapping value is passed to search_for_cla as-is, so
    # it is presumably a single name string -- confirm against the JSON.
    if email and (email in author_mappings_json):
        result = search_for_cla(author_mappings_json[email])
        if result:
            return result

    # Nothing matched
    return None
2021-04-05 08:09:12 +08:00
2020-09-18 02:55:15 +08:00
def header_copyrights(header):
    """Extract copyright holders from a license header.

    For every comment line of the form `` * Copyright [(C)] [years] holder``
    the text after the optional "(C)" marker and year list is captured, and
    a trailing "All rights reserved" phrase is removed from it.

    Args:
        header: One header comment block as text.

    Returns:
        List of holder strings, one per matching line, in order.
    """
    # Raw strings: "\*" in a normal string literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    holders = re.findall(
        r" \* *[Cc]opyright:?(?: ?.[Cc].)? *(?:[12][0-9]{3}[,-]? ?)* *(.+)", header
    )
    return [re.sub(r"(. )?[Aa]ll rights reserved.?", "", holder) for holder in holders]
2020-09-18 02:55:15 +08:00
def report_cla(author):
    """Print one report line for *author*, prefixed with a CLA status mark.

    A green check mark is printed when ``author_has_cla`` finds a CLA and a
    red cross otherwise, followed by the author text. When the match
    carries an Apache ID it is appended as "(ID: ...)".

    Args:
        author: Author text as collected from commits or file headers.
    """
    cla = author_has_cla(author)
    if cla:
        apacheid, _ = cla
        # A visible glyph is required: an empty marker makes the two cases
        # indistinguishable, in particular when color is disabled.
        marker = colored("✓", "green")
    else:
        apacheid = None
        marker = colored("✗", "red")

    print(marker, end=" ")
    if apacheid:
        print("%s (ID: %s)" % (author, apacheid))
    else:
        print(author)
2021-04-05 08:09:12 +08:00
2020-09-18 02:55:15 +08:00
def analyze(j):
    """Print a licensing report for the commit history given in *j*.

    Each commit's details are printed at verbose level 1 (headers at
    level 2). Attributions found in commit messages, commit authors,
    header authors and header copyright holders are collected across all
    commits and printed in a final report. Attributions and authors are
    each checked for a CLA via ``report_cla``.

    Args:
        j: Sized iterable of commit dicts. Each is read for the keys
            "commit", "blob", "date", "author", "author-email", "message"
            and "body".
    """
    complete_attributions = set()
    complete_authors = set()
    complete_copyrights = set()

    vprint("file has %i commits" % len(j))

    for commit in j:
        vprint(colored("-", "yellow"))
        vprint(colored("commit: ", "green") + commit["commit"])
        vprint(colored("blob: ", "green") + commit["blob"])
        vprint(colored("date: ", "green") + commit["date"])
        vprint(
            colored("author: ", "green")
            + ("%s <%s>" % (commit["author"], commit["author-email"]))
        )

        attributions = commit_attributions(commit)
        if attributions:
            vprint(colored("attributions:", "green"))
            for attribution in attributions:
                vprint(attribution)

        complete_attributions.update(attributions)
        complete_authors.add(commit["author"] + " " + commit["author-email"])

        # skip deletion commits
        vprint(colored("blob:", "green"), end=" ")
        if commit["blob"] == "0000000000000000000000000000000000000000":
            vprint("zero (deletion)")
            continue

        file_contents = get_file(commit["blob"])

        # skip inaccessible blobs (probably lived in a submodule)
        if not file_contents:
            vprint("inaccessible")
            continue
        vprint("available")

        headers = get_headers(file_contents)

        vprint(colored("header authors:", "green"))
        for header in headers:
            ha = header_authors(header)
            complete_authors.update(ha)
            vprint(ha)

        vprint(colored("header copyrights:", "green"))
        for header in headers:
            hc = header_copyrights(header)
            complete_copyrights.update(hc)
            vprint(hc)

        vprint(colored("commit description:", "green"))
        vprint(commit["message"])

        if commit["body"]:
            vprint(colored("commit msg body:", "green"))
            vprint(commit["body"])

        vvprint(colored("headers:", "green"))
        for header in headers:
            vvprint(header)

    vprint(colored("----\n", "yellow"))

    # The collections are sets of strings, whose iteration order changes
    # from run to run; sort them so the report is reproducible.
    print(colored("COMPLETE REPORT:", "blue"))
    print(colored("attributions:", "green"))
    if not complete_attributions:
        print("*none detected*")
    else:
        for attribution in sorted(complete_attributions):
            report_cla(attribution)

    print(colored("authors:", "green"))
    for author in sorted(complete_authors):
        report_cla(author)

    print(colored("copyrights:", "green"))
    print("\n".join(sorted(complete_copyrights)))
2020-09-18 02:55:15 +08:00
def print_help():
    """Print the command-line usage summary to standard output."""
    usage = "Usage: check.py [-v] [-n] <JSON file>\n"
    options = (
        " -v\tIncrease verbosity (add up to three times)\n"
        " -n\tDo not use color for output"
    )
    for chunk in (usage, options):
        print(chunk)
2020-09-18 02:55:15 +08:00
def vprint(*args, **kwargs):
    """Forward to ``print`` only when ``verbose_level`` is greater than 0."""
    if verbose_level <= 0:
        return
    print(*args, **kwargs)
2021-04-05 08:09:12 +08:00
2020-09-18 02:55:15 +08:00
def vvprint(*args, **kwargs):
    """Forward to ``print`` only when ``verbose_level`` is greater than 1."""
    if verbose_level <= 1:
        return
    print(*args, **kwargs)
2021-04-05 08:09:12 +08:00
2020-09-18 02:55:15 +08:00
def vvvprint(*args, **kwargs):
    """Forward to ``print`` only when ``verbose_level`` is greater than 2."""
    if verbose_level <= 2:
        return
    print(*args, **kwargs)
2021-04-05 08:09:12 +08:00
2020-09-18 02:55:15 +08:00
#####
def _load_json_beside_script(suffix):
    """Parse the JSON file at <directory of this script> + *suffix*."""
    with open(os.path.dirname(os.path.abspath(__file__)) + suffix, "r") as file:
        return json.load(file)


# First try to load the CLAs JSONs; any failure here ends the run with
# exit status 2 and a pointer to the download instructions.
try:
    committers_json = _load_json_beside_script("/icla-info.json")
    non_committers_json = _load_json_beside_script("/icla-info_noid.json")
except Exception:
    print(
        "Could not open CLA JSON files, please read README.md for download instructions"
    )
    sys.exit(2)

# Open author mappings JSON (errors here are not caught)
author_mappings_json = _load_json_beside_script("/author_mappings.json")
# Parse command-line options: -h (help), -n (no color), -v (more verbose).
try:
    opts, args = getopt.getopt(sys.argv[1:], "hnv")
except getopt.GetoptError:
    print_help()
    sys.exit(2)

for opt, _ in opts:
    if opt == "-h":
        print_help()
        sys.exit()
    if opt == "-v":
        verbose_level += 1
    elif opt == "-n":
        color = False

# Exactly one non-empty positional argument (the JSON input) is required.
if len(args) != 1 or not args[0]:
    print_help()
    sys.exit(2)

f = args[0]

# "-" means: read the JSON document from standard input.
if f == "-":
    j = json.load(sys.stdin)
else:
    with open(f, "r") as file:
        j = json.load(file)

analyze(j)