616 lines
21 KiB
Python
Executable File
616 lines
21 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
# Copyright (c) 2019 Nordic Semiconductor ASA
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
"""
|
|
Lists maintainers for files or commits. Similar in function to
|
|
scripts/get_maintainer.pl from Linux, but geared towards GitHub. The mapping is
|
|
in MAINTAINERS.yml.
|
|
|
|
The comment at the top of MAINTAINERS.yml in Zephyr documents the file format.
|
|
|
|
See the help texts for the various subcommands for more information. They can
|
|
be viewed with e.g.
|
|
|
|
./get_maintainer.py path --help
|
|
|
|
This executable doubles as a Python library. Identifiers not prefixed with '_'
|
|
are part of the library API. The library documentation can be viewed with this
|
|
command:
|
|
|
|
$ pydoc get_maintainer
|
|
"""
|
|
|
|
import argparse
|
|
import operator
|
|
import os
|
|
import pathlib
|
|
import re
|
|
import shlex
|
|
import subprocess
|
|
import sys
|
|
|
|
from yaml import load, YAMLError
|
|
try:
|
|
# Use the speedier C LibYAML parser if available
|
|
from yaml import CSafeLoader as SafeLoader
|
|
except ImportError:
|
|
from yaml import SafeLoader
|
|
|
|
|
|
def _main():
    # Script entry point: parses the command line, loads the maintainers
    # file, and hands over to the handler of the selected subcommand.
    #
    # MaintainersError and GitError are reported through _serr() instead of
    # surfacing as a traceback.

    cli_args = _parse_args()
    handler = cli_args.cmd_fn

    try:
        maintainers_db = Maintainers(cli_args.maintainers)
        handler(maintainers_db, cli_args)
    except (MaintainersError, GitError) as exc:
        _serr(exc)
|
|
|
|
|
|
def _parse_args():
    # Parses arguments when run as an executable.
    #
    # Sets up the global -m/--maintainers option plus one sub-parser per
    # subcommand (path, commits, list, areas, orphaned, count). Each
    # sub-parser stores the Maintainers method that implements it in the
    # 'cmd_fn' attribute of the result.
    #
    # Returns the parsed argparse.Namespace. Exits with the usage text if no
    # subcommand was given.

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__, allow_abbrev=False)

    parser.add_argument(
        "-m", "--maintainers",
        metavar="MAINTAINERS_FILE",
        help="Maintainers file to load. If not specified, MAINTAINERS.yml in "
             "the top-level repository directory is used, and must exist. "
             "Paths in the maintainers file will always be taken as relative "
             "to the top-level directory.")

    subparsers = parser.add_subparsers(
        help="Available commands (each has a separate --help text)")

    path_parser = subparsers.add_parser(
        "path",
        help="List area(s) for paths")
    path_parser.add_argument(
        "paths",
        metavar="PATH",
        nargs="*",
        help="Path to list areas for")
    path_parser.set_defaults(cmd_fn=Maintainers._path_cmd)

    commits_parser = subparsers.add_parser(
        "commits",
        help="List area(s) for commit range")
    commits_parser.add_argument(
        "commits",
        metavar="COMMIT_RANGE",
        nargs="*",
        help="Commit range to list areas for (default: HEAD~..)")
    commits_parser.set_defaults(cmd_fn=Maintainers._commits_cmd)

    list_parser = subparsers.add_parser(
        "list",
        help="List files in areas")
    list_parser.add_argument(
        "area",
        metavar="AREA",
        nargs="?",
        # Non-orphaned files are the ones that DO appear in some area
        help="Name of area to list files in. If not specified, all "
             "non-orphaned files are listed (all files that appear in "
             "some area).")
    list_parser.set_defaults(cmd_fn=Maintainers._list_cmd)

    areas_parser = subparsers.add_parser(
        "areas",
        help="List areas and maintainers")
    areas_parser.add_argument(
        "maintainer",
        metavar="MAINTAINER",
        nargs="?",
        help="List all areas maintained by maintainer.")
    areas_parser.set_defaults(cmd_fn=Maintainers._areas_cmd)

    orphaned_parser = subparsers.add_parser(
        "orphaned",
        help="List orphaned files (files that do not appear in any area)")
    orphaned_parser.add_argument(
        "path",
        metavar="PATH",
        nargs="?",
        help="Limit to files under PATH")
    orphaned_parser.set_defaults(cmd_fn=Maintainers._orphaned_cmd)

    count_parser = subparsers.add_parser(
        "count",
        help="Count areas, unique maintainers, and / or unique collaborators")
    count_parser.add_argument(
        "-a",
        "--count-areas",
        action="store_true",
        help="Count the number of areas")
    count_parser.add_argument(
        "-c",
        "--count-collaborators",
        action="store_true",
        help="Count the number of unique collaborators")
    count_parser.add_argument(
        "-n",
        "--count-maintainers",
        action="store_true",
        help="Count the number of unique maintainers")
    count_parser.add_argument(
        "-o",
        "--count-unmaintained",
        action="store_true",
        help="Count the number of unmaintained areas")
    count_parser.set_defaults(cmd_fn=Maintainers._count_cmd)

    args = parser.parse_args()
    if not hasattr(args, "cmd_fn"):
        # Called without a subcommand
        sys.exit(parser.format_usage().rstrip())

    return args
|
|
|
|
|
|
class Maintainers:
    """
    Represents the contents of a maintainers YAML file.

    These attributes are available:

    areas:
        A dictionary that maps area names to Area instances, for all areas
        defined in the maintainers file

    filename:
        The path to the maintainers file
    """
    def __init__(self, filename=None):
        """
        Creates a Maintainers instance.

        filename (default: None):
            Path to the maintainers file to parse. If None, or if no file
            exists at that path, MAINTAINERS.yml in the top-level directory
            of the Git repository is used, and must exist.

        Raises MaintainersError if the file is malformed, and GitError if
        the top-level directory has to be looked up and Git fails.
        """
        if (filename is not None) and (pathlib.Path(filename).exists()):
            self.filename = pathlib.Path(filename)
            # Paths in the file are taken as relative to its directory
            self._toplevel = self.filename.parent
        else:
            self._toplevel = pathlib.Path(_git("rev-parse", "--show-toplevel"))
            self.filename = self._toplevel / "MAINTAINERS.yml"

        self.areas = {}
        for area_name, area_dict in _load_maintainers(self.filename).items():
            area = Area()
            area.name = area_name
            area.status = area_dict.get("status")
            area.maintainers = area_dict.get("maintainers", [])
            area.collaborators = area_dict.get("collaborators", [])
            area.inform = area_dict.get("inform", [])
            area.labels = area_dict.get("labels", [])
            area.tests = area_dict.get("tests", [])
            area.tags = area_dict.get("tags", [])
            area.description = area_dict.get("description")

            # area._match_fn(path) tests if the path matches files and/or
            # files-regex
            area._match_fn = \
                _get_match_fn(area_dict.get("files"),
                              area_dict.get("files-regex"))

            # Like area._match_fn(path), but for files-exclude and
            # files-regex-exclude
            area._exclude_match_fn = \
                _get_match_fn(area_dict.get("files-exclude"),
                              area_dict.get("files-regex-exclude"))

            self.areas[area_name] = area

    def path2areas(self, path):
        """
        Returns a list of Area instances for the areas that contain 'path',
        taken as relative to the current directory
        """
        # Make directory paths end in '/' so that foo/bar matches foo/bar/.
        # Skip this check in _contains() itself, because the isdir() makes it
        # twice as slow in cases where it's not needed.
        is_dir = os.path.isdir(path)

        # Make 'path' relative to the repository root and normalize it.
        # normpath() would remove a trailing '/', so we add it afterwards.
        path = os.path.normpath(os.path.join(
            os.path.relpath(os.getcwd(), self._toplevel),
            path))

        if is_dir:
            path += "/"

        return [area for area in self.areas.values()
                if area._contains(path)]

    def commits2areas(self, commits):
        """
        Returns a set() of Area instances for the areas that contain files that
        are modified by the commit range in 'commits'. 'commits' could be e.g.
        "HEAD~..", to inspect the tip commit
        """
        res = set()
        # Final '--' is to make sure 'commits' is interpreted as a commit range
        # rather than a path. That might give better error messages.
        for path in _git("diff", "--name-only", commits, "--").splitlines():
            res.update(self.path2areas(path))
        return res

    def __repr__(self):
        return "<Maintainers for '{}'>".format(self.filename)

    #
    # Command-line subcommands
    #

    def _path_cmd(self, args):
        # 'path' subcommand implementation. Prints the areas that contain
        # the paths in args.paths, followed by the paths that are in no area.

        # Validate everything up front so nothing is printed on bad input
        for path in args.paths:
            if not os.path.exists(path):
                _serr("'{}': no such file or directory".format(path))

        res = set()
        orphaned = []
        for path in args.paths:
            areas = self.path2areas(path)
            res.update(areas)
            if not areas:
                orphaned.append(path)

        _print_areas(res)
        if orphaned:
            if res:
                print()
            print("Orphaned paths (not in any area):\n" + "\n".join(orphaned))

    def _commits_cmd(self, args):
        # 'commits' subcommand implementation. Prints the union of the areas
        # touched by each commit range in args.commits.

        commits = args.commits or ("HEAD~..",)
        _print_areas({area for commit_range in commits
                      for area in self.commits2areas(commit_range)})

    def _areas_cmd(self, args):
        # 'areas' subcommand implementation. Prints one line per area with
        # its maintainers, limited to the areas maintained by
        # args.maintainer if that is given.

        for area in self.areas.values():
            if args.maintainer and args.maintainer not in area.maintainers:
                continue
            print("{:25}\t{}".format(area.name, ",".join(area.maintainers)))

    def _count_cmd(self, args):
        # 'count' subcommand implementation. Prints the counts selected by
        # the args.count_* flags.

        if not (args.count_areas or args.count_collaborators or
                args.count_maintainers or args.count_unmaintained):
            # if no specific count is provided, print them all
            args.count_areas = True
            args.count_collaborators = True
            args.count_maintainers = True
            args.count_unmaintained = True

        unmaintained = 0
        collaborators = set()
        maintainers = set()

        for area in self.areas.values():
            if area.status == 'maintained':
                maintainers.update(area.maintainers)
            elif area.status in ('odd fixes', 'unmaintained'):
                # 'unmaintained' is a valid status in the maintainers file
                # too, so it has to be counted along with 'odd fixes'
                unmaintained += 1
            # Collaborators are collected regardless of the area status
            collaborators.update(area.collaborators)

        if args.count_areas:
            print('{:14}\t{}'.format('areas:', len(self.areas)))
        if args.count_maintainers:
            print('{:14}\t{}'.format('maintainers:', len(maintainers)))
        if args.count_collaborators:
            print('{:14}\t{}'.format('collaborators:', len(collaborators)))
        if args.count_unmaintained:
            print('{:14}\t{}'.format('unmaintained:', unmaintained))

    def _list_cmd(self, args):
        # 'list' subcommand implementation. Prints the files that Git lists
        # and that are in args.area, or in any area if args.area is None.

        if args.area is None:
            # List all files that appear in some area
            for path in _ls_files():
                if any(area._contains(path) for area in self.areas.values()):
                    print(path)
        else:
            # List all files that appear in the given area
            area = self.areas.get(args.area)
            if area is None:
                _serr("'{}': no such area defined in '{}'"
                      .format(args.area, self.filename))

            for path in _ls_files():
                if area._contains(path):
                    print(path)

    def _orphaned_cmd(self, args):
        # 'orphaned' subcommand implementation. Prints the files that Git
        # lists (under args.path, if given) and that are in no area.

        if args.path is not None and not os.path.exists(args.path):
            _serr("'{}': no such file or directory".format(args.path))

        for path in _ls_files(args.path):
            if not any(area._contains(path) for area in self.areas.values()):
                print(path)
|
|
|
|
|
|
class Area:
    """
    Represents an entry for an area in MAINTAINERS.yml.

    These attributes are available:

    name:
        The name of the area, as a string

    status:
        The status of the area, as a string. None if the area has no 'status'
        key. See MAINTAINERS.yml.

    maintainers:
        List of maintainers. Empty if the area has no 'maintainers' key.

    collaborators:
        List of collaborators. Empty if the area has no 'collaborators' key.

    inform:
        List of people to inform on pull requests. Empty if the area has no
        'inform' key.

    labels:
        List of GitHub labels for the area. Empty if the area has no 'labels'
        key.

    tests:
        List of strings from the 'tests' key. Empty if the area has no
        'tests' key.

    tags:
        List of strings from the 'tags' key. Empty if the area has no 'tags'
        key.

    description:
        Text from 'description' key, or None if the area has no 'description'
        key
    """
    def _contains(self, path):
        # Returns True if the area contains 'path', and False otherwise.
        #
        # _match_fn and _exclude_match_fn are either None (nothing to match
        # against) or a function that returns a truthy value for paths that
        # match. The result is always a real bool, never None or a match
        # object.

        if not (self._match_fn and self._match_fn(path)):
            return False

        return not (self._exclude_match_fn and self._exclude_match_fn(path))

    def __repr__(self):
        return "<Area {}>".format(self.name)
|
|
|
|
|
|
def _print_areas(areas):
|
|
first = True
|
|
for area in sorted(areas, key=operator.attrgetter("name")):
|
|
if not first:
|
|
print()
|
|
first = False
|
|
|
|
print("""\
|
|
{}
|
|
\tstatus: {}
|
|
\tmaintainers: {}
|
|
\tcollaborators: {}
|
|
\tinform: {}
|
|
\tlabels: {}
|
|
\ttests: {}
|
|
\ttags: {}
|
|
\tdescription: {}""".format(area.name,
|
|
area.status,
|
|
", ".join(area.maintainers),
|
|
", ".join(area.collaborators),
|
|
", ".join(area.inform),
|
|
", ".join(area.labels),
|
|
", ".join(area.tests),
|
|
", ".join(area.tags),
|
|
area.description or ""))
|
|
|
|
|
|
def _get_match_fn(globs, regexes):
|
|
# Constructs a single regex that tests for matches against the globs in
|
|
# 'globs' and the regexes in 'regexes'. Parts are joined with '|' (OR).
|
|
# Returns the search() method of the compiled regex.
|
|
#
|
|
# Returns None if there are neither globs nor regexes, which should be
|
|
# interpreted as no match.
|
|
|
|
if not (globs or regexes):
|
|
return None
|
|
|
|
regex = ""
|
|
|
|
if globs:
|
|
glob_regexes = []
|
|
for glob in globs:
|
|
# Construct a regex equivalent to the glob
|
|
glob_regex = glob.replace(".", "\\.").replace("*", "[^/]*") \
|
|
.replace("?", "[^/]")
|
|
|
|
if not glob.endswith("/"):
|
|
# Require a full match for globs that don't end in /
|
|
glob_regex += "$"
|
|
|
|
glob_regexes.append(glob_regex)
|
|
|
|
# The glob regexes must anchor to the beginning of the path, since we
|
|
# return search(). (?:) is a non-capturing group.
|
|
regex += "^(?:{})".format("|".join(glob_regexes))
|
|
|
|
if regexes:
|
|
if regex:
|
|
regex += "|"
|
|
regex += "|".join(regexes)
|
|
|
|
return re.compile(regex).search
|
|
|
|
|
|
def _load_maintainers(path):
    # Parses the maintainers file at 'path' as YAML, runs
    # _check_maintainers() on the result, and returns it. The returned data
    # is plain Python dicts/lists/etc., mirroring the structure of the file.
    #
    # YAML syntax errors are re-raised as MaintainersError, with the path
    # prepended to the message.

    try:
        with open(path, encoding="utf-8") as maints_file:
            parsed = load(maints_file, Loader=SafeLoader)
    except YAMLError as yaml_err:
        raise MaintainersError("{}: YAML error: {}".format(path, yaml_err))

    _check_maintainers(path, parsed)
    return parsed
|
|
|
|
|
|
def _check_maintainers(maints_path, yaml):
|
|
# Checks the maintainers data in 'yaml', which comes from the maintainers
|
|
# file at maints_path, which is a pathlib.Path instance
|
|
|
|
root = maints_path.parent
|
|
|
|
def ferr(msg):
|
|
_err("{}: {}".format(maints_path, msg)) # Prepend the filename
|
|
|
|
if not isinstance(yaml, dict):
|
|
ferr("empty or malformed YAML (not a dict)")
|
|
|
|
ok_keys = {"status", "maintainers", "collaborators", "inform", "files",
|
|
"files-exclude", "files-regex", "files-regex-exclude",
|
|
"labels", "description", "tests", "tags"}
|
|
|
|
ok_status = {"maintained", "odd fixes", "unmaintained", "obsolete"}
|
|
ok_status_s = ", ".join('"' + s + '"' for s in ok_status) # For messages
|
|
|
|
for area_name, area_dict in yaml.items():
|
|
if not isinstance(area_dict, dict):
|
|
ferr("malformed entry for area '{}' (not a dict)"
|
|
.format(area_name))
|
|
|
|
for key in area_dict:
|
|
if key not in ok_keys:
|
|
ferr("unknown key '{}' in area '{}'"
|
|
.format(key, area_name))
|
|
|
|
if "status" in area_dict and \
|
|
area_dict["status"] not in ok_status:
|
|
ferr("bad 'status' key on area '{}', should be one of {}"
|
|
.format(area_name, ok_status_s))
|
|
|
|
if not area_dict.keys() & {"files", "files-regex"}:
|
|
ferr("either 'files' or 'files-regex' (or both) must be specified "
|
|
"for area '{}'".format(area_name))
|
|
|
|
if not area_dict.get("maintainers") and area_dict.get("status") == "maintained":
|
|
ferr("maintained area '{}' with no maintainers".format(area_name))
|
|
|
|
for list_name in "maintainers", "collaborators", "inform", "files", \
|
|
"files-regex", "labels", "tags", "tests":
|
|
if list_name in area_dict:
|
|
lst = area_dict[list_name]
|
|
if not (isinstance(lst, list) and
|
|
all(isinstance(elm, str) for elm in lst)):
|
|
ferr("malformed '{}' value for area '{}' -- should "
|
|
"be a list of strings".format(list_name, area_name))
|
|
|
|
for files_key in "files", "files-exclude":
|
|
if files_key in area_dict:
|
|
for glob_pattern in area_dict[files_key]:
|
|
# This could be changed if it turns out to be too slow,
|
|
# e.g. to only check non-globbing filenames. The tuple() is
|
|
# needed due to pathlib's glob() returning a generator.
|
|
paths = tuple(root.glob(glob_pattern))
|
|
if not paths:
|
|
ferr("glob pattern '{}' in '{}' in area '{}' does not "
|
|
"match any files".format(glob_pattern, files_key,
|
|
area_name))
|
|
if not glob_pattern.endswith("/"):
|
|
if all(path.is_dir() for path in paths):
|
|
ferr("glob pattern '{}' in '{}' in area '{}' "
|
|
"matches only directories, but has no "
|
|
"trailing '/'"
|
|
.format(glob_pattern, files_key,
|
|
area_name))
|
|
|
|
for files_regex_key in "files-regex", "files-regex-exclude":
|
|
if files_regex_key in area_dict:
|
|
for regex in area_dict[files_regex_key]:
|
|
try:
|
|
re.compile(regex)
|
|
except re.error as e:
|
|
ferr("bad regular expression '{}' in '{}' in "
|
|
"'{}': {}".format(regex, files_regex_key,
|
|
area_name, e.msg))
|
|
|
|
if "description" in area_dict and \
|
|
not isinstance(area_dict["description"], str):
|
|
ferr("malformed 'description' value for area '{}' -- should be a "
|
|
"string".format(area_name))
|
|
|
|
|
|
def _git(*args):
    # Helper for running a Git command. Returns the rstrip()ed stdout output,
    # decoded as UTF-8. Called like _git("diff").
    #
    # Errors are reported through _giterr(): when the git executable is
    # missing or can't be started, and when the command exits with a nonzero
    # status. This function does not call sys.exit() itself.

    git_cmd = ("git",) + args
    git_cmd_s = " ".join(shlex.quote(word) for word in git_cmd)  # For errors

    try:
        # Passing a sequence (no shell) means arguments need no quoting
        completed = subprocess.run(
            git_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except FileNotFoundError:
        _giterr("git executable not found (when running '{}'). Check that "
                "it's listed in the PATH environment variable"
                .format(git_cmd_s))
    except OSError as e:
        _giterr("error running '{}': {}".format(git_cmd_s, e))

    if completed.returncode:
        _giterr("error running '{}'\n\nstdout:\n{}\nstderr:\n{}".format(
            git_cmd_s,
            completed.stdout.decode("utf-8"),
            completed.stderr.decode("utf-8")))

    return completed.stdout.decode("utf-8").rstrip()
|
|
|
|
|
|
def _ls_files(path=None):
    # Runs 'git ls-files', passing 'path' as an extra argument unless it is
    # None, and returns the output as a list of lines

    extra_args = () if path is None else (path,)
    return _git("ls-files", *extra_args).splitlines()
|
|
|
|
|
|
def _err(msg):
    # Reports a problem with the maintainers file by raising
    # MaintainersError with 'msg' as the message
    raise MaintainersError(msg)
|
|
|
|
|
|
def _giterr(msg):
    # Reports a Git-related problem by raising GitError with 'msg' as the
    # message
    raise GitError(msg)
|
|
|
|
|
|
def _serr(msg):
|
|
# For reporting errors when get_maintainer.py is run as a script.
|
|
# sys.exit() shouldn't be used otherwise.
|
|
sys.exit("{}: error: {}".format(sys.argv[0], msg))
|
|
|
|
|
|
class MaintainersError(Exception):
    """Exception raised for MAINTAINERS.yml-related errors"""
|
|
|
|
|
|
class GitError(Exception):
    """Exception raised for Git-related errors"""
|
|
|
|
|
|
# Run the command-line interface only when this file is executed as a
# script. Nothing is run when it is imported as a library.
if __name__ == "__main__":
    _main()
|