#! /usr/bin/env python2
import argparse
import contextlib
import logging
import mmap
import os
import re
import sys

# Every exclusion regex loaded from the configuration files ends up
# here as a (compiled regex, origin) tuple; config_import_file()
# appends to it and the filtering loop iterates over it.
exclude_regexs = list()

# One or more consecutive lines, each starting with a space or a tab
# followed by at least one character that is not '#'.  Not referenced
# by the code visible in this file.
noncomment_regex = re.compile("(^[ \t][^#]+.*\n)+", re.MULTILINE)
def config_import_file(filename):
    """
    Imports the regular expressions contained in one config file

    The file is split in blocks of consecutive non-comment lines; each
    block is compiled (with re.MULTILINE) as a single regular
    expression and appended to the module-level list exclude_regexs as
    a tuple (compiled regex, origin), where origin is the string
    "FILENAME:START-END" and START/END are the byte offsets of the
    block in the file.

    Each file follows the format::

      #
      # Comments for multiline regex 1...
      #
      multilineregex
      multilineregex
      multilineregex
      #
      # Comments for multiline regex 2...
      #
      multilineregex
      multilineregex
      multilineregex

    etc.

    :param str filename: path of the configuration file to load
    :raises Exception: if the file cannot be read or one of its blocks
      is not a valid regular expression
    """
    try:
        with open(filename, "rb") as f:
            # Config files are small, so read them whole instead of
            # mmap'ing them: there is no mapping left open when a
            # regex is rejected, and an empty file (which mmap refuses
            # to map) simply yields no regexes.
            data = f.read()
        # That regex basically selects any block of lines that is not
        # a comment block. The finditer() finds all the blocks; each
        # one is compiled into a regex and appended. The pattern is a
        # bytes literal because the file is read in binary mode.
        for m in re.finditer(br"(^\s*[^#].*\n)+", data, re.MULTILINE):
            start, end = m.span()
            origin = "%s:%s-%s" % (filename, start, end)
            text = m.group(0)
            try:
                r = re.compile(text, re.MULTILINE)
            except Exception as e:
                logging.error("%s: bytes %d-%d: bad regex: %s",
                              filename, start, end, e)
                raise
            logging.debug("%s: found regex at bytes %d-%d: %s",
                          filename, start, end, text)
            exclude_regexs.append((r, origin))
        logging.debug("%s: loaded", filename)
    except Exception as e:
        raise Exception("E: %s: can't load config file: %s" % (filename, e))
def config_import_path(path):
    """
    Imports regular expressions from any file *.conf in the given path

    The directory tree rooted at path is walked recursively; every
    file whose name ends in .conf is passed to config_import_file()
    and any other file is skipped. Directories and file names are
    visited in sorted order, so the regexes are always loaded in the
    same order regardless of how the filesystem lists them.

    A path that does not exist or cannot be listed contributes no
    files (os.walk() ignores listing errors by default).

    :param str path: top directory to scan for *.conf files
    :raises Exception: if any of the config files found cannot be
      loaded
    """
    file_regex = re.compile(r".*\.conf$")
    try:
        for dirpath, dirnames, filenames in os.walk(path):
            # os.walk() (top-down) honours in-place changes to
            # dirnames, so sorting it fixes the order in which the
            # subdirectories are descended into.
            dirnames.sort()
            for _filename in sorted(filenames):
                filename = os.path.join(dirpath, _filename)
                if not file_regex.search(_filename):
                    logging.debug("%s: ignored", filename)
                    continue
                config_import_file(filename)
    except Exception as e:
        raise Exception("E: %s: can't load config files: %s" % (path, e))
def config_import(paths):
    """
    Imports regular expressions from any file *.conf in the list of paths.

    If a path is "" or None, the list of paths until then is flushed
    and only the new ones are considered.

    :param paths: directory names, in the order given by the user;
      "" and None entries act as flush markers
    """
    _paths = []
    # Go over the list, flush it if the user gave an empty path ("")
    for path in paths:
        if path is None or path == "":
            logging.debug("flushing current config list: %s", _paths)
            _paths = []
        else:
            _paths.append(path)
    logging.debug("config list: %s", _paths)
    for path in _paths:
        config_import_path(path)
# Command line interface.  -c/--config-dir appends to a list that
# starts out as [".known-issues/"]; given without a value it appends
# None, which config_import() treats as "forget the directories
# listed so far".
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument("-v", "--verbosity", action="count", default=0,
                        help="increase verbosity")
arg_parser.add_argument("-q", "--quiet", action="count", default=0,
                        help="decrease verbosity")
arg_parser.add_argument("-c", "--config-dir", action="append", nargs="?",
                        default=[".known-issues/"],
                        help="configuration directory (multiple can be "
                        "given; if none given, clears the current list) "
                        "%(default)s")
arg_parser.add_argument("FILENAMEs", nargs="+",
                        help="files to filter")
args = arg_parser.parse_args()

# The threshold starts at 40 (ERROR); each -v lowers it by 10 (one
# logging level) and each -q raises it by 10.
logging.basicConfig(level=40 - 10 * (args.verbosity - args.quiet),
                    format="%(levelname)s: %(message)s")

# Report the directories actually requested rather than a hard-coded
# name, so the message stays truthful when -c is used.
logging.debug("Reading configuration from directory `%s`", args.config_dir)
config_import(args.config_dir)
# Filter each file named on the command line: every span matched by
# one of the exclusion regexes is dropped and the rest of the file is
# copied to standard output unchanged.

# Write through the underlying byte stream when sys.stdout has one,
# so the bytes taken from the mmap are emitted verbatim; otherwise
# sys.stdout itself accepts byte strings.
out = getattr(sys.stdout, "buffer", sys.stdout)

for filename in args.FILENAMEs:
    try:
        # The file is only read, so open and map it read-only; this
        # also lets write-protected files be filtered.
        with open(filename, "rb") as f:
            logging.info("%s: filtering", filename)
            if os.fstat(f.fileno()).st_size == 0:
                # mmap cannot map a zero-length file, and there is
                # nothing to filter or print anyway.
                logging.debug("%s: empty, nothing to filter", filename)
                continue
            # closing() releases the mapping even if something below
            # raises.
            with contextlib.closing(
                    mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)) as mm:
                # The ranges are byte offsets into *this* file, so
                # start from an empty list for every file.
                exclude_ranges = []
                for ex, origin in exclude_regexs:
                    logging.info("%s: searching from %s: %s",
                                 filename, origin, ex.pattern)
                    for m in re.finditer(ex.pattern, mm, re.MULTILINE):
                        logging.debug("%s: %s-%s: match from from %s",
                                      filename, m.start(), m.end(), origin)
                        exclude_ranges.append((m.start(), m.end()))

                exclude_ranges.sort(key=lambda r: r[0])
                logging.warning("%s: ranges excluded: %s",
                                filename, exclude_ranges)

                # Print what has not been filtered
                offset = 0
                for begin, end in exclude_ranges:
                    logging.debug("%s: exclude range (%d, %d), from %d %dB",
                                  filename, begin, end, offset,
                                  begin - offset)
                    if begin > offset:
                        # Emit the exact bytes between the excluded
                        # ranges; nothing is dropped or appended.
                        out.write(mm[offset:begin])
                    # Ranges may overlap or nest: never move backwards,
                    # or already-excluded text would be printed.
                    offset = max(offset, end)
                if offset < len(mm):
                    out.write(mm[offset:])
    except Exception as e:
        logging.error("%s: cannot load: %s", filename, e)