Lingao Meng
302422ad9d
everywhere: replace double words
...
import os
import re
common_words = set([
'about', 'after', 'all', 'also', 'an', 'and',
'any', 'are', 'as', 'at',
'be', 'because', 'but', 'by', 'can', 'come',
'could', 'day', 'do', 'even',
'first', 'for', 'get', 'give', 'go', 'has',
'have', 'he', 'her',
'him', 'his', 'how', 'I', 'in', 'into', 'it',
'its', 'just',
'know', 'like', 'look', 'make', 'man', 'many',
'me', 'more', 'my', 'new',
'no', 'not', 'now', 'of', 'one', 'only', 'or',
'other', 'our', 'out',
'over', 'people', 'say', 'see', 'she', 'so',
'some', 'take', 'tell', 'than',
'their', 'them', 'then', 'there', 'these',
'they', 'think',
'this', 'time', 'two', 'up', 'use', 'very',
'want', 'was', 'way',
'we', 'well', 'what', 'when', 'which', 'who',
'will', 'with', 'would',
'year', 'you', 'your'
])
valid_extensions = set([
'c', 'h', 'yaml', 'cmake', 'conf', 'txt', 'overlay',
'rst', 'dtsi',
'Kconfig', 'dts', 'defconfig', 'yml', 'ld', 'sh', 'py',
'soc', 'cfg'
])
def filter_repeated_words(text):
# Split the text into lines
lines = text.split('\n')
# Combine lines into a single string with unique separator
combined_text = '/*sep*/'.join(lines)
# Replace repeated words within a line
def replace_within_line(match):
return match.group(1)
# Regex for matching repeated words within a line
within_line_pattern =
re.compile(r'\b(' +
'|'.join(map(re.escape, common_words)) +
r')\b\s+\b\1\b')
combined_text = within_line_pattern.
sub(replace_within_line, combined_text)
# Replace repeated words across line boundaries
def replace_across_lines(match):
return match.group(1) + match.group(2)
# Regex for matching repeated words across line boundaries
across_lines_pattern = re.
compile(r'\b(' + '|'.join(
map(re.escape, common_words)) +
r')\b(\s*[*\/\n\s]*)\b\1\b')
combined_text = across_lines_pattern.
sub(replace_across_lines, combined_text)
# Split the text back into lines
filtered_text = combined_text.split('/*sep*/')
return '\n'.join(filtered_text)
def process_file(file_path):
with open(file_path, 'r', encoding='utf-8') as file:
text = file.read()
new_text = filter_repeated_words(text)
with open(file_path, 'w', encoding='utf-8') as file:
file.write(new_text)
def process_directory(directory_path):
for root, dirs, files in os.walk(directory_path):
dirs[:] = [d for d in dirs if not d.startswith('.')]
for file in files:
# Filter out hidden files
if file.startswith('.'):
continue
file_extension = file.split('.')[-1]
if
file_extension in valid_extensions: # 只处理指定后缀的文件
file_path = os.path.join(root, file)
print(f"Processed file: {file_path}")
process_file(file_path)
directory_to_process = "/home/mi/works/github/zephyrproject/zephyr"
process_directory(directory_to_process)
Signed-off-by: Lingao Meng <menglingao@xiaomi.com>
2024-06-25 06:05:35 -04:00