Using AI to generate a script that bulk-exports the kernel source
I recently used AI to write a script that exports the entire kernel source tree as .txt files, flattening every path into the file name, with support for include/exclude filtering. Below is the conversation with the AI:
https://yuanbao.tencent.com/bot/app/share/chat/GjdXiqdQaoVn
Here is the final generated script; it turned out quite well:
kernel_flatten.py
#!/usr/bin/env python3
"""
Linux kernel source export tool (with grouping support)
Stores exported files in numbered groups so that very large file counts stay manageable
"""
import os
import shutil
import argparse
import re
import sys
import math
from collections import OrderedDict
def sanitize_filename(path):
"""路徑安全化:替換特殊字符為下劃線"""
return re.sub(r'[^\w\-_.]', '_', path)
class FilterRule:
"""過濾規則處理器"""
def __init__(self, pattern, is_include, is_suffix=False):
self.pattern = pattern
self.is_include = is_include
self.is_suffix = is_suffix
def match(self, rel_path, is_dir=False):
"""匹配目錄或后綴規則"""
if self.is_suffix:
            # Suffix match (case-insensitive)
return rel_path.lower().endswith(self.pattern.lower())
else:
            # Directory match (recurses into subdirectories)
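            # e.g. the rule "kernel" matches "kernel" and "kernel/sched",
            # but not "arch/x86/kernel" (directory rules are anchored at the tree root)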
pattern = self.pattern.replace('\\', '/')
target_path = rel_path.replace('\\', '/')
return (target_path == pattern or
target_path.startswith(pattern + '/')) if is_dir else pattern in target_path
def parse_filter_args(filter_args, exclude_args):
"""解析命令行過濾參數,生成優先級規則鏈"""
rule_chain = OrderedDict()
sequence = []
    # Handle include rules (-f)
if filter_args:
for item in filter_args.split(','):
item = item.strip()
if not item: continue
is_suffix = item.startswith('.')
rule = FilterRule(item, True, is_suffix)
key = f"include_{item}"
rule_chain[key] = rule
sequence.append(key)
    # Handle exclude rules (-n)
if exclude_args:
for item in exclude_args.split(','):
item = item.strip()
if not item: continue
is_suffix = item.startswith('.')
rule = FilterRule(item, False, is_suffix)
key = f"exclude_{item}"
            rule_chain[key] = rule  # a later rule overrides an earlier one with the same key
sequence.append(key)
return [rule_chain[key] for key in sequence]
def should_include_file(rel_path, rules):
"""根據規則鏈決策是否包含文件"""
dir_part = os.path.dirname(rel_path)
file_name = os.path.basename(rel_path)
if not rules:
        return True  # no rules given: include everything by default
    # Apply rules in reverse order (later rules have higher priority)
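    # Example (hypothetical flags): with -f "kernel,.c" and -n ".txt",
    #   "kernel/sched/core.c"      -> included (the ".c" include rule matches)
    #   "Documentation/notes.txt"  -> excluded (the ".txt" exclude rule matches first)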
for rule in reversed(rules):
if not rule.is_suffix and rule.match(dir_part, is_dir=True):
return rule.is_include
if rule.is_suffix and rule.match(file_name):
return rule.is_include
    return not any(rule.is_include for rule in rules)  # nothing matched: include only if there are no include rules
def process_source_tree(source_dir, output_dir, prefix="", rules=None, digits=0, num_groups=0):
"""處理文件并分組存儲"""
safe_prefix = sanitize_filename(prefix).rstrip('_') if prefix else ""
    files_to_export = []  # relative paths of the files selected for export
    # First pass: collect the files that need to be exported
total_count = 0
for root, _, files in os.walk(source_dir):
for filename in files:
total_count += 1
src_path = os.path.join(root, filename)
rel_path = os.path.relpath(src_path, source_dir)
if should_include_file(rel_path, rules):
files_to_export.append(rel_path)
included_count = len(files_to_export)
if included_count == 0:
print("沒有需要導出的文件")
return total_count, 0, 0
    # Compute grouping parameters
group_size = 0
group_dirs = []
if num_groups > 0:
        group_size = math.ceil(included_count / num_groups)  # files per group (rounded up)
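        # Example: 1000 exported files and num_groups=3 gives group_size=334,
        # so the groups hold 334, 334, and 332 files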
        # Create the group directories
for i in range(1, num_groups + 1):
group_dir = os.path.join(output_dir, f"group_{i}")
os.makedirs(group_dir, exist_ok=True)
group_dirs.append(group_dir)
print(f"創建 {num_groups} 個分組目錄,每組最多 {group_size} 個文件")
# 第二次遍歷:處理文件并分組
current_index = 0
for rel_path in files_to_export:
current_index += 1
src_path = os.path.join(source_dir, rel_path)
        # Determine the target directory
if num_groups > 0:
            group_index = (current_index - 1) // group_size  # zero-based group index
target_dir = group_dirs[group_index]
else:
target_dir = output_dir
        # Build the numbered file name
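        # e.g. with --digits 5 the 42nd file gets the prefix "00042_"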
counter_str = f"{current_index:0{digits}d}_" if digits > 0 else ""
base_name = sanitize_filename(rel_path)
        # Suffix handling: keep an existing .txt name, otherwise append .txt
_, ext = os.path.splitext(os.path.basename(rel_path))
if ext.lower() == '.txt':
encoded_name = base_name
else:
encoded_name = base_name + ".txt"
        # Assemble the final file name
if safe_prefix:
final_name = f"{counter_str}{safe_prefix}_{encoded_name}"
else:
final_name = f"{counter_str}{encoded_name}"
dest_path = os.path.join(target_dir, final_name)
        # Copy the file
shutil.copy2(src_path, dest_path)
        # Print progress information
if num_groups > 0:
group_info = f" → group_{group_index+1}/{final_name}"
else:
group_info = f" → {final_name}"
print(f"Processed [{current_index}/{included_count}]: {rel_path}{group_info}")
return total_count, included_count, current_index
def main():
parser = argparse.ArgumentParser(
        description="Linux kernel source export tool (grouped-output edition)",
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument("-s", "--source", required=True, help="內核源碼根目錄路徑")
parser.add_argument("-o", "--output", required=True, help="輸出目錄路徑")
parser.add_argument("-p", "--prefix", default="",
help="文件名前綴(下劃線分隔)")
parser.add_argument("-f", "--filter", default="",
help="包含規則(逗號分隔,如 'kernel,.c')")
parser.add_argument("-n", "--exclude", default="",
help="排除規則(逗號分隔,如 'docs,.txt')")
parser.add_argument("-d", "--digits", type=int, default=5,
help="編號位數(0=禁用,建議≥4)")
parser.add_argument("-t", "--groups", type=int, default=0,
help="分組數量(0=不分組)")
args = parser.parse_args()
    # Validate paths
if not os.path.isdir(args.source):
print(f"錯誤:源目錄不存在 {args.source}")
sys.exit(1)
    # Create the output directory
os.makedirs(args.output, exist_ok=True)
    # Parse the filter rules
rules = parse_filter_args(args.filter, args.exclude)
    # Print a summary of the parameters
    print(f"Source directory: {args.source}")
    print(f"Output directory: {args.output}")
    print(f"File name prefix: '{args.prefix}'" if args.prefix else "No prefix specified")
    print(f"Numbering: {args.digits}-digit counter" if args.digits > 0 else "Numbering disabled")
    print(f"Grouping: {args.groups} groups" if args.groups > 0 else "Grouping disabled")
    if args.filter: print(f"Include rules: {args.filter}")
    if args.exclude: print(f"Exclude rules: {args.exclude}")
if rules:
print("\n激活規則(優先級降序):")
for i, rule in enumerate(reversed(rules), 1):
            rule_type = "include" if rule.is_include else "exclude"
            rule_target = "suffix" if rule.is_suffix else "directory"
print(f" {i}. [{rule_type}] {rule_target} '{rule.pattern}'")
print("\n" + "=" * 60)
    # Run the file processing
total, included, seq_count = process_source_tree(
args.source,
args.output,
args.prefix,
rules,
args.digits,
args.groups
)
print("=" * 60)
print(f"處理完成!掃描文件: {total}, 導出文件: {included}")
if args.digits > 0:
print(f"文件編號范圍: 1 - {seq_count} (位數:{args.digits})")
if seq_count >= (10 ** args.digits):
print(f"?? 警告: 文件數超過編號容量,請增加 --digits 參數值")
if __name__ == "__main__":
main()
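For reference, a typical run against a kernel tree could look like the following; the source path, output directory, prefix, and filter values here are only illustrative:

python3 kernel_flatten.py -s ~/linux-6.6 -o ./kernel_txt -p linux -f "kernel,mm,.c,.h" -n "Documentation,.txt" -d 5 -t 10

With these flags the script copies every .c/.h file plus everything under kernel/ and mm/, skips the Documentation tree and all .txt files, prefixes each output name with a five-digit counter, and spreads the results across ten group_N directories; the counter also keeps the flattened names unique.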
This post is from cnblogs (博客園), author: dolinux. Do not repost without permission.
