# File: object-assembler/code/cube3d/training/missing_obj_checker.py
# Author (per hosting page): 0xZohar
# Commit message: Add code/cube3d/training/missing_obj_checker.py
# Commit: 6f73dc8 (verified)
import os
import re
import shutil
# Part-name prefixes for which no corresponding OBJ file was found.
# Entries keep their original casing; matching elsewhere in this script is
# done against a lower-cased copy.
missing_prefixes = [
    "6014a",
    "5654",
    "98138pb010",
    "3069bpb0348",
    "2431pb038",
    "98138pb058",
    "MAYDAY0329",
    "85984pb289",
    "3069bpx36",
    "44301a",
    "3069bpb001",
    "3069bpb0399",
    "98138pb033",
    "3039pb041",
    "3069BPB0648",
    "MMAYDAY25TH",
    "4304",
]
# Path configuration.
ldr_directory = "/public/home/wangshuo/gap/assembly/data/car_1k/subset_self/ldr_l30_rotrans_expand_wom"  # source directory of LDR files
target_directory = "/public/home/wangshuo/gap/assembly/cubedit/outputs/missingobj_ldr_files"  # destination folder

# Make sure the destination folder exists (no error if it already does).
os.makedirs(target_directory, exist_ok=True)

# Lower-cased copy of the prefixes, index-aligned with missing_prefixes, so
# that matching can be case-insensitive while reports keep the original text.
missing_prefixes_lower = list(map(str.lower, missing_prefixes))

# Original prefix -> path of the single LDR file selected for copying.
prefix_to_ldr = {}

# Original prefix -> list of LDR file names found to use that prefix.
# Built with a generator so every prefix gets its own, independent list.
usage_results = dict((prefix, []) for prefix in missing_prefixes)
# Scan the LDR directory and record, for each missing prefix, the first LDR
# file whose content mentions it.
#
# Matching is case-insensitive (file content and prefixes are both
# lower-cased) and whole-token: the prefix must not be directly preceded or
# followed by a word character, so "4304" does not match inside "43040".
#
# NOTE(review): a prefix is no longer searched for once it has a file, so
# each usage_results list ends up holding at most one file name. Confirm that
# "first match only" is what the report is meant to show.

# Build and compile each pattern once instead of once per (file, prefix) pair.
prefix_patterns = [
    (original_prefix, re.compile(r'(?<!\w)' + re.escape(prefix_lower) + r'(?!\w)'))
    for original_prefix, prefix_lower in zip(missing_prefixes, missing_prefixes_lower)
]

# os.listdir() returns entries in arbitrary order; sorting makes the "first
# matching file" for each prefix reproducible from run to run.
for filename in sorted(os.listdir(ldr_directory)):
    # Every prefix already has a file: actually stop, rather than walking the
    # remaining directory entries and skipping each one.
    if len(prefix_to_ldr) >= len(missing_prefixes):
        break
    if not filename.lower().endswith('.ldr'):
        continue

    filepath = os.path.join(ldr_directory, filename)
    # Keep the try body limited to the read, the only step that can raise
    # UnicodeDecodeError.
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            content_lower = f.read().lower()
    except UnicodeDecodeError:
        print(f"警告:文件 {filename} 编码异常,已跳过")
        continue

    for original_prefix, pattern in prefix_patterns:
        # Only prefixes that still lack a file are checked.
        if original_prefix in prefix_to_ldr:
            continue
        if pattern.search(content_lower):
            usage_results[original_prefix].append(filename)
            # Remember the first LDR file found for this prefix.
            prefix_to_ldr[original_prefix] = filepath
# Copy the selected LDR file of every matched prefix into the target
# directory (one file per prefix).
copied_files = []
for prefix in prefix_to_ldr:
    source = prefix_to_ldr[prefix]
    filename = os.path.basename(source)
    destination = os.path.join(target_directory, filename)

    # A file with this name is already in the target directory: do not copy
    # it again, and do not report it as copied.
    if os.path.exists(destination):
        continue

    shutil.copy2(source, destination)  # copy2 also preserves file metadata
    copied_files.append(f"{prefix} -> {filename}")
    print(f"已复制: {prefix} -> {filename}")
# Per-prefix report: list the LDR files recorded for each missing prefix, or
# state that none was recorded.
print("\n===== 未找到OBJ前缀的LDR文件使用情况 =====")
for prefix in usage_results:
    files = usage_results[prefix]
    if not files:
        print(f"\n前缀 '{prefix}' 未被任何LDR文件使用")
        continue
    print(f"\n前缀 '{prefix}' 被以下LDR文件使用:")
    for file in files:
        print(f" - {file}")
# Summary: split the prefixes into those with at least one recorded LDR file
# and those with none, then print the totals and the copy details.
used_prefixes = []
unused_prefixes = []
for prefix, files in usage_results.items():
    # Insertion order of usage_results is kept within each bucket.
    (used_prefixes if files else unused_prefixes).append(prefix)

print("\n===== 结果汇总 =====")
print(f"被使用的缺失前缀 ({len(used_prefixes)}个):")
print(", ".join(used_prefixes))
print(f"\n未被使用的缺失前缀 ({len(unused_prefixes)}个):")
print(", ".join(unused_prefixes))
print(f"\n已为 {len(copied_files)} 个前缀各复制了一个相关LDR文件到目标目录: {target_directory}")

# The detail section is only printed when something was copied.
if copied_files:
    print("复制详情:")
    for item in copied_files:
        print(f" {item}")