import os
from docx import Document

# Directory that is scanned for .docx files; each extracted .txt file is
# written next to its source document in this same directory.
input_dir = r"C:\Users\lenovo\Desktop\ea_rag_project\content"

# Convert every Word document found directly in input_dir into a UTF-8
# plain-text file containing the document's paragraph text.
for file in os.listdir(input_dir):
    # Key point: skip "~$" temporary files, and anything that is not a .docx.
    if not file.endswith('.docx') or file.startswith('~$'):
        continue

    doc = Document(os.path.join(input_dir, file))

    # Only the text of doc.paragraphs is collected, one paragraph per line.
    full_text = '\n'.join(para.text for para in doc.paragraphs)

    # Swap only the trailing ".docx" for ".txt". A plain str.replace would
    # also rewrite any ".docx" occurring earlier in the file name
    # (e.g. "a.docx.v2.docx" would become "a.txt.v2.txt").
    txt_name = file[:-len('.docx')] + '.txt'
    txt_path = os.path.join(input_dir, txt_name)

    with open(txt_path, 'w', encoding='utf-8') as f:
        f.write(full_text)
    print(f"📄 已提取：{txt_path}")