Python 批量转换 Word 文档格式

使用 Python 将 Word 文档转换为所需的格式


例如,在查重时通常要求文档格式统一,这时就可以用下面的脚本批量转换文档格式:

import os
from win32com import client as wc
import time

def save_doc_to_docx(raw_path, new_path):  # doc -> docx
    """Convert every Word document in ``raw_path`` to .docx inside ``new_path``.

    Files whose names end in ``.doc`` or ``.docx`` (case-insensitive) are
    opened through Word's COM automation object and saved under the same base
    name with a ``.docx`` extension. Files starting with ``~$`` are skipped.
    A file that fails to convert is reported on stdout together with the
    error, and the batch carries on with the next file.

    :param raw_path: folder containing the documents to convert
    :param new_path: folder that receives the converted files; created if
        it does not exist
    :return: None
    """
    # The original author notes that relative paths do not work with Word
    # automation, so normalise both folders to absolute paths up front.
    raw_path = os.path.abspath(raw_path)
    new_path = os.path.abspath(new_path)

    # Do the plain filesystem work before starting Word, so a bad folder
    # cannot leave a Word process behind.
    filenames = os.listdir(raw_path)
    os.makedirs(new_path, exist_ok=True)

    word = wc.Dispatch("Word.Application")
    try:
        for filename in filenames:
            if filename.startswith('~$'):
                continue
            if not filename.lower().endswith(('.doc', '.docx')):
                continue

            stem = os.path.splitext(filename)[0]  # base name without extension
            source_file = os.path.join(raw_path, filename)
            target_file = os.path.join(new_path, stem + '.docx')
            try:
                doc = word.Documents.Open(source_file)
                try:
                    # 12 is the docx save format (per the original comment;
                    # wdFormatXMLDocument in Word's WdSaveFormat enumeration).
                    doc.SaveAs(target_file, 12)
                finally:
                    doc.Close()  # close the document even if saving failed
            except Exception as e:
                # Best-effort batch: report the failing file and its cause,
                # then move on to the next one.
                print(filename, e)
    finally:
        # Always shut Word down, even if the loop was interrupted.
        word.Quit()



if __name__ == '__main__':
    # Absolute paths of the input folder (documents to convert) and the
    # output folder (converted .docx files).
    source_dir = "C:\\Users\\ASUS\\Desktop\\软件1705班编译原理实验一\\"
    target_dir = "C:\\Users\\ASUS\\Desktop\\软件工程1705班汇编原理实验一\\"
    save_doc_to_docx(source_dir, target_dir)