用Python进行docx文档合并
使用 python-docx(导入名为 docx)和 docxcompose 两个包进行 docx 文档合并
# 使用阿里云镜像安装相关模块
pip install python-docx -i https://mirrors.aliyun.com/pypi/simple/
pip install docxcompose -i https://mirrors.aliyun.com/pypi/simple/
# 代码1
该代码实现了将工作目录下所有的docx文件内容进行合并,每个文件间以分页符分隔开。
from docxcompose.composer import Composer
import docx
import os
# Name of the merged result. It is written into the scanned directory, so it
# must never be picked up as an input on a later run.
OUTPUT_NAME = 'out.docx'


def docx_files_to_merge(directory='.', output_name=OUTPUT_NAME):
    """Return the names of the .docx files in *directory* that should be merged.

    The result is sorted because ``os.listdir`` returns entries in arbitrary
    order, which would make the merge order unpredictable.  Two kinds of
    entries are skipped:

    * *output_name* itself, so a previous result is not merged into the new one;
    * Word's ``~$`` lock files, which are not valid documents.

    The extension check is case-insensitive (``.DOCX`` is accepted).
    """
    skipped = output_name.lower()
    return sorted(
        name
        for name in os.listdir(directory)
        if name.lower().endswith('.docx')
        and name.lower() != skipped
        and not name.startswith('~$')
    )


def merge_docx(directory='.', output_name=OUTPUT_NAME):
    """Merge every .docx file in *directory* into ``directory/output_name``.

    Consecutive source documents are separated by a page break.  The output is
    written even when no source document is found (it is then empty).
    """
    names = docx_files_to_merge(directory, output_name)
    merged = Composer(docx.Document())
    last = len(names) - 1
    for position, name in enumerate(names):
        part = docx.Document(os.path.join(directory, name))
        # Compare against the position in the list of files actually merged,
        # not in the raw directory listing, so no trailing page break is left
        # after the final document.
        if position < last:
            part.add_page_break()
        merged.append(part)
    merged.save(os.path.join(directory, output_name))


if __name__ == '__main__':
    merge_docx()
# 代码2
该代码在代码1的基础上增加了一个步骤:先将文件夹下所有的doc文件转换成docx,再进行合并。转换通过win32com调用Word完成,因此需要在安装了Microsoft Word的Windows系统上运行。
from docxcompose.composer import Composer
import os
import docx
workdir = r'C:\Users\tangyw\Documents\test'  # directory to process

# Name of the merged result. It is written into the scanned directory, so it
# must never be picked up as an input on a later run.
OUTPUT_NAME = 'out.docx'

# Word's WdSaveFormat value for the .docx (Open XML) format.
WD_FORMAT_XML_DOCUMENT = 12


def doc_files_to_convert(directory):
    """Return the sorted names of the legacy .doc files in *directory*.

    Word's ``~$`` lock files are skipped; the extension check is
    case-insensitive.
    """
    return sorted(
        name
        for name in os.listdir(directory)
        if name.lower().endswith('.doc') and not name.startswith('~$')
    )


def docx_files_to_merge(directory, output_name=OUTPUT_NAME):
    """Return the sorted names of the .docx files in *directory* to merge.

    *output_name* (a previous merge result) and ``~$`` lock files are skipped.
    Sorting makes the merge order deterministic, since ``os.listdir`` returns
    entries in arbitrary order.
    """
    skipped = output_name.lower()
    return sorted(
        name
        for name in os.listdir(directory)
        if name.lower().endswith('.docx')
        and name.lower() != skipped
        and not name.startswith('~$')
    )


def convert_doc_to_docx(directory):
    """Save every .doc file in *directory* as a .docx file next to it.

    The conversion drives Word through COM.  Nothing is started when there is
    no .doc file to convert.
    """
    names = doc_files_to_convert(directory)
    if not names:
        return
    # Imported here so that the dependency on pywin32 / Word is only needed
    # when a conversion is actually required.
    from win32com import client as wc

    word = wc.DispatchEx('Word.Application')
    try:
        for name in names:
            source = os.path.abspath(os.path.join(directory, name))
            target = os.path.splitext(source)[0] + '.docx'
            document = word.Documents.Open(source)
            try:
                document.SaveAs(target, WD_FORMAT_XML_DOCUMENT)
            finally:
                document.Close()
    finally:
        # Always shut Word down, otherwise a failed conversion leaves a
        # hidden Word process running.
        word.Quit()


def merge_docx(directory, output_name=OUTPUT_NAME):
    """Merge every .docx file in *directory* into ``directory/output_name``.

    Consecutive source documents are separated by a page break.
    """
    names = docx_files_to_merge(directory, output_name)
    merged = Composer(docx.Document())
    last = len(names) - 1
    for position, name in enumerate(names):
        part = docx.Document(os.path.join(directory, name))
        # Position within the merged files, not within the raw directory
        # listing, so no page break trails the final document.
        if position < last:
            part.add_page_break()
        merged.append(part)
    merged.save(os.path.join(directory, output_name))


if __name__ == '__main__':
    # First convert all .doc files to .docx, then merge all .docx files.
    convert_doc_to_docx(workdir)
    merge_docx(workdir)
上次更新: 2024/03/11, 23:50:27