安装python-docx模块
pip3 install python-docx
读取docx里的内容
# Working with Word documents from Python.
from docx import Document

# Parse the local 练习.docx file into a tree-shaped Document object.
doc = Document(r"C:\Users\wang\Desktop\百度云下载\练习.docx")

# Print the text of each paragraph, one paragraph per line.
for paragraph_text in (p.text for p in doc.paragraphs):
    print(paragraph_text)
打印每个段落中的每个块(run)
from docx import Document

doc = Document(r"C:\Users\wang\Desktop\百度云下载\练习.docx")

# Walk the runs of every paragraph in document order and print each
# run's text on its own line.
every_run = (piece for paragraph in doc.paragraphs for piece in paragraph.runs)
for piece in every_run:
    print(piece.text)
替换word里的内容
from docx import Document

doc = Document(r"C:\Users\wang\Desktop\百度云下载\练习.docx")

old_word, new_word = 'Document', "Singer"
for paragraph in doc.paragraphs:
    # Only paragraphs that begin with the old word are rewritten.
    if not paragraph.text.startswith(old_word):
        continue
    paragraph.text = paragraph.text.replace(old_word, new_word)

doc.save(r"C:\Users\wang\Desktop\百度云下载\aaa.docx")
# After this paragraph-level replacement the formatting differs from the
# original document.
替换内容并保持原格式
from docx import Document

doc = Document(r"C:\Users\wang\Desktop\百度云下载\练习.docx")

# Replace at run level (rather than on the whole paragraph); this is the
# variant the tutorial uses to keep the original formatting.
every_run = (piece for paragraph in doc.paragraphs for piece in paragraph.runs)
for piece in every_run:
    # Only runs whose text begins with 'Document' are rewritten.
    if not piece.text.startswith('Document'):
        continue
    piece.text = piece.text.replace('Document', "Singer")

doc.save(r"C:\Users\wang\Desktop\百度云下载\bbb.docx")
读取docx里的表格内容
from docx import Document

doc = Document(r"C:\Users\wang\Desktop\百度云下载\练习2.docx")
# Only the first table in the document is read.
table = doc.tables[0]

# Row-wise read: for each row, print every paragraph of every cell.
for table_row in table.rows:
    row_paragraphs = (
        paragraph
        for row_cell in table_row.cells
        for paragraph in row_cell.paragraphs
    )
    for paragraph in row_paragraphs:
        print(paragraph.text)
from docx import Document

doc = Document(r"C:\Users\wang\Desktop\百度云下载\练习2.docx")
# Only the first table in the document is read.
table = doc.tables[0]

# Column-wise read: for each column, print every paragraph of every cell.
for table_col in table.columns:
    col_paragraphs = (
        paragraph
        for col_cell in table_col.cells
        for paragraph in col_cell.paragraphs
    )
    for paragraph in col_paragraphs:
        print(paragraph.text)
python操作pptx
from pptx import Presentation

ppt = Presentation(r"C:\Users\wang\Desktop\百度云下载\数据报告模板.pptx")

# To inspect every slide instead:
# for slide in ppt.slides:
#     print(slide)

# Slides are indexed from zero, so index 1 selects the second slide.
page2 = ppt.slides[1]
for shape in page2.shapes:
    # Skip shapes that cannot hold text (e.g. pictures).
    if not shape.has_text_frame:
        continue
    # has_text_frame is only a boolean flag; the paragraphs live on
    # shape.text_frame, so iterate that rather than the flag itself.
    for para in shape.text_frame.paragraphs:
        # Edit run by run, replacing '情况' with '水平' in each run's text.
        for run in para.runs:
            run.text = run.text.replace('情况', '水平')

# Save under a new name; the code never writes to the template's path.
ppt.save(r"C:\Users\wang\Desktop\百度云下载\数据报告模板副本.pptx")
0 Comments