安装python-docx模块

pip3 install python-docx

读取docx里的内容

# Working with Word files from Python
from docx import Document

# Load the local 练习.docx; it is parsed into a tree-shaped document object
document = Document(r"C:\Users\wang\Desktop\百度云下载\练习.docx")
# Emit the text of each paragraph, one per line, in document order
for paragraph_text in (p.text for p in document.paragraphs):
    print(paragraph_text)

打印每个段落中的每个文字块(run)

from docx import Document

document = Document(r"C:\Users\wang\Desktop\百度云下载\练习.docx")
# Walk the runs of each paragraph in order and print each run's text
# on its own line.
all_runs = (run for paragraph in document.paragraphs for run in paragraph.runs)
for run in all_runs:
    print(run.text)

替换word里的内容

from docx import Document

document = Document(r"C:\Users\wang\Desktop\百度云下载\练习.docx")
for paragraph in document.paragraphs:
    original_text = paragraph.text
    # Only paragraphs that begin with 'Document' are rewritten.
    if not original_text.startswith('Document'):
        continue
    # Assigning to paragraph.text rewrites the paragraph's whole content.
    paragraph.text = original_text.replace('Document', "Singer")
# Write the modified document to a new file; the source file is not overwritten.
document.save(r"C:\Users\wang\Desktop\百度云下载\aaa.docx")

#注意:直接给 para.text 赋值会把段落里原有的所有 run 替换成一个新的 run,段落级样式会保留,但加粗、斜体等 run 级格式会丢失,所以替换后的格式不同于之前

替换内容并保持原格式(逐个 run 替换)

from docx import Document

document = Document(r"C:\Users\wang\Desktop\百度云下载\练习.docx")
# Edit run by run rather than paragraph by paragraph, so each run object
# (and whatever formatting it carries) stays in place.
for paragraph in document.paragraphs:
    for run in paragraph.runs:
        run_text = run.text
        # Only runs whose text begins with 'Document' are rewritten.
        if not run_text.startswith('Document'):
            continue
        run.text = run_text.replace('Document', "Singer")
# Write the modified document to a new file; the source file is not overwritten.
document.save(r"C:\Users\wang\Desktop\百度云下载\bbb.docx")

读取docx里的表格内容

from docx import Document

document = Document(r"C:\Users\wang\Desktop\百度云下载\练习2.docx")
first_table = document.tables[0]
# Row-wise read: visit the cells of each row in turn and print the text of
# every paragraph inside each cell.
for table_row in first_table.rows:
    row_paragraphs = (
        paragraph
        for cell in table_row.cells
        for paragraph in cell.paragraphs
    )
    for paragraph in row_paragraphs:
        print(paragraph.text)

from docx import Document

document = Document(r"C:\Users\wang\Desktop\百度云下载\练习2.docx")
first_table = document.tables[0]
# Column-wise read: visit the cells of each column in turn and print the
# text of every paragraph inside each cell.
for table_column in first_table.columns:
    column_paragraphs = (
        paragraph
        for cell in table_column.cells
        for paragraph in cell.paragraphs
    )
    for paragraph in column_paragraphs:
        print(paragraph.text)

python操作pptx(需先安装 python-pptx 模块:pip3 install python-pptx)

from pptx import Presentation

# Open the report template deck.
ppt = Presentation(r"C:\Users\wang\Desktop\百度云下载\数据报告模板.pptx")
# To inspect every slide instead, iterate the deck:
#     for slide in ppt.slides:
#         print(slide)
page2 = ppt.slides[1]  # index 1 -> the second slide of the deck
for shape in page2.shapes:
    # Shapes without a text frame have no paragraphs to edit; skip them.
    if not shape.has_text_frame:
        continue
    # has_text_frame is only a boolean flag; the paragraphs live on
    # shape.text_frame. Reading .paragraphs off the flag raises AttributeError.
    for para in shape.text_frame.paragraphs:
        for run in para.runs:
            # Replace within each run so the run objects are kept as they are.
            run.text = run.text.replace('情况', '水平')
# Save under a new name so the template itself is left unmodified.
ppt.save(r"C:\Users\wang\Desktop\百度云下载\数据报告模板副本.pptx")
Categories: python

0 Comments

发表评论

Avatar placeholder

邮箱地址不会被公开。 必填项已用*标注