victory的博客

长安一片月,万户捣衣声

0%

从多路径多Excel表中获取数据并存入新表

实现代码:

import os

# Source workbooks live in the "资料" folder under the current working directory.
# os.path.join is used instead of a hard-coded "\\" so the path is valid on any OS.
work_path = os.path.join(os.getcwd(), "资料")

# Walk the folder recursively and keep the full path of every .xlsx file.
# Names starting with "~$" are the lock files Excel drops next to a workbook
# that is currently open; they are not loadable workbooks, so they are skipped.
pathss = []  # full paths of all workbooks found (sub-folders included)
for root, dirs, files in os.walk(work_path):
    pathss.extend(
        os.path.join(root, name)  # directory + file name = complete path
        for name in files
        if name.endswith(".xlsx") and not name.startswith("~$")
    )

# 定义函数,获取Excel表格中的ID数据
from openpyxl import load_workbook  # 用于读取Excel中的信息


def Get_system_ID(file):
    """Return the non-empty values stored in column A of a workbook.

    Args:
        file: Path of the .xlsx workbook to read.

    Returns:
        A list with the value of every non-empty column-A cell of the active
        worksheet, starting at row 2 (row 1 is skipped).
    """
    wb = load_workbook(file)
    ws = wb.active

    # values_only=True yields plain cell values instead of Cell objects; with
    # max_col=1 every row comes back as a 1-tuple holding the column-A value.
    return [
        value
        for (value,) in ws.iter_rows(min_row=2, max_col=1, values_only=True)
        if value is not None  # drop empty cells
    ]


# Flatten the IDs of every workbook into one list, keeping the order of pathss
total_list = [
    system_id
    for file in pathss
    for system_id in Get_system_ID(file)
]

# 写入数据到新的excel表,并设置格式
from openpyxl import Workbook
from openpyxl.styles import Font, colors, Alignment  # 字体,颜色,对齐
from openpyxl.styles import PatternFill  # 单元格填充

wb = Workbook()  # new workbook
ws = wb.active  # work on its active sheet
ws.column_dimensions['A'].width = 18.5  # width of column A

# Header cell: field name on a solid background colour
color_fill = PatternFill(fill_type='solid', fgColor="B3CFA1")
ws.cell(row=1, column=1, value="ID").fill = color_fill

# Data rows go below the header, one ID per row starting at row 2
for row, value in enumerate(total_list, start=2):
    ws.cell(row=row, column=1, value=value)

# Font and alignment are the same for every cell, so build them once
# instead of creating a new Alignment object per row.
font_set = Font(name='Arial', size=9)
align_set = Alignment(horizontal='left', vertical='center', shrink_to_fit=True)
for i in range(1, ws.max_row + 1):
    cell = ws.cell(row=i, column=1)
    cell.font = font_set
    cell.alignment = align_set

# os.path.join keeps the output path valid on non-Windows systems too
wb.save(os.path.join(os.getcwd(), "ID.xlsx"))

print(f"\n共获取到 {len(pathss)} 个 Excel表,共 {len(total_list)} 个ID。")

用 Python 为每个学生出不一样的题

实现代码:

import random


# 不重复随机整数生成函数
def Random_num(num_max, num_qty):
    """Return ``num_qty`` distinct random integers taken from 2..num_max.

    Args:
        num_max: Largest value that may be drawn (inclusive). The smallest
            value drawn is always 2 (presumably because row 1 of the sheet
            being sampled is a header row - confirm against the caller).
        num_qty: How many distinct numbers to return.

    Returns:
        A list of ``num_qty`` unique integers in random order; an empty list
        when ``num_qty`` is zero or negative.

    Raises:
        ValueError: If more numbers are requested than 2..num_max contains.
            A draw-and-retry loop can never terminate in that situation, so
            the request is rejected up front.
    """
    if num_qty <= 0:
        return []

    candidates = range(2, num_max + 1)
    if num_qty > len(candidates):
        raise ValueError(
            f"cannot draw {num_qty} distinct numbers from 2..{num_max}"
        )
    # random.sample draws without replacement, so no duplicate check is needed
    return random.sample(candidates, num_qty)


# 定义函数,按随机数在题库中抽取对应编号的题目
from openpyxl import load_workbook


def _choice_options(ws, row):
    """Return the labelled options A-D of one row (read from columns C-F)."""
    # "\t" puts a tab between the option letter and its text
    return [
        f"{label}:\t{ws[f'{column}{row}'].value!s}"
        for label, column in zip("ABCD", "CDEF")
    ]


def Question(que_type, numbers, bank_path="题库.xlsx"):
    """Read the questions stored at the given row numbers of one sheet.

    Args:
        que_type: Question type, which is also the worksheet name
            ("单选题", "多选题", "判断题" or "填空题").
        numbers: Row numbers of the questions to extract.
        bank_path: Path of the question-bank workbook.

    Returns:
        A list with one list per question. The correct answer is always the
        last element:
          * "单选题": [stem, A, B, C, D, answer]  (answer in column G)
          * "多选题": [stem, A, B, C, D, (E), answer]  (optional E in column
            G, answer in column H)
          * any other type: [stem, answer]  (answer in column C)
    """
    ws = load_workbook(bank_path)[que_type]

    questions = []
    for row in numbers:
        stem = ws[f"B{row}"].value  # the question text is in column B
        if que_type == "单选题":
            item = [stem, *_choice_options(ws, row), ws[f"G{row}"].value]
        elif que_type == "多选题":
            item = [stem, *_choice_options(ws, row)]
            option_e = ws[f"G{row}"].value
            if option_e:  # only some questions have an option E
                item.append("E:\t" + str(option_e))
            item.append(ws[f"H{row}"].value)  # keep the answer last
        else:  # true/false and fill-in questions: stem and answer only
            item = [stem, ws[f"C{row}"].value]
        questions.append(item)

    return questions


# 写入考试题到word文件
from docx import Document
from docx.shared import Pt  # 用于设定字体大小(磅值)
from docx.oxml.ns import qn  # 用于应用中文字体


def _add_section_title(doc, text):
    """Append a paragraph holding ``text`` as a bold 12 pt run."""
    run = doc.add_paragraph().add_run(text)
    run.bold = True
    run.font.size = Pt(12)


def _add_choice_questions(doc, items):
    """Write numbered choice questions: stem, then A/B, C/D and optional E."""
    for index, item in enumerate(items, start=1):  # number from 1
        doc.add_paragraph(f"{index}. {item[0]}")  # stem in its own paragraph
        doc.add_paragraph(f"\t{item[1]}\t\t{item[2]}")  # options A and B
        doc.add_paragraph(f"\t{item[3]}\t\t{item[4]}")  # options C and D
        if len(item) == 7:  # seven elements means an option E is present
            doc.add_paragraph(f"\t{item[5]}")


def _add_plain_questions(doc, items):
    """Write numbered questions that consist of a stem only."""
    for index, item in enumerate(items, start=1):
        doc.add_paragraph(f"\t{index}. {item[0]}")


def To_word(number, questions_data):
    """Write one exam paper to "试卷及答案/考试题<number>.docx".

    The document is started from the template "试题-模板.docx" and the four
    question sections are appended to it.

    Args:
        number: Paper number, used in the output file name.
        questions_data: Dict with the keys "单选题", "多选题", "判断题" and
            "填空题"; each value is a list of question lists whose first
            element is the stem (choice questions carry their options next).
    """
    import os  # local import: only needed to build the output path

    doc = Document("试题-模板.docx")

    _add_section_title(doc, "一、单项选择题(共40题,每题1分)")
    _add_choice_questions(doc, questions_data["单选题"])

    _add_section_title(doc, "二、多项选择题(共20题,每题2分)")
    _add_choice_questions(doc, questions_data["多选题"])

    _add_section_title(doc, "三、判断题(共10题,每题1分)")
    _add_plain_questions(doc, questions_data["判断题"])

    _add_section_title(doc, "四、填空题(共10题,每题1分)")
    _add_plain_questions(doc, questions_data["填空题"])

    # Create the output folder on demand and build the path portably, so the
    # save does not fail on a missing folder or on a non-Windows system.
    out_dir = "试卷及答案"
    os.makedirs(out_dir, exist_ok=True)
    doc.save(os.path.join(out_dir, f"考试题{number}.docx"))


# 写入答案
from docx import Document
from docx.shared import Pt  # 用于设定字体大小(磅值)
from docx.oxml.ns import qn  # 用于应用中文字体


def _write_answer_section(doc, title, items, per_paragraph, separator):
    """Write a bold 12 pt section title followed by the numbered answers.

    Answers are appended to one paragraph as separate runs; a new paragraph is
    started after every ``per_paragraph`` answers. (As a consequence an empty
    paragraph follows the section when the answer count is an exact multiple.)
    """
    heading = doc.add_paragraph().add_run(title)
    heading.bold = True
    heading.font.size = Pt(12)

    p = doc.add_paragraph()
    for index, item in enumerate(items, start=1):
        p.add_run(f"{index}. {item[-1]}{separator}")  # answer is the last element
        if index % per_paragraph == 0:
            p = doc.add_paragraph()


def Answer(number, questions_data):
    """Write the answer sheet of one paper to "试卷及答案/考试题<number>答案.docx".

    Args:
        number: Paper number, used in the output file name.
        questions_data: Dict with the keys "单选题", "多选题", "判断题" and
            "填空题"; each value is a list of question lists whose last
            element is the correct answer.
    """
    import os  # local import: only needed to build the output path

    doc = Document()
    # Use "宋体" as the document-wide font (the eastAsia attribute is what
    # applies the font to Chinese text)
    doc.styles['Normal'].font.name = u'宋体'
    doc.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体')

    # Document title: bold, 20 pt
    title_run = doc.add_paragraph().add_run("计算机系2020第二学期期末考试题(答案)")
    title_run.bold = True
    title_run.font.size = Pt(20)

    # (title, questions, answers per paragraph, separator after each answer)
    _write_answer_section(
        doc, "一、单项选择题答案(共40题,每题1分)", questions_data["单选题"], 10, "\t")
    _write_answer_section(
        doc, "二、多项选择题答案(共20题,每题2分)", questions_data["多选题"], 3, "\t")
    _write_answer_section(
        doc, "三、判断题答案(共10题,每题1分)", questions_data["判断题"], 5, "\t")
    _write_answer_section(
        doc, "四、填空题答案(共10题,每题1分)", questions_data["填空题"], 2, "\t\t")

    # Create the output folder on demand and build the path portably, so the
    # save does not fail on a missing folder or on a non-Windows system.
    out_dir = "试卷及答案"
    os.makedirs(out_dir, exist_ok=True)
    doc.save(os.path.join(out_dir, f"考试题{number}答案.docx"))


# Main routine
# One entry per section: (sheet name, upper bound for row numbers, questions to draw)
paper_sections = (
    ("单选题", 566, 40),
    ("多选题", 196, 20),
    ("判断题", 418, 10),
    ("填空题", 190, 10),
)

for number in range(1, 21):  # number of different papers; 20 here
    # Draw the random row numbers for every section
    question_num = {
        f"{kind}号": Random_num(upper, qty)
        for kind, upper, qty in paper_sections
    }
    # Fetch the questions stored at those row numbers
    questions_data = {
        kind: Question(kind, question_num[f"{kind}号"])
        for kind, _, _ in paper_sections
    }
    # Save the paper, then its answer sheet
    To_word(number, questions_data)
    Answer(number, questions_data)
    print(f"试卷{number}及答案完成!")

试题-模板.docx
题库.xlsx

定义打印函数,以便重复调用

实现代码:

import win32api


def Print(fileName):
    """Ask the Windows shell to run its "print" operation on ``fileName``.

    Args:
        fileName: Path of the file to print.
    """
    parent_window = 0  # parent window handle; 0 is passed
    operation = "print"  # shell operation to perform on the file
    # NOTE(review): the original note says None here makes Windows use its
    # default printer - confirm against the ShellExecute documentation.
    parameters = None
    working_dir = "."  # default directory for the operation
    # Per the original note: must be 0 unless fileName is an executable, in
    # which case it selects how the program window is first shown.
    show_mode = 0

    win32api.ShellExecute(
        parent_window, operation, fileName, parameters, working_dir, show_mode
    )


# 获取待打印文件的路径
import os

path = '文件'  # folder that holds the files to print

# Build the full path of every entry with os.path.join (no hard-coded "\\")
# and keep regular files only: a sub-folder cannot be sent to the printer.
files = [
    full_path
    for full_path in (os.path.join(path, name) for name in os.listdir(path))
    if os.path.isfile(full_path)
]

# Print the files one after another
for file in files:
    Print(file)

从原Excel表中抽出数据存入同一文件的新的Sheet

实现代码:

import pandas as pd

# Load the material list; header=2 is passed so the column names are taken
# from further down the sheet rather than from its first row.
df = pd.read_excel("物料表.xlsx", header=2)
df.head()
# Sample output (first rows):
#     请求单号     单位     物料编号     架位     批号     数量     日期
# 0     A19X9239156     EA     11010XR0073700     LC000001     PC00001     3     2019-01-03
# 1     A19X9239156     EA     11020XR0025500     LC000002     PC00002     10     2019-01-03
# ...

# Add a helper column holding the month number of each date
df['月份'] = df['日期'].dt.month
df.head()
# Sample output (first rows):
#     请求单号     单位     物料编号     架位     批号     数量     日期     月份
# 0     A19X9239156     EA     11010XR0073700     LC000001     PC00001     3     2019-01-03     1
# 1     A19X9239156     EA     11020XR0025500     LC000002     PC00002     10     2019-01-03     1
# ...

# Rows dated in May
df5 = df.loc[df['月份'].eq(5)]
df5.head()
# Sample output (first rows):
#     请求单号     单位     物料编号     架位     批号     数量     日期     月份
# 9649     A19X9280200     EA     36012XR0413200     LC009650     PC09650     66     2019-05-03     5
# 9650     A19X9280200     EA     60022XR2298300     LC009651     PC09651     66     2019-05-03     5
# ...

# Rows dated in August
df8 = df.loc[df['月份'].eq(8)]
df8.head()
# Sample output (first rows):
#     请求单号     单位     物料编号     架位     批号     数量     日期     月份
# 17213     A19X9317364     EA     62010XR0500126     LC017214     PC17214     230     2019-08-01     8
# 17214     A19X9317364     EA     61010XR1120100     LC017215     PC17215     230     2019-08-01     8
# ...

def Remove_data(df, month=None):
    """Return ``df`` without the rows whose '月份' value is listed in ``month``.

    Args:
        df: DataFrame that contains a '月份' (month number) column.
        month: Iterable of month numbers to drop. ``None`` (the default) or an
            empty iterable leaves the data untouched.

    Returns:
        The filtered DataFrame; ``df`` itself when there is nothing to drop.
        The input frame is never modified.
    """
    # None replaces the former mutable default argument ``[]``
    months = [] if month is None else list(month)
    if not months:
        return df
    # One vectorised mask instead of one filtering pass per month
    return df[~df['月份'].isin(months)]


# Everything that is neither May nor August
df_rest = Remove_data(df, [5, 8])
df_rest.head()
# Sample output (first rows):
#     请求单号     单位     物料编号     架位     批号     数量     日期     月份
# 0     A19X9239156     EA     11010XR0073700     LC000001     PC00001     3     2019-01-03     1
# 1     A19X9239156     EA     11020XR0025500     LC000002     PC00002     10     2019-01-03     1
# ...

# Sheet name -> data; written in this order, each without the helper month column
sheets_to_write = {'5月': df5, '8月': df8, '剩余月份': df_rest}
with pd.ExcelWriter('物料表_1.xlsx', engine='openpyxl',
                    datetime_format='YYYY-MM-DD') as writer:
    for sheet_title, frame in sheets_to_write.items():
        frame.drop(columns='月份').to_excel(writer, sheet_name=sheet_title, index=False)

from openpyxl import load_workbook
from openpyxl.styles import Border, Side, PatternFill, Font, GradientFill, Alignment

# The style objects are identical for every cell, so they are built once
# here instead of once per cell inside the loops.
thin = Side(border_style="thin", color="000000")  # border line style and colour
cell_font = Font(size=10)
cell_border = Border(top=thin, left=thin, right=thin, bottom=thin)
cell_alignment = Alignment(horizontal="left", vertical="center")

wb = load_workbook("物料表_1.xlsx")
for sheetname in wb.sheetnames:
    ws = wb[sheetname]

    # Column widths
    ws.column_dimensions['A'].width = 12
    ws.column_dimensions['C'].width = 15.5
    ws.column_dimensions['G'].width = 10

    # Font size, alignment and borders for every cell from row 2 down
    # (row 1 is left as written)
    for row_cells in ws.iter_rows(min_row=2):
        for c in row_cells:
            c.font = cell_font
            c.border = cell_border
            c.alignment = cell_alignment
wb.save("物料表_1.xlsx")

物料表.xlsx

pandas提取指定数据并保存在原Excel工作簿中

实现代码:

import pandas as pd

# Load the material list (same header=2 setting as when the file is read elsewhere)
df = pd.read_excel("物料表.xlsx", header=2)

# Keep only the rows whose quantity exceeds 500
df500 = df.loc[df["数量"].gt(500)]

# mode='a' appends the new sheet to the existing workbook.
# NOTE(review): running this a second time may fail or create a renamed sheet
# because '数量大于500' then already exists - confirm for the pandas version in use.
with pd.ExcelWriter('物料表.xlsx', mode='a', engine='openpyxl',
                    datetime_format='YYYY-MM-DD') as writer:
    df500.to_excel(writer, sheet_name='数量大于500', index=False)

from openpyxl import load_workbook
from openpyxl.styles import Border, Side, PatternFill, Font, GradientFill, Alignment

# The style objects are identical for every cell, so they are built once
# here instead of once per cell inside the loops.
thin = Side(border_style="thin", color="000000")  # border line style and colour
cell_font = Font(size=10)
cell_border = Border(top=thin, left=thin, right=thin, bottom=thin)
cell_alignment = Alignment(horizontal="left", vertical="center")

wb = load_workbook("物料表.xlsx")
ws = wb["数量大于500"]

# Column widths
ws.column_dimensions['A'].width = 12
ws.column_dimensions['C'].width = 15.5
ws.column_dimensions['G'].width = 10

# Font size, alignment and borders for every cell from row 2 down
# (row 1 is left as written)
for row_cells in ws.iter_rows(min_row=2):
    for c in row_cells:
        c.font = cell_font
        c.border = cell_border
        c.alignment = cell_alignment
wb.save("物料表.xlsx")

物料表.xlsx

用 Python 出数学练习题

实现代码:

# import random
#
# for i in range(5):
#     num1 = random.randint(1, 9)
#     num2 = random.randint(1, 9)
#     print(f"{num1} + {num2} = ")


# 加法、乘法题
import random
# 20 rows of 4 problems each (80 in total, which fills one page of the Word file).
# Both operands of every problem are random integers from 0 to 9.
data = [
    [f"{random.randint(0, 9)} + {random.randint(0, 9)} = " for _ in range(4)]
    for _ in range(20)
]
print(data)

# # 减法题
# import random
#
# data = []
# group = []
# while len(data) < 20:
#     num1 = random.randint(1, 9)
#     num2 = random.randint(1, 9)
#     if num1 > num2:
#         group.append(f"{num1} - {num2} = ")
#         if len(group) == 4:
#             data.append(group)
#             group = []
#
# # print(data)

# # 除法题
# import random
#
# data = []  # 储存题目的列表
# group = []  # 中转列表
# while len(data) < 20:
#     num1 = random.randint(1, 19)
#     num2 = random.randint(1, 19)
#     if num1 >= num2 and num1 % num2 == 0:
#         group.append(f"{num1} ÷ {num2} = ")  # 数据放入中转列表
#         if len(group) == 4:  # 按四道题分一组
#             data.append(group)  # 数据放入总列表
#             group = []  # 清空中转列表
#
# print(data)

# Export to a Word file
def To_word(data, file_name):
    """Write rows of problems to ``<file_name>.docx``, one paragraph per row.

    Args:
        data: List of rows; each row is a list of strings, which are joined
            with two tab characters.
        file_name: Name of the Word file to create, without the extension.
    """
    from docx import Document
    from docx.shared import Pt  # font size in points

    doc = Document()
    for problems in data:
        doc.add_paragraph("\t\t".join(problems))

    # Set every run of every paragraph in the document to 16 pt
    all_runs = (run for para in doc.paragraphs for run in para.runs)
    for run in all_runs:
        run.font.size = Pt(16)

    doc.save(f"{file_name}.docx")

# Export the generated problems; the second argument is the output file name without extension
To_word(data, "加法题")
# To_word(data, "减法题")
# To_word(data, "乘法题")
# To_word(data, "除法题")

一键提取PDF中的表格到Excel

实现代码:

import os
import pdfplumber
from openpyxl import Workbook

path = 'PDF'  # folder that holds the PDF files
# Full path of every entry in the folder (os.path.join instead of a hard-coded "\\")
files = [os.path.join(path, name) for name in os.listdir(path)]
key_words = "主要会计数据"

for file in files:
    with pdfplumber.open(file) as p:
        wb = Workbook()  # new workbook
        wb.remove(wb.worksheets[0])  # drop the sheet every new workbook starts with

        # Index of the first page containing the keyword, plus the following page
        pages_wanted = []
        for index, page in enumerate(p.pages):  # pages are numbered from 0
            # extract_text() can yield nothing for a page without a text
            # layer; fall back to "" so the membership test cannot fail
            if key_words in (page.extract_text() or ""):
                pages_wanted = [index, index + 1]
                break

        # Copy the tables of the wanted pages into the workbook
        page_count = len(p.pages)
        for i in pages_wanted:
            if i >= page_count:  # the keyword was on the last page: no next page
                continue
            tables = p.pages[i].extract_tables()
            if tables:  # pages without tables get no sheet
                # the sheet number matches the page number in the PDF
                ws = wb.create_sheet(f"Sheet{i + 1}")
                for table in tables:
                    for row in table:  # each row is a list of cell values
                        ws.append(row)

    # A workbook without any sheet cannot be saved, so such files are skipped
    if not wb.worksheets:
        print(f"{file} 中未找到可提取的表格,已跳过。")
        continue

    # splitext/basename keep the whole file name even when it contains extra dots
    stem = os.path.splitext(os.path.basename(file))[0]
    wb.save(os.path.join("Excel", f"{stem}.xlsx"))

PDF:
东旭蓝天:2019年年度报告.PDF
华特气体:2019年年度报告(修订版).PDF

批量修改 Word 文件中的段落格式

实现代码:

import os
import docx
# 定义字体格式
from docx.shared import Pt  # 用于设定字体大小(磅值)
from docx.oxml.ns import qn  # 用于应用中文字体


def _apply_font(run, size_pt, bold, font_name):
    """Set size (in points), weight and typeface of one run."""
    run.font.size = Pt(size_pt)
    run.bold = bold
    run.font.name = font_name
    # The eastAsia attribute must be set as well, otherwise the typeface is
    # not applied to Chinese text
    run._element.rPr.rFonts.set(qn("w:eastAsia"), font_name)


def F_title(run):
    """Format a run of the document title: 22 pt, bold, 方正小标宋_GBK."""
    _apply_font(run, 22, True, "方正小标宋_GBK")


def F_name_dept(run):
    """Format a run of the name/department/date lines: 17 pt, regular, 楷体."""
    _apply_font(run, 17, False, "楷体")


def F_main(run):
    """Format a run of the body text: 17 pt, regular, 仿宋."""
    _apply_font(run, 17, False, "仿宋")


def F_title1(run):
    """Format a run of a level-1 heading: 17 pt, regular, 黑体."""
    _apply_font(run, 17, False, "黑体")


def F_title2(run):
    """Format a run of a level-2 heading: 17 pt, bold, 楷体."""
    _apply_font(run, 17, True, "楷体")


# Collect the paths of the files to process
path = '待处理文件'  # folder that holds the files
# Full path of every entry in the folder (os.path.join instead of a hard-coded "\\")
files = [os.path.join(path, name) for name in os.listdir(path)]

# Marker strings used to recognise heading paragraphs; they are the same for
# every file, so they are defined once outside the loop.
title1 = ["一、", "二、", "三、", "四、"]  # markers of level-1 headings
title2 = ["1、", "2、", "3、", "4、"]  # markers of level-2 headings

# Open the files one by one and apply the font formats
for file in files:
    doc = docx.Document(file)
    paragraphs = doc.paragraphs  # fetched once per document

    # Slicing instead of indexing [0], so an empty document does not raise
    for para in paragraphs[:1]:  # document title
        for run in para.runs:
            F_title(run)

    for para in paragraphs[1:3]:  # department, name and date
        for run in para.runs:
            F_name_dept(run)

    for para in paragraphs[3:]:
        text = para.text
        if any(mark in text for mark in title1):  # level-1 heading
            apply_format = F_title1
        elif any(mark in text for mark in title2):  # level-2 heading
            apply_format = F_title2
        else:  # everything else is body text
            apply_format = F_main
        for run in para.runs:
            apply_format(run)

    # Save under the same file name in the output folder
    doc.save(os.path.join('已处理文件', os.path.basename(file)))
    

已处理文件:
报告-何十.docx
报告-刘七.docx
报告-张三.docx
报告-朱八
报告-李四.docx
报告-猪八戒.docx
报告-王五.docx
报告-秦九.docx
报告-赵六.docx
报告-黄二.docx

用 Python 替换 Word 文档中的文字

实现代码:

# -*- coding:utf-8 -*-
import docx


# # 定义函数替换文字块中的字符
# def info_update(doc, old_info, new_info):
#     for para in doc.paragraphs:  # 遍历段落
#         for run in para.runs:  # 遍历文字块
#             # run.text = run.text.replace(old_info, new_info)  # 替换
#             print(run.text)
#
#
# doc = docx.Document('替换前.docx')
# info_update(doc, '第四次', '第五次')
# info_update(doc, '2019', '2020')
# info_update(doc, '18', '10')
# doc.save('替换后.docx')

# doc = docx.Document('替换前.docx')
# for para in doc.paragraphs:  # 遍历段落
#     for run in para.runs:  # 遍历文字块
#         # run.text = run.text.replace(old_info, new_info)  # 替换
#         print(run.text)

# # 按段落查看文字
# doc = docx.Document('替换前.docx')
# for para in doc.paragraphs:
#     print(para.text)

import docx
from docx.shared import Pt  # 用于设定字体大小(磅值)
from docx.oxml.ns import qn  # 用于应用中文字体


def _format_run(run, size_pt, bold, font_name="微软雅黑"):
    """Set size (in points), weight and typeface of one run."""
    run.font.size = Pt(size_pt)
    run.bold = bold
    run.font.name = font_name
    # The eastAsia attribute must be set as well, otherwise the typeface is
    # not applied to Chinese text
    run._element.rPr.rFonts.set(qn("w:eastAsia"), font_name)


def info_update(doc, old_info, new_info):
    """Replace ``old_info`` with ``new_info`` in ``doc`` and reformat its text.

    After the replacement the first paragraph (the title) is set to 14 pt bold
    and all following paragraphs to 12 pt regular, both in 微软雅黑.

    Args:
        doc: The python-docx document to modify in place.
        old_info: Text to search for.
        new_info: Text to put in its place.
    """
    paragraphs = doc.paragraphs

    for para in paragraphs:
        # Assigning para.text rebuilds the paragraph as a single run, so it is
        # only done where the text really changes; paragraphs without a match
        # keep their existing runs.
        if old_info in para.text:
            para.text = para.text.replace(old_info, new_info)

    # Slicing instead of indexing [0], so an empty document does not raise
    for para in paragraphs[:1]:  # title paragraph
        for run in para.runs:
            _format_run(run, 14, True)

    for para in paragraphs[1:]:  # all remaining paragraphs
        for run in para.runs:
            _format_run(run, 12, False)


doc = docx.Document('替换前.docx')

# (old text, new text) pairs, applied in this order
replacements = (
    ("第四次", "第五次"),
    ("2019", "2020"),
    ("18", "10"),
)
for old_text, new_text in replacements:
    info_update(doc, old_text, new_text)

doc.save('替换后_设置格式.docx')

替换前.docx