Python处理学习通题目

本文最后更新于358 天前，其中的信息可能已经过时，如有错误请发送邮件到2067965693@qq.com

1.导出学习通题目html文件

2.改名

3.运行程序

from bs4 import BeautifulSoup
import re
from docx import Document
from pathlib import Path

file_path = Path("output.docx")
if file_path.exists():
    if file_path.is_file():  # 精确判断文件
        print("源文件存在")
        doc = Document("output.docx")
    else:
        print("路径存在，但非文件")
else:
    print("源文件不存在")
    doc = Document()

def extract_fill_in_questions(html_content):
    soup = BeautifulSoup(html_content, 'html.parser')
    # 提取所有填空题
    fill_questions = soup.select("div.mark_item")
    for a in fill_questions:
        c = a.select("div.aiArea")
        for d in c:
            name = d.select("h3.mark_name")
            letter = d.select("ul.mark_letter")
            anser = d.select(".rightAnswerContent")
            for i in name:
                print(i.get_text())
                # 写入到doc中
                doc.add_paragraph(i.get_text())
            if letter:
                for j in letter:
                    print(j.get_text())
                    doc.add_paragraph(j.get_text())
            for k in anser:
                print(k.get_text())
                doc.add_paragraph(k.get_text())
            print("===========================================================")
    doc.save('output.docx')

# 使用示例
if __name__ == "__main__":
    with open('222.html', 'r', encoding='utf-8') as f:
        html_content = f.read()
    extract_fill_in_questions(html_content)

发送评论编辑评论

发送评论 编辑评论

推荐文章

发送评论编辑评论