标签 Excel 下的文章 - 生活中的Hygge-过去无可挽回,未来可以改变

登录

标签搜索

Hygge

累计撰写 98 篇文章
累计收到 450 条评论

搜索到 1 篇与的结果

2024-04-26
蚂蚁智学题库爬虫并整理到Excel 接的小私活，目标爬取下来题库并整理到Excel里。目标站点:https://www.mayizhixue.cn/{cloud title="蚂蚁智学" type="bd" url="https://pan.baidu.com/s/1pjg1vTojaazfebpCT1J9SQ?pwd=d2nv" password="d2nv"/}import requests from openpyxl import Workbook from openpyxl.utils import get_column_letter from openpyxl import load_workbook import os common_headers = { 'Authorization': 'TOKEN', } record_id = 1 wb = load_workbook(filename='sample.xlsx') ws = wb.active rows = ws.rows def get_target_row_number(): rows = ws.rows idx = 1 for row in rows: # for cell in row: # print(cell.value, end=' ') # print() if row[0].value is None: return idx idx = idx + 1 return idx def write_row(row, kIndex=None): global record_id rowNumber = get_target_row_number() # print(f"当前行数:{rowNumber}") if kIndex is None: ws.cell(row=rowNumber, column=1).value = record_id else: ws.cell(row=rowNumber, column=1).value = str(record_id) + '.' + str(kIndex) ws.cell(row=rowNumber, column=2).value = row.get('title', '') ws.cell(row=rowNumber, column=3).value = row.get('type', '') # ws.cell(row=rowNumber, column=4).value = row.get('type') # 分数 # ws.cell(row=rowNumber, column=5).value = row.get('type') # 难度 option_idx = 0 for option in row.get('options', []): if 6 + option_idx >= 11: break ws.cell(row=rowNumber, column=6 + option_idx).value = option option_idx = option_idx + 1 ws.cell(row=rowNumber, column=11).value = row.get('answer', '') ws.cell(row=rowNumber, column=12).value = row.get('analysis', '') if kIndex is None: record_id = record_id + 1 def map_type_kv(key): # 1-单选题 2-多选题 6-共享题干题 type = '单选题' if key in ('1', 1): type = '单选题' elif key in ('2', 2): type = '多选题' elif key in ('3', 3): type = '不定项选择题' elif key in ('4', 4): type = '判断题' elif key in ('6', 6): type = '材料题' else: print('不支持的类型:%s' % key) exit() return type def get_test_question(sectionId): params = { 'sectionId': sectionId, 'type': '2', } response = requests.get('https://wx.yiwenjy.cn/yunlian_pc/querySubjectList', params=params, headers=common_headers).json() data = response.get('data') handle_data_2_excel(data) def handle_data_2_excel(data): for i in data: # 此时 i 为对象,取出所有key并遍历 for key in i.keys(): type = map_type_kv(key) # 开始遍历这一题型的所有题目 if type in ['单选题', '多选题', '不定项选择题', '判断题']: for j in i.get(key): row = { 'title': j.get('issue'), 'type': type, 'options': [], 'answer': j.get('answer'), 'analysis': j.get('analysis') } options = j.get('sOption') # 使用|分割选项 options = options.split('|') for k in options: # j为A.选项内容所以取第三个字符开始 row['options'].append(k[2:]) write_row(row) elif type == '材料题': for j in i.get(key): row = { 'title': j.get('stem'), 'type': type } write_row(row) # 开始爬下面的point kIndex = 1 for k in j.get('childre', []): subtype = map_type_kv(k.get('subType')) row = { 'title': k.get('issue'), 'type': subtype, 'options': [], 'answer': k.get('answer'), 'analysis': k.get('analysis') } options = k.get('sOption') # 使用|分割选项 options = options.split('|') for opt in options: # j为A.选项内容所以取第三个字符开始 row['options'].append(opt[2:]) write_row(row, kIndex) kIndex = kIndex + 1 def get_exam_question(paperId): response = requests.get( 'https://wx.yiwenjy.cn/yunlian_pc/queryoPaperSubjectList', params={ 'paperId': paperId, 'mode': '2' }, headers=common_headers).json() data = response.get('data') handle_data_2_excel(data) def get_catalogue(courseName, courseId): response = requests.get('https://wx.yiwenjy.cn/yunlian_pc/querySectionList', params={ 'courseId': courseId }, headers=common_headers).json() data = response.get('data') for i in data: print(f"当前章节ID:{i.get('id')},章节名称:{i.get('sectionName')}") # 创建相关文件夹 if not os.path.exists(courseName + '/' + i.get('sectionName')): os.makedirs(courseName + '/' + i.get('sectionName')) # 这里需要一直向下判断是否有子节点 copy_i = i # dfs算法 access_next_level(courseName + "/", copy_i) def access_next_level(path, item): global wb, ws, rows, record_id # dfs算法开始不断找下级向上返回 if item.get('children') is not None: path = path + item.get('sectionName') + '/' for i in item.get('children'): access_next_level(path, i) else: print(f"当前小节ID:{item.get('id')},小节名称:{item.get('sectionName')}") record_id = 1 wb = load_workbook(filename='sample.xlsx') ws = wb.active rows = ws.rows get_test_question(item.get('id')) # 判断目录是否存在 if not os.path.exists(path): os.makedirs(path) wb.save(f'{path}/{item.get("sectionName")}.xlsx') def get_product_course_info(id): response = requests.get('https://wx.yiwenjy.cn/yunlian_pc/queryProductCourse', params={ 'id': id }, headers=common_headers).json() data = response.get('data') """ 每个ITEM courseName:"中国建设银行VIP" examId:"43d8625d21614cab9f6a2e323e0cd4db" id:"1686999432228376576" """ return data def query_paper_type_list(id): response = requests.get('https://wx.yiwenjy.cn/yunlian_pc/queryPaperTypeList', params={ 'courseId': id }, headers=common_headers).json() """ "id": "1", "paperTypeName": "章节练习", "icon": null, "version": null, "isSection": null, "hasSection": null """ return response.get('data') def get_li_nian_zhen_ti_list(id, paperTypeId): response = requests.get('https://wx.yiwenjy.cn/yunlian_pc/queryPaperList', params={ 'courseId': id, 'paperTypeId': paperTypeId }, headers=common_headers).json() """ 每个ITEM "id": "1703299201187844096", "paperName": "2022年银行招聘笔试《中国建设银行》试题", "onlineTime": "2023-09-17 00:00:00", "referenNumber": 139, "tryBuy": 1, "hasMake": 3, "mode": null """ return response.get('data') course = [ {'id': 'f753f9934c60427fadfba664229a8487', 'name': '2024年军队文职人员招聘《公共科目》题库'} ] for courseItem in course: # 创建科目的文件夹 if not os.path.exists(courseItem.get('name')): os.makedirs(courseItem.get('name')) product_course_info = get_product_course_info(courseItem.get('id')) for product_course in product_course_info: # 查询当前科目下的试卷类型列表 paper_type_list = query_paper_type_list(product_course.get('id')) for paper_type in paper_type_list: print(f"当前科目:{product_course.get('courseName')},当前试卷类型:{paper_type.get('paperTypeName')}") if paper_type.get('paperTypeName') == '章节练习': get_catalogue(courseItem.get("name"), product_course.get('id')) elif paper_type.get('paperTypeName') in ('历年真题', '考前点题', '模拟试卷', '预测试卷', '考前点题'): li_nian_zhen_ti_list = get_li_nian_zhen_ti_list(product_course.get('id'), paper_type.get('id')) for li_nian_zhen_ti in li_nian_zhen_ti_list: record_id = 1 wb = load_workbook(filename='sample.xlsx') ws = wb.active rows = ws.rows print(f"当前试卷ID:{li_nian_zhen_ti.get('id')},试卷名称:{li_nian_zhen_ti.get('paperName')}") get_exam_question(li_nian_zhen_ti.get('id')) wb.save( f'{courseItem.get("name")}/{courseItem.get("name")}-{li_nian_zhen_ti.get("paperName")}.xlsx') else: print("不支持的试卷类型:%s" % paper_type.get('paperTypeName')) exit()
- 2024年04月26日
- 149 阅读
- 0 评论
- 0 点赞