| 12
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
 100
 101
 102
 103
 
 | # Load the required modules
 import time
 import requests
 import openpyxl
 
 # Start the wall-clock timer; the script prints the elapsed time at the end.
 time1 = time.time()

 # Result table: the first row is the column header, one row per answer follows.
 lists = [['answer_kname', 'headline', 'voteup_count', 'content']]
 
 ##################
 # Download the answers of question 64270965 page by page and append one row
 # per answer to `lists`.
 def _fetch_answer_page(offset, limit):
     """Request one page of answers and return the list under the 'data' key.

     Args:
         offset: Index of the first answer to request.
         limit: Maximum number of answers to request.

     Returns:
         The value stored under 'data' in the JSON response.

     Raises:
         requests.HTTPError: If the server answers with an error status.
         requests.Timeout: If the server does not respond in time.
     """
     url = 'https://www.zhihu.com/api/v4/questions/64270965/answers'
     headers = {
         'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
     }
     params = {
         'include': 'data[*].is_normal,admin_closed_comment,reward_info,is_collapsed,annotation_action,annotation_detail,collapse_reason,is_sticky,collapsed_by,suggest_edit,comment_count,can_comment,content,editable_content,voteup_count,reshipment_settings,comment_permission,created_time,updated_time,review_info,relevant_info,question,excerpt,relationship.is_authorized,is_author,voting,is_thanked,is_nothelp,is_labeled,is_recognized,paid_info,paid_info_content;data[*].mark_infos[*].url;data[*].author.follower_count,badge[*].topics',
         'offset': str(offset),
         'limit': str(limit),
         'sort_by': 'default',
         'platform': 'desktop'
     }
     # Without a timeout a stalled connection would hang the crawl forever.
     res = requests.get(url, headers=headers, params=params, timeout=30)
     # Surface HTTP errors directly instead of failing later on the JSON body.
     res.raise_for_status()
     return res.json()['data']


 # (offset, limit) of every request: the first 3 answers, then pages of 5.
 # The second request used to be driven by range(3, 3, 1), which is empty, so
 # the answers at offsets 3-7 were never downloaded.
 pages = [(0, 3), (3, 5)]
 pages.extend((offset, 5) for offset in range(8, 278, 5))

 for offset, limit in pages:
     for item in _fetch_answer_page(offset, limit):
         answer_kname = item['author']['name']
         headline = item['author']['headline']
         content = item['content']
         voteup_count = item['voteup_count']

         # Column order must match the header row at the top of `lists`.
         lists.append([answer_kname, headline, voteup_count, content])
 
 
 ##################
 # Dump every collected row (header included) into a one-sheet Excel workbook.
 workbook = openpyxl.Workbook()
 worksheet = workbook.active
 worksheet.title = 'answers'
 for row in lists:
     worksheet.append(row)

 workbook.save('即将步入研究生,有什么忠告.xlsx')
 
 ##################
 # Write the HTML body of every answer, back to back, into a single file.
 # `with` guarantees the file is closed even when a write fails.
 with open('知乎:即将步入研究生,有什么忠告.html', 'w', encoding='utf-8') as file_html:
     # lists[0] is the column-header row; its last cell is the literal text
     # 'content', which is not answer HTML and must not end up in the page.
     for row in lists[1:]:
         file_html.write(row[3])
 
 ##################
 # Report how long the whole run took, in seconds with three decimals.
 time2 = time.time()
 elapsed = time2 - time1
 print('爬虫耗时:%.3f' % elapsed, '秒')
 
 
 |