好久沒給大家更新爬蟲文章了,抱歉抱歉。這期給大家帶來一篇小爬蟲,希望大家喜歡;喜歡的話來個關注,支援一下博主,謝謝各位了。
寫程式碼了各位寶寶們
這裡要注意一點就是要記得提前登入,cookie要用登入後的
# Request headers passed to requests.get() for the kuwo.cn API calls.
# NOTE: the Referer, Cookie and csrf values are placeholders (the identifier
# reads "please use your own"); they are bare names, not strings, so each one
# must be replaced with a quoted value taken from a logged-in session before
# this dict can be evaluated.
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
'87.0.4280.88 Safari/537.36',
'Referer': 請用自己的,
'Cookie': 請用自己的,
'csrf': 請用自己的,
'Host': 'www.kuwo.cn'
}
在翻頁的時候可以利用for迴圈進行遍歷
# Build the search URL for each requested page: `key` carries the singer name
# and `pn` is set to i + 1 for loop index i.
# NOTE(review): rn=30 presumably sets the number of results per page -- confirm.
for i in range(page):
start_url = f'http://www.kuwo.cn/api/www/search/searchMusicBykeyWord?key={singer_name}&pn=' \
f'{i + 1}&rn=30&httpsStatus=1&reqId=d301af60-6e1e-11ec-840f-dfca3a2ceb68'
解析資料
# Fetch the search page and decode the JSON body.
response = requests.get(start_url, headers=headers).json()
# Extract song_names and song_rids from the decoded response.
# NOTE(review): '$..name' / '$..rid' match those keys at any depth; the two
# results are later paired by position, which assumes they line up -- confirm
# against the actual response.
song_names = jsonpath.jsonpath(response, '$..name')
song_rids = jsonpath.jsonpath(response, '$..rid')
遍歷得到song_name, song_rid 構造song_info_url
# Walk the (song_name, song_rid) pairs and build song_info_url for each one;
# the rid is substituted into the `mid` query parameter.
for song_name, song_rid in zip(song_names, song_rids):
song_info_url = 'http://www.kuwo.cn/api/v1/www/music/playUrl?mid={}' \
'&type=music&httpsStatus=1&reqId=c0ac92a0-6e35-11ec-b428-05be0a87bc11'.format(song_rid)
原始碼展示:
#!/usr/bin/env python
# -*-coding:utf8-*-
from threading import Thread
import tkinter as tk
import os, requests, jsonpath
from requests_html import HTMLSession
# Module-level requests_html session, created when the module is loaded.
# NOTE(review): none of the code shown in this file references `session`
# (all requests go through `requests.get`) -- confirm it is still needed.
session = HTMLSession()
class kuwoSpider(object):
    """Tkinter front end that searches kuwo.cn for a singer and saves the hits as MP3 files.

    The window holds two entries (singer name, number of result pages), a
    "crawl" button that starts the download in a worker thread, a "clear"
    button, and a text box where progress and errors are reported.

    NOTE: the ``Referer``, ``Cookie`` and ``csrf`` header values in ``main``
    are placeholders that must be replaced with real strings from a
    logged-in session before the downloader can run.
    """

    # Fixed window size, shared by the initial geometry and by center().
    WINDOW_WIDTH = 800
    WINDOW_HEIGHT = 600
    # Seconds to wait on any single HTTP request before giving up.
    REQUEST_TIMEOUT = 10
    SEARCH_URL = 'http://www.kuwo.cn/api/www/search/searchMusicBykeyWord'
    PLAY_URL = 'http://www.kuwo.cn/api/v1/www/music/playUrl'
    # Characters that cannot appear in a file name on common file systems.
    _FILENAME_TABLE = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

    def __init__(self):
        """Create the main window and lay out every widget."""
        # Main window: title and initial size.
        self.window = tk.Tk()
        self.window.title('音樂下載器')
        self.window.geometry('{}x{}'.format(self.WINDOW_WIDTH, self.WINDOW_HEIGHT))

        # Prompt label for the singer name.
        self.label_user = tk.Label(self.window, text='請輸入要下載的歌手名字:',
                                   font=('Arial', 12), width=130, height=2)
        self.label_user.pack()
        # Entry bound to the singer-name prompt.
        self.entry_user = tk.Entry(self.window, show=None, font=('Arial', 14))
        self.entry_user.pack(after=self.label_user)

        # Prompt label for the number of pages to crawl.
        self.label_passwd = tk.Label(self.window, text="請輸入爬取頁數: ",
                                     font=('Arial', 12), width=30, height=2)
        self.label_passwd.pack()
        # Entry bound to the page-count prompt.
        self.entry_passwd = tk.Entry(self.window, show=None, font=('Arial', 14))
        self.entry_passwd.pack(after=self.label_passwd)

        # Text box that displays the outcome of each operation.
        self.text1 = tk.Text(self.window, font=('Arial', 12), width=85, height=22)
        self.text1.pack()

        # "Crawl" button: starts the download; packed above the text box.
        self.button_1 = tk.Button(self.window, text='爬取', font=('Arial', 12), width=10, height=1,
                                  command=self.run)
        self.button_1.pack(before=self.text1)

        # "Clear" button: empties the entries and the text box.
        self.button_2 = tk.Button(self.window, text='清除', font=('Arial', 12), width=10, height=1,
                                  command=self.parse_hit_click_2)
        self.button_2.pack(anchor="e")

    def run(self):
        """Start the download in a worker thread so the GUI stays responsive."""
        Thread(target=self.parse_hit_click_1).start()

    def parse_hit_click_1(self):
        """Read and validate both entries, then call ``main``.

        Invalid input is reported in the text box instead of raising inside
        the worker thread, where the user would never see the error.
        """
        singer_name = self.entry_user.get().strip()
        if not singer_name:
            self._log('請輸入歌手名字')
            return
        page = self._parse_page_count(self.entry_passwd.get())
        if page is None:
            self._log('頁數必須是正整數')
            return
        self.main(singer_name, page)

    def main(self, singer_name, page):
        """Search ``page`` result pages for ``singer_name`` and download every hit.

        Args:
            singer_name: Search keyword; also used (sanitised) as the name of
                the output directory under the current working directory.
            page: Number of result pages to walk, starting from page 1.

        A failure on one page or one song is written to the text box and the
        loop moves on to the next item.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
                          '87.0.4280.88 Safari/537.36',
            # Placeholders ("please use your own"): replace each bare name with
            # a quoted string from a logged-in session. Left as-is they are
            # undefined names and raise NameError.
            'Referer': 請用自己的,
            'Cookie': 請用自己的,
            'csrf': 請用自己的,
            'Host': 'www.kuwo.cn'
        }
        target_dir = os.path.join('.', self._safe_filename(singer_name))
        for page_no in range(1, page + 1):
            try:
                songs = self._search_page(headers, singer_name, page_no)
            except (requests.RequestException, ValueError) as err:
                self._log('第{}頁搜尋失敗: {}'.format(page_no, err))
                continue
            for song_name, song_rid in songs:
                try:
                    song_content = self._fetch_song(headers, song_rid)
                    self._save_song(target_dir, song_name, song_content)
                except (requests.RequestException, ValueError, OSError) as err:
                    self._log('****{}****下載失敗: {}'.format(song_name, err))
                    continue
                self._log('****{}****下載成功'.format(song_name))

    def _search_page(self, headers, singer_name, page_no):
        """Return the ``(song_name, song_rid)`` pairs found on one search page."""
        # Passing the query through ``params`` lets requests escape the name,
        # so characters such as '&' or '#' cannot corrupt the URL.
        response = requests.get(
            self.SEARCH_URL,
            params={
                'key': singer_name,
                'pn': page_no,
                'rn': 30,
                'httpsStatus': 1,
                'reqId': 'd301af60-6e1e-11ec-840f-dfca3a2ceb68',
            },
            headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        ).json()
        # NOTE(review): '$..name' / '$..rid' match those keys at any depth and
        # are paired by position; this assumes only song entries carry them --
        # confirm against the live response.
        song_names = jsonpath.jsonpath(response, '$..name')
        song_rids = jsonpath.jsonpath(response, '$..rid')
        # jsonpath() yields False rather than an empty list when nothing matches.
        if not song_names or not song_rids:
            return []
        return list(zip(song_names, song_rids))

    def _fetch_song(self, headers, song_rid):
        """Resolve the play URL for ``song_rid`` and return the audio bytes."""
        song_info = requests.get(
            self.PLAY_URL,
            params={
                'mid': song_rid,
                'type': 'music',
                'httpsStatus': 1,
                'reqId': 'c0ac92a0-6e35-11ec-b428-05be0a87bc11',
            },
            headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        ).json()
        song_urls = jsonpath.jsonpath(song_info, '$..url')
        if not song_urls:
            raise ValueError('回應中沒有可用的下載位址')
        # The audio request is sent without the API headers, as the 'Host'
        # entry there names www.kuwo.cn specifically.
        reply = requests.get(song_urls[0], timeout=self.REQUEST_TIMEOUT)
        # Do not store an HTTP error page under an .mp3 name.
        reply.raise_for_status()
        return reply.content

    def _save_song(self, directory, song_name, song_content):
        """Write ``song_content`` to ``<directory>/<song_name>.mp3``."""
        os.makedirs(directory, exist_ok=True)
        target = os.path.join(directory, '{}.mp3'.format(self._safe_filename(song_name)))
        with open(target, 'wb') as f:
            f.write(song_content)

    def _log(self, message):
        """Append ``message`` plus the spacer lines to the result text box."""
        try:
            self.text1.insert("insert", message)
            self.text1.insert("insert", '\n ')
            self.text1.insert("insert", '\n ')
        except (tk.TclError, RuntimeError):
            # The window may already be closed; keep the worker going, which
            # is what the original catch-all handler effectively did.
            pass

    @staticmethod
    def _safe_filename(name):
        """Return ``name`` with path-hostile characters replaced by '_'."""
        cleaned = str(name).translate(kuwoSpider._FILENAME_TABLE)
        # Strip surrounding whitespace and dots so '.', '..' or a blank title
        # cannot become the file name.
        cleaned = cleaned.strip().strip(' .')
        return cleaned or 'untitled'

    @staticmethod
    def _parse_page_count(text):
        """Return ``text`` as a positive int, or None when it is not one."""
        try:
            pages = int(str(text).strip())
        except ValueError:
            return None
        return pages if pages > 0 else None

    def parse_hit_click_2(self):
        """Clear both entries and the result text box."""
        self.entry_user.delete(0, "end")
        self.entry_passwd.delete(0, "end")
        self.text1.delete("1.0", "end")

    def center(self):
        """Position the window in the middle of the screen."""
        ws = self.window.winfo_screenwidth()
        hs = self.window.winfo_screenheight()
        x = int((ws / 2) - (self.WINDOW_WIDTH / 2))
        y = int((hs / 2) - (self.WINDOW_HEIGHT / 2))
        self.window.geometry('{}x{}+{}+{}'.format(self.WINDOW_WIDTH, self.WINDOW_HEIGHT, x, y))

    def run_loop(self):
        """Lock the window size, center it, and enter the Tk main loop."""
        # Forbid resizing in both directions.
        self.window.resizable(False, False)
        self.center()
        # Blocks until the window is closed.
        self.window.mainloop()
# Script entry point: build the downloader window and hand control to Tk.
if __name__ == '__main__':
    kuwoSpider().run_loop()