LOL爬蟲小工具是一款利用 Python 爬取指定 LOL 高清壁紙、皮膚圖片、頭像圖片、圖標等資源的下載工具。作者根據網上的爬蟲教程,結合個人愛好寫了這個爬蟲,能爬取 LOL 國服官網資料庫裡的圖片。研究了一下 Python,寫出了這麼個小東西。
軟件說明
爬取英雄聯盟高清圖片
運行結果
工具源碼
# coding=utf-8
import json
import os
import re
import threading
import time
import urllib
import urllib.request
from queue import Empty, Queue

import requests
def getHttpStatusCode(url):
    """Return the HTTP status code obtained by a GET request to ``url``.

    Callers in this file compare the result against ``404`` to decide
    whether an image exists before downloading it.

    Args:
        url: Address to probe.

    Returns:
        The integer status code of the response. If the request itself
        fails (connection error, timeout, invalid URL, ...), the raised
        ``requests`` exception instance is returned instead of a number.
    """
    try:
        # stream=True: only the status line is needed, so do not download
        # the body. The ``with`` block releases the connection afterwards.
        # The timeout keeps a stalled server from hanging the caller forever.
        with requests.get(url, stream=True, timeout=10) as response:
            return response.status_code
    except requests.exceptions.RequestException as e:
        # RequestException is the base class of HTTPError, ConnectionError
        # and Timeout, so every request failure is reported the same way.
        return e
def Get_Hero_Jsurl(queue):
    """Fetch the hero index from the LOL site and enqueue every hero id.

    Downloads ``champion.js``, decodes it as gb2312, extracts the JSON
    object found between ``data":`` and ``,"version":"`` and puts the
    ``id`` field of each entry on ``queue``.

    Args:
        queue: Queue-like object with a ``put`` method that receives the
            hero id strings.

    Raises:
        ValueError: If the downloaded script does not contain the expected
            ``data`` payload.
    """
    url = 'https://lol.qq.com/biz/hero/champion.js'
    response = requests.get(url).content.decode('gb2312')
    regular = r'data":(.*?),"version":"'
    matches = re.findall(regular, response)
    if not matches:
        # Fail with a readable message instead of a bare IndexError.
        raise ValueError('hero data not found in ' + url)
    dict_js = json.loads(matches[0])
    for hero in dict_js.values():
        # The id is enqueued as-is; it is later used to build the URL of the
        # per-hero script.
        queue.put(hero['id'])
def Stitching_JS_links(name_en):
    """Build the URL of the per-hero data script for the given hero id.

    Args:
        name_en: Hero id as it appears in the hero index (e.g. ``Annie``).

    Returns:
        The URL string ``https://lol.qq.com/biz/hero/<name_en>.js``.
    """
    return 'https://lol.qq.com/biz/hero/{}.js'.format(name_en)
def Parsing_hero_JS(url):
    """Download one hero script and return the fields used for downloading.

    The script at ``url`` is decoded as gb2312, and the JSON object located
    between ``data":`` and ``,"version":"`` is parsed.

    Args:
        url: Address of the per-hero ``.js`` file.

    Returns:
        A dict with the keys ``name_CN`` (``name`` and ``title`` joined by a
        space), ``name_EN`` (the ``id`` field), ``skins``, ``skill`` (the
        ``spells`` field) and ``passive``.
    """
    payload_pattern = r'data":(.*?),"version":"'
    script_text = requests.get(url).content.decode('gb2312')
    payloads = re.findall(payload_pattern, script_text)
    hero = json.loads(payloads[0])
    return {
        'name_CN': hero['name'] + ' ' + hero['title'],
        'name_EN': hero['id'],
        'skins': hero['skins'],
        'skill': hero['spells'],
        'passive': hero['passive'],
    }
def Download_hero_resources(detail_url_list: Queue, id):
    """Download skin, avatar and ability images for every queued hero.

    Intended to run as a worker thread. Hero ids are taken from
    ``detail_url_list`` until it is empty. For each hero the images are
    stored below ``<cwd>/英雄相關(guān)/<name_CN>/`` in the sub-directories
    ``皮膚`` (big skin art), ``頭像`` (small skin art) and ``技能`` (ability and
    passive icons).

    Args:
        detail_url_list: Queue holding hero ids.
        id: Worker index passed by the caller; not used by this function.
    """
    while True:
        # Fetch without blocking. Checking the length first and then calling
        # a blocking get() lets another worker take the last item in between,
        # which would leave this thread (and the caller's join) hanging.
        try:
            hero_name = detail_url_list.get_nowait()
        except Empty:
            return

        Metadata = Parsing_hero_JS(Stitching_JS_links(hero_name))
        # os.path.join yields the same layout on Windows as the previous
        # hard-coded backslashes and also works on other platforms.
        path = os.path.join(os.getcwd(), '英雄相關(guān)', Metadata['name_CN'])
        avatar_dir = os.path.join(path, '頭像')
        skin_dir = os.path.join(path, '皮膚')
        skill_dir = os.path.join(path, '技能')
        for directory in (avatar_dir, skin_dir, skill_dir):
            os.makedirs(directory, exist_ok=True)

        # Skin and avatar images,
        # e.g. https://game.gtimg.cn/images/lol/act/img/skin/big1000.jpg
        for item_skin in Metadata['skins']:
            url_0 = 'https://game.gtimg.cn/images/lol/act/img/skin/big{}.jpg'.format(
                item_skin['id'])
            url_1 = 'https://game.gtimg.cn/images/lol/act/img/skin/small{}.jpg'.format(
                item_skin['id'])
            # '/' cannot appear in a file name, so it is dropped.
            file_name = item_skin['name'].replace('/', '') + '.jpg'
            urllib.request.urlretrieve(url_0, os.path.join(skin_dir, file_name))
            urllib.request.urlretrieve(url_1, os.path.join(avatar_dir, file_name))

        # Ability icons,
        # e.g. https://game.gtimg.cn/images/lol/act/img/spell/AnnieR.png
        for item_skill in Metadata['skill']:
            url_3 = 'https://game.gtimg.cn/images/lol/act/img/spell/{}.png'.format(
                item_skill['id'])
            # The hero id is stripped from the spell id for the file suffix.
            # '/' is replaced in the file name only, never in the directory
            # part of the path.
            file_name = (item_skill['name'] + '_'
                         + item_skill['id'].replace(hero_name, '')
                         + '.png').replace('/', '-')
            if getHttpStatusCode(url_3) != 404:
                urllib.request.urlretrieve(
                    url_3, os.path.join(skill_dir, file_name))

        # Passive icon: try the "passive" folder first and fall back to the
        # "spell" folder when the first location answers 404.
        url_3 = 'https://game.gtimg.cn/images/lol/act/img/passive/{}'.format(
            Metadata['passive']['image']['full'])
        if getHttpStatusCode(url_3) == 404:
            url_3 = 'https://game.gtimg.cn/images/lol/act/img/spell/{}'.format(
                Metadata['passive']['image']['full'])
        file_name = (Metadata['passive']['name'] + '_P' + '.png').replace('/', '-')
        urllib.request.urlretrieve(url_3, os.path.join(skill_dir, file_name))
def Features_1():
    """Download the images of all heroes using 20 worker threads.

    One producer thread (``Get_Hero_Jsurl``) fills a queue with hero ids;
    the workers (``Download_hero_resources``) then drain it. Afterwards the
    number of queued heroes and the elapsed time are printed.
    """
    worker_count = 20
    # Unbounded queue: the producer finishes before any worker starts, so a
    # size limit could block the producer forever.
    detail_url_queue = Queue()
    producer = threading.Thread(target=Get_Hero_Jsurl, args=(detail_url_queue, ))
    producer.start()
    workers = [
        threading.Thread(target=Download_hero_resources,
                         args=(detail_url_queue, i))
        for i in range(worker_count)
    ]
    start_time = time.time()
    print('將在2秒后啟動(dòng)多線程下載')
    # Kept so the delay announced above still holds; correctness no longer
    # depends on it.
    time.sleep(2)
    # Wait for the producer before starting the workers: they stop as soon as
    # they see an empty queue, so they must not race the producer. It also
    # makes the count below exact.
    producer.join()
    sum_num = detail_url_queue.qsize()
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    time_ = time.time() - start_time
    print('共下載了{}位英雄資源'.format(sum_num))
    # Round up to whole seconds first, then split, so the seconds part can
    # never be displayed as 60.
    minutes, seconds = divmod(int(time_) + 1, 60)
    print("用時(shí): {}分{}秒".format(minutes, seconds))
def Features_2():
    """Download the icon of every item into ``<cwd>/裝備``.

    Fetches ``item.js``, decodes it as gb2312, parses the JSON object found
    between ``data":`` and ``,"tree``, and saves one ``<item name>.png`` per
    entry. A progress percentage is printed after each download.
    """
    hero_js = 'https://lol.qq.com/biz/hero/item.js'
    response = requests.get(hero_js).content.decode('gb2312')
    pattern = r'data":(.*?),"tree'
    list_js = re.findall(pattern, response)
    dict_js = json.loads(list_js[0])

    # os.path.join matches the former backslash layout on Windows and also
    # works on other platforms.
    save_dir = os.path.join(os.getcwd(), '裝備')
    os.makedirs(save_dir, exist_ok=True)

    total = len(dict_js)
    for done, (item_id, item) in enumerate(dict_js.items(), start=1):
        item_url = ('https://game.gtimg.cn/images/lol/act/img/item/'
                    + item_id + '.png')
        # '/' cannot be part of a file name; replace it the same way the
        # ability icons are handled elsewhere in this file.
        file_name = (item['name'] + '.png').replace('/', '-')
        urllib.request.urlretrieve(item_url, os.path.join(save_dir, file_name))
        print('已完成 %.2f %%' % ((done / total) * 100))
def main():
    """Show the text menu, read the user's choice and run that feature.

    Choice ``1`` runs ``Features_1`` and ``2`` runs ``Features_2``. Any other
    input, including text that is not a number, prints an error message and
    returns after a two-second pause.
    """
    print('LOL爬蟲小工具 作者:艾斯托維亞')
    print('———————————————————————————————————————')
    print(' 1 | 從官網(wǎng)下載最新英雄頭像、皮膚原畫、技能圖標(biāo)')
    print(' 2 | 從官網(wǎng)下載最新最新裝備圖 ')
    print('———————————————————————————————————————')
    n = input('請(qǐng)輸入數(shù)字以選擇功能\n')
    swicth = {
        1: Features_1,
        2: Features_2,
    }
    try:
        action = swicth.get(int(n))
    except ValueError:
        # Non-numeric input is treated like any other invalid choice instead
        # of crashing with a traceback.
        action = None
    if action is None:
        print('錯(cuò)誤輸入\n程序即將退出')
        time.sleep(2)
        return
    action()
    input('數(shù)據(jù)保存在軟件同目錄\n按任意鍵退出\n')
# Start the interactive menu only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()