
import urllib.request
from bs4 import BeautifulSoup
def index(page):
    """Download every image listed on one index page of pic.netbian.com.

    Page 1 is served from ``index.html``; any other page number is served
    from ``index_<page>.html``. Each ``<img>`` found inside the
    ``<div class="slist">`` list items is saved as ``./image/<title>.jpg``,
    where the title is the image's ``alt`` text with spaces removed.

    Args:
        page: 1-based number of the index page to crawl.

    Returns:
        None. The function prints each image title and writes files to disk.

    Raises:
        urllib.error.URLError: if the index page or an image cannot be fetched.
    """
    # Function-scope imports keep this block self-contained.
    import os
    from urllib.parse import urljoin

    base_url = "https://pic.netbian.com/"
    image_dir = "./image"

    if page == 1:
        url = base_url + "index.html"
    else:
        url = base_url + "index_{}.html".format(page)

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/93.0.4577.82 Safari/537.36 '
    }
    request = urllib.request.Request(url=url, headers=headers)
    # Use the response as a context manager so the connection is always closed.
    with urllib.request.urlopen(request) as response:
        html = response.read()

    soup = BeautifulSoup(html, "html.parser")
    container = soup.find("div", class_="slist")
    if container is None:
        # find() returns None when the element is absent; report it instead of
        # failing with an AttributeError on the chained find_all().
        print("No image list found on {}".format(url))
        return

    # urlretrieve does not create missing directories.
    os.makedirs(image_dir, exist_ok=True)

    for item in container.find_all("li"):
        for img in item.find_all("img"):
            # Image address
            src = img.get("src")
            if not src:
                continue
            # Image title
            title = (img.get("alt") or "").replace(" ", "")
            print(title)
            # urljoin copes with both "/uploads/x.jpg" and "uploads/x.jpg",
            # avoiding the "//" produced by plain string concatenation.
            image_url = urljoin(base_url, src)
            # Replace characters that are not allowed in file names so the alt
            # text cannot escape the target directory or break the write.
            safe_title = "".join(
                "_" if ch in '\\/:*?"<>|' else ch for ch in title
            )
            if not safe_title:
                # Fall back to the image's own file name when alt is empty.
                safe_title = os.path.splitext(os.path.basename(src))[0]
            # Download the image
            urllib.request.urlretrieve(
                url=image_url,
                filename=os.path.join(image_dir, safe_title + ".jpg"),
            )
# Crawl the first 20 index pages. range() excludes its stop value, so the stop
# must be 21 for page 20 to be included.
# The pages crawled are the index pages of pic.netbian.com, the site titled
# "Latest 4K Wallpapers / Latest 4K Desktop Wallpapers".
for page_number in range(1, 21):
    index(page_number)
# Result: the images were downloaded successfully.