迷失的小屋
首页
注册

[selenium]2025/5/12 第五人格图片下载

迷失的蒙娜丽莎
迷失的蒙娜丽莎
2025-05-12 11:33:15
import os
import random
import re
import sys
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

# Gallery page that is scraped and the folder the images are written to.
GALLERY_URL = 'https://id5.163.com/nrzx/picture.html'
OUTPUT_DIR = 'dwrg_img'
# Seconds to wait for an image request before giving up on it.
REQUEST_TIMEOUT = 30
# Characters that cannot appear in a file name on common file systems.
ILLEGAL_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')


def parse_page_count(text):
    """Return the last integer that appears in the pager text.

    The whole trailing run of digits is read, so a page count with two or
    more digits, or a label followed by whitespace, is parsed correctly.

    Raises:
        ValueError: if ``text`` contains no digits at all.
    """
    match = re.search(r'(\d+)\D*$', text)
    if match is None:
        raise ValueError('no page count found in pager text: %r' % (text,))
    return int(match.group(1))


def safe_filename(name):
    """Return ``name`` trimmed and with path-unsafe characters replaced by ``_``.

    Falls back to ``'unnamed'`` when nothing is left after trimming.
    """
    cleaned = ILLEGAL_FILENAME_CHARS.sub('_', name.strip())
    return cleaned or 'unnamed'


def polite_pause():
    """Sleep for a random 0.5 to 2.5 seconds between requests."""
    time.sleep(0.5 * random.randint(1, 5))


def download_image(url, destination):
    """Fetch ``url`` and write the response body to ``destination``.

    The request is completed and checked before the file is opened, so a
    failed download does not leave an empty file behind.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-success HTTP status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    with open(destination, 'wb') as png_writer:
        png_writer.write(response.content)


def save_page_images(page_source):
    """Download every gallery image found in one page of HTML.

    Each ``div.item.hb`` inside ``div.cont`` is expected to hold a lazily
    loaded ``<img class="lazy" data-original=...>`` and a ``<span>`` caption
    that is used as the file name. Items missing either part are skipped.
    """
    soup = BeautifulSoup(page_source, 'html.parser')
    cont_div = soup.find('div', class_='cont')
    if cont_div is None:
        return
    for item_hb in cont_div.find_all('div', class_='item hb'):
        img_tag = item_hb.find('img', class_='lazy')
        caption = item_hb.find('span')
        img_src = img_tag.get('data-original') if img_tag is not None else None
        if not img_src or caption is None:
            continue
        img_name = caption.text
        # Resolve relative and protocol-relative URLs against the gallery page.
        img_url = urljoin(GALLERY_URL, img_src)
        destination = os.path.join(OUTPUT_DIR, safe_filename(img_name) + '.png')
        try:
            download_image(img_url, destination)
        except requests.RequestException as error:
            # One broken image should not abort the whole run.
            print(img_name + ' 下载失败: ' + str(error))
        else:
            print(img_name+' 下载成功')
        polite_pause()


def main():
    """Walk through every gallery page and download its images.

    The number of pages is read from the ``div.text`` pager on the first
    page. The browser is always closed, even when scraping fails midway.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    # To use a specific chromedriver binary instead of the one on PATH, build
    # the driver with webdriver.Chrome(service=Service(r'path\to\chromedriver.exe')).
    driver = webdriver.Chrome()
    try:
        driver.get(GALLERY_URL)
        driver.maximize_window()
        first_page = BeautifulSoup(driver.page_source, 'html.parser')
        text_item = first_page.find('div', class_='text')
        if text_item is None:
            raise RuntimeError('pager element <div class="text"> not found')
        page_count = parse_page_count(text_item.text)
        for index in range(page_count):
            save_page_images(driver.page_source)
            print('现在是第'+str(index+1)+'页')
            # There is no further page to open after the last one.
            if index + 1 < page_count:
                driver.find_element(By.CLASS_NAME, 'next-page').click()
                polite_pause()
    finally:
        driver.quit()


if __name__ == '__main__':
    main()