不知道我理解得对不对,我的思路是这样的:等待搜索框 DOM 加载 > 输入关键字并回车 > 等待 iframe 可用并 switch 进去(不然没法点“单曲”)> 等待选项卡渲染进 DOM 树 > 点击“单曲”选项卡 > 等待“下一页”渲染进 DOM > 滚屏(如果这里不滚屏,“下一页”不在可视范围内,点击好像是不生效的)> 点击“下一页” > 等待搜索结果 > 再点“下一页”、再等待结果,如此循环。
代码只是测试用的,很多地方不严谨,如有错误请见谅。请不要在意 OOP 部分,重点关注 main 部分——Wait 很重要!
- import time
- import threading
- from selenium import webdriver
- from selenium.webdriver.common.keys import Keys
- from selenium.webdriver.chrome.options import Options
- from selenium.webdriver.support.wait import WebDriverWait
- from selenium.webdriver.support import expected_conditions as EC
- from selenium.webdriver.common.by import By
- from selenium.webdriver.common.action_chains import ActionChains
class Singleton(object):
    """Base class that hands out one shared instance and one shared WebDriver.

    The first construction starts the requested browser and stores the
    driver, ``base_url`` and ``browser`` as class attributes.  Every later
    construction returns the cached instance and ignores its arguments.
    """

    instance = None

    def __new__(cls, base_url, browser='chrome'):
        """Return the cached instance, creating it and its driver on first use.

        Args:
            base_url: URL stored on the class as ``base_url``.
            browser: ``"chrome"`` (default) or ``"firefox"``.

        Returns:
            The single shared instance of ``cls``.

        Raises:
            ValueError: if ``browser`` is neither ``"chrome"`` nor
                ``"firefox"``.  Nothing is cached in that case, so a later
                call with a supported browser still works.
        """
        if cls.instance is not None:
            return cls.instance

        # Build the driver BEFORE caching anything: if the browser name is
        # unsupported or the driver fails to start, no half-initialised
        # instance (one without a ``driver`` attribute) is left behind.
        if browser == "firefox":
            driver = webdriver.Firefox()
        elif browser == "chrome":
            chrome_options = Options()
            # Optional workarounds the original author left disabled:
            # chrome_options.add_argument('--disable-gpu')
            # chrome_options.add_argument('--disable-software-rasterizer')
            # Do not load images, to speed up page loads.
            chrome_options.add_argument('blink-settings=imagesEnabled=false')
            # ``options=`` replaces the deprecated ``chrome_options=`` keyword.
            driver = webdriver.Chrome(options=chrome_options)
        else:
            raise ValueError("只支持 Firefox 或 Chrome!")

        i = object.__new__(cls)
        cls.base_url = base_url
        cls.browser = browser
        cls.driver = driver
        cls.instance = i
        return i
class InstanceWebDriver(Singleton):
    """Convenience wrapper around the shared Selenium driver.

    ``self.driver`` is the class attribute set by ``Singleton.__new__``.
    The ``find_*`` helpers delegate to ``driver.find_element(By..., value)``
    / ``driver.find_elements(By..., value)`` rather than the
    ``find_element_by_*`` driver methods.
    """

    def __init__(self, base_url, browser='chrome'):
        """Store ``base_url`` on the instance.

        ``browser`` is consumed by ``Singleton.__new__``; it is accepted here
        only so that ``InstanceWebDriver(url, browser="firefox")`` does not
        fail with a ``TypeError``.
        """
        self.base_url = base_url

    def _wait(self, waiter_name, timeout, poll_frequency, method, message):
        """Run ``WebDriverWait(...).until`` or ``.until_not``.

        On any ``Exception`` the error is printed, the driver is closed and
        ``None`` is returned.  ``KeyboardInterrupt`` / ``SystemExit`` are not
        caught and propagate to the caller.
        """
        try:
            waiter = WebDriverWait(self.driver, timeout, poll_frequency)
            return getattr(waiter, waiter_name)(method, message)
        except Exception as e:
            print(e)
            self.close()
            return None

    def webdriver_wait_until(self, timeout, poll_frequency, method, message='WebDriverWaitUntil超时!'):
        """Wait until ``method`` succeeds and return its result.

        Returns ``None`` (after printing the error and closing the driver)
        when the wait fails.
        """
        return self._wait('until', timeout, poll_frequency, method, message)

    def webdriver_wait_until_not(self, timeout, poll_frequency, method, message='WebDriverWaitUntilNot超时!'):
        """Wait until ``method`` stops succeeding and return the wait's result.

        Returns ``None`` (after printing the error and closing the driver)
        when the wait fails.
        """
        return self._wait('until_not', timeout, poll_frequency, method, message)

    def implicitly_wait(self, seconds=60):
        """Set the driver's implicit wait to ``seconds``."""
        self.driver.implicitly_wait(seconds)

    def get(self):
        """Navigate the driver to ``self.base_url``."""
        self.driver.get(self.base_url)

    def find_element(self, locator, strategies):
        """Find one element; ``locator`` is the ``By`` kind, ``strategies`` the value."""
        return self.driver.find_element(locator, strategies)

    def find_element_by_xpath(self, strategies):
        """Find one element by XPath expression."""
        return self.driver.find_element(By.XPATH, strategies)

    def find_element_by_css_selector(self, strategies):
        """Find one element by CSS selector."""
        return self.driver.find_element(By.CSS_SELECTOR, strategies)

    def find_elements_by_class_name(self, strategies):
        """Find all elements with the given class name."""
        return self.driver.find_elements(By.CLASS_NAME, strategies)

    def find_element_by_id(self, id):
        """Find one element by its ``id`` attribute."""
        # The parameter name shadows the builtin ``id``; it is kept so that
        # existing keyword callers keep working.
        return self.driver.find_element(By.ID, id)

    def find_element_by_link_text(self, link_text):
        """Find one link element by its exact visible text."""
        return self.driver.find_element(By.LINK_TEXT, link_text)

    def actionchains_by_sendkeys(self, keys_to_send):
        """Send ``keys_to_send`` through an ``ActionChains`` and perform it."""
        ActionChains(self.driver).send_keys(keys_to_send).perform()

    def close(self):
        """Quit the browser if a driver exists."""
        # ``quit()`` alone is used; the previous ``close()`` call before it
        # was redundant.
        if self.driver is not None:
            self.driver.quit()
def _page_through_results(browser, keyword, max_pages):
    """Search for ``keyword`` and click through up to ``max_pages`` result pages.

    Returns:
        ``True`` when every wait succeeded; ``False`` as soon as one wait
        helper returned ``None`` (the helper has then already printed the
        error and closed the driver, so nothing further is attempted).
    """
    browser.get()

    # 1. Wait for the search box, then type the keyword and press Enter.
    if browser.webdriver_wait_until(60, 0.5, EC.presence_of_element_located((By.ID, 'srch')),
                                    "等待'搜索框'超时!") is None:
        return False
    srch = browser.find_element_by_id('srch')
    srch.clear()
    srch.send_keys(keyword)
    srch.send_keys(Keys.ENTER)

    # 2. Wait for the iframe and switch into it; the tab and pager lookups
    #    below are made from inside it.
    if browser.webdriver_wait_until(60, 0.5,
                                    EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR, '#g_iframe')),
                                    "等待'搜索结果IFRAME'超时!") is None:
        return False

    # 3. Wait for the first tab to be present, then click it.
    if browser.webdriver_wait_until(60, 0.5, EC.presence_of_element_located((By.CLASS_NAME, 'fst')),
                                    "等待'选项卡单曲'超时!") is None:
        return False
    browser.find_element_by_css_selector("li.fst>a").click()

    # 4. Wait for the "next page" link, send END to scroll, then wait for the
    #    link to become visible.  The author reports that clicking it has no
    #    effect while it is outside the viewport.
    if browser.webdriver_wait_until(60, 0.5, EC.presence_of_element_located((By.LINK_TEXT, '下一页')),
                                    "等待'下一页'DOM渲染超时!") is None:
        return False
    browser.actionchains_by_sendkeys(Keys.END)
    if browser.webdriver_wait_until(60, 0.5, EC.visibility_of_element_located((By.LINK_TEXT, '下一页')),
                                    "等待'下一页'可见状态超时!") is None:
        return False

    # 5. Print the current page number, click "next page", wait for results.
    for _ in range(max_pages):
        number_page_btn = browser.find_element_by_xpath("//a[contains(@class, 'js-selected')]")
        print(number_page_btn.text)
        nextpage_btn = browser.find_element_by_link_text('下一页')
        # Read the label before clicking: the click may re-render the pager
        # and leave this element reference stale.
        nextpage_label = nextpage_btn.text
        nextpage_btn.click()
        print(nextpage_label)
        if browser.webdriver_wait_until(60, 2, EC.presence_of_element_located((By.CLASS_NAME, "sn")),
                                        "等待'搜索结果列表'超时!") is None:
            return False
    return True


def main(keyword='神话', max_pages=17):
    """Open the search page, search for ``keyword`` and page through results.

    Args:
        keyword: text typed into the search box.
        max_pages: how many times "next page" is clicked.

    The browser is quit when the run finishes or when an unexpected error
    occurs.  When a wait times out, the wait helper has already closed it.
    """
    instancewebdriver = InstanceWebDriver("https://music.163.com/#/search/m/?s=凉凉&type=100")
    try:
        completed = _page_through_results(instancewebdriver, keyword, max_pages)
    except BaseException:
        # Do not leave the browser / driver process running after a failure.
        instancewebdriver.close()
        raise
    if completed:
        instancewebdriver.close()
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
复制代码
|