安装 Selenium 库
# Python 2.x
pip install Selenium
# Python 3.x
pip3 install Selenium
安装 PhantomJS
从 PhantomJS 下载地址（https://phantomjs.org/download.html）中选择与操作系统相应的版本，下载并解压
Python 代码
from selenium import webdriver
from bs4 import BeautifulSoup
import time
# Start a headless PhantomJS browser; executable_path must point at the
# phantomjs binary extracted from the downloaded archive.
driver = webdriver.PhantomJS(executable_path='/path/to/download/phantomjs-2.1.1-macosx/bin/phantomjs')
try:
    driver.get("http://pythonscraping.com/pages/javascript/ajaxDemo.html")
    # Fixed pause before reading the page; presumably this gives the page's
    # JavaScript time to replace the initial content -- confirm that 3 s is
    # enough for the target page.
    time.sleep(3)
    # Alternative: read the element directly through Selenium instead of
    # re-parsing the page source:
    #     print(driver.find_element_by_id('content').text)
    page_source = driver.page_source
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed (and to avoid the
    # "no parser was explicitly specified" warning).
    soup = BeautifulSoup(page_source, "html.parser")
    print(soup.find(id="content").get_text())
finally:
    # Always shut the browser down, even if loading or parsing failed;
    # quit() ends the whole driver session rather than only closing the window.
    driver.quit()
根据控件检查页面是否已经完全加载
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Start a headless PhantomJS browser; executable_path must point at the
# phantomjs binary extracted from the downloaded archive.
driver = webdriver.PhantomJS(executable_path='/path/to/download/phantomjs-2.1.1-macosx/bin/phantomjs')
try:
    driver.get("http://pythonscraping.com/pages/javascript/ajaxDemo.html")
    try:
        # Wait up to 10 seconds for an element with id "loadedButton" to be
        # present in the DOM; until() raises TimeoutException otherwise.
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "loadedButton"))
        )
    finally:
        # Print the content element whether or not the wait succeeded,
        # matching the original try/finally behaviour.
        print(driver.find_element(By.ID, "content").text)
finally:
    # Always shut the browser down, even if the lookup above raised;
    # quit() ends the whole driver session rather than only closing the window.
    driver.quit()
参考
《Python网络数据采集》
driver.execute_script
没有注释，估摸着是 driver.page_source 这句执行了代码，是么？