配置环境:需要准备两样东西,一个是谷歌浏览器(Chrome),另一个是对应的浏览器驱动(chromedriver)。
<!-- selenium --> <dependency> <groupId>org.seleniumhq.selenium</groupId> <artifactId>selenium-java</artifactId> <version>3.141.0</version> </dependency> <!-- selenium -->
/**
 * Minimal Selenium example: starts Chrome through chromedriver, opens Baidu and
 * prints the page title.
 */
public class Demo {

    /**
     * Launches Chrome, loads {@code http://www.baidu.com} and prints its title to stdout.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Tell Selenium where the chromedriver executable is located.
        System.setProperty("webdriver.chrome.driver", "C:\\Users\\Administrator.USER-20190917GB\\AppData\\Local\\Programs\\Python\\Python36\\chromedriver.exe");

        WebDriver driver = new ChromeDriver();
        try {
            driver.get("http://www.baidu.com");
            String title = driver.getTitle();
            System.out.print(title);
        } finally {
            // End the session even when navigation fails; without this the browser
            // window and the chromedriver process are left running.
            driver.quit();
        }
    }
}
package com.java456.selenium;

import java.util.HashMap;
import java.util.Map;

import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openqa.selenium.support.ui.ExpectedCondition;
import org.openqa.selenium.support.ui.WebDriverWait;

/**
 * Opens a Baidu Pan share page in Chrome with image loading switched off, waits for the
 * page body to be displayed and prints the page title.
 */
public class Demo {

    /**
     * Runs the example.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        System.setProperty("webdriver.chrome.driver",
                "C:\\Users\\Administrator.USER-20190917GB\\AppData\\Local\\Programs\\Python\\Python36\\chromedriver.exe");

        // Start Chrome with a preference that disables image loading.
        Map<String, Object> preferences = new HashMap<String, Object>();
        ChromeOptions options = new ChromeOptions();
        preferences.put("profile.managed_default_content_settings.images", 2);
        options.setExperimentalOption("prefs", preferences);

        WebDriver driver = new ChromeDriver(options);
        try {
            driver.get("https://pan.baidu.com/s/1qYmsGSs");

            // Wait at most 5 seconds until <body> is displayed.
            WebDriverWait wait = new WebDriverWait(driver, 5);
            wait.until(new ExpectedCondition<Boolean>() {
                @Override
                public Boolean apply(WebDriver d) {
                    boolean loadcomplete = d.findElement(By.tagName("body")).isDisplayed();
                    return loadcomplete;
                }
            });

            String title = driver.getTitle();
            System.out.print(title);
        } finally {
            // quit() closes every window and ends the session. Running it in finally
            // means a wait timeout or navigation error no longer leaves the browser
            // and chromedriver running.
            driver.quit();
        }
    }
}
public static void main(String[] args) { System.setProperty("webdriver.chrome.driver", "C:\\Users\\Administrator.USER-20190917GB\\AppData\\Local\\Programs\\Python\\Python36\\chromedriver.exe"); // 初始化 不加载图片 Map<String, Object> preferences = new HashMap<String, Object>(); ChromeOptions options = new ChromeOptions(); preferences.put("profile.managed_default_content_settings.images", 2); options.setExperimentalOption("prefs", preferences); WebDriver driver = new ChromeDriver(options); driver.get("https://pan.baidu.com/s/1wFE8gYizFaebwJLf93lAxQ "); // 初始化 不加载图片 // 设置超时5秒 默认也有的可能长一点。 WebDriverWait wait = new WebDriverWait(driver, 5); wait.until(new ExpectedCondition<Boolean>() { public Boolean apply(WebDriver d) { boolean loadcomplete = d.findElement(By.tagName("body")).isDisplayed(); return loadcomplete; } }); // 设置超时5秒 默认也有的可能长一点。 String title = driver.getTitle(); WebElement input = driver.findElement(By.cssSelector(".QKKaIE.LxgeIt")); WebElement btn = driver.findElement(By.cssSelector(".g-button-right")); input.sendKeys("6nlq"); btn.click(); System.out.print(title); //System.out.print(driver.getPageSource()); //driver.close(); //driver.quit(); }
WebElement input2 = driver.findElement(By.cssSelector("#input_search")); ((JavascriptExecutor)driver).executeScript("document.getElementById(\"input_search\").value=34", input2); 如果要填入中文(或其他字符串),值必须用引号括起来作为 JS 字符串,使用下面的这段代码: try { ((JavascriptExecutor)driver).executeScript("document.getElementById(\"input_search\").value='在在'" , input2); }catch (Exception e){ System.out.println(e); } 执行 JS 代码时建议用 try/catch 包起来,否则脚本出错时抛出的异常会中断程序。
// Read the title of the page that is currently loaded.
String title = driver.getTitle();
// Locate the input field and the button through their CSS class selectors.
WebElement input = driver.findElement(By.cssSelector(".QKKaIE.LxgeIt"));
WebElement btn = driver.findElement(By.cssSelector(".g-button-right"));
// Type the code into the field and submit it with a click.
// NOTE(review): "6nlq" is presumably the share extraction code - confirm.
input.sendKeys("6nlq");
btn.click();
driver.findElement(By.linkText("链接的全部文字内容")); |
driver.findElement(By.partialLinkText("链接的部分文字内容")); |
driver.findElement(By.partialLinkText(">")).click(); |
def start():
    """Run a CNKI search for the module-level ``word`` and process the results.

    Opens the search page in Chrome, submits the keyword, switches into the
    ``iframeResult`` frame and passes the driver to ``go``.
    """
    browser = webdriver.Chrome()
    browser.get('https://kns.cnki.net/kns/brief/default_result.aspx')  # open the search page

    # Fill in the keyword box, then press the search button.
    browser.find_element_by_css_selector('input.rekeyword').send_keys(word)
    browser.find_element_by_css_selector('input#btnSearch').click()
    time.sleep(4)  # fixed pause before the result frame is looked up

    # Switch into the iframe that holds the result content.
    result_frame = browser.find_element_by_id('iframeResult')
    browser.switch_to.frame(result_frame)
    go(browser)
//这个需要切回到原来的页面 切回list列表页面 driver.switchTo().window(listWindow); String titleChild = driver.getTitle(); System.out.println("当前url:"+driver.getCurrentUrl()); System.out.println("titleChild:"+titleChild); //这个需要切回到原来的页面 切回list列表页面
// //这个节点很多。他总是点第1个。不对。 可以使用list 分别点击不同的。 List<WebElement> btnList = driver.findElements(By.cssSelector("div.zwss-item-job-title")); WebElement btn = btnList.get(index); btn.click();//点击 这个点击是新打开的窗口 index++; String url = driver.getCurrentUrl(); Set<String> childids = driver.getWindowHandles();//因为有多个窗口,所以在遍历 一睛。 String listWindow = ""; for(String s:childids){ System.out.println(s); if(!driver.switchTo().window(s).getCurrentUrl().contains("https://www.szhr.com.cn/zwss")){ //如果不是我列表的url 那么就说明这个页面是 新打开的。获取新页面的url和标题。 driver.switchTo().window(s); String titleChild = driver.getTitle(); System.out.println("当前url:"+driver.getCurrentUrl()); System.out.println("titleChild:"+titleChild); driver.close(); break; }else{ //这个窗口是我的列表窗口记录一下 listWindow = s; } } //这个需要切回到原来的页面 切回list列表页面 driver.switchTo().window(listWindow); String titleChild = driver.getTitle(); System.out.println("当前url:"+driver.getCurrentUrl()); System.out.println("titleChild:"+titleChild); //这个需要切回到原来的页面 切回list列表页面
// Locate the next-page button by its CSS class.
WebElement btn = driver.findElement(By.cssSelector(".btn-next"));
if(btn.isEnabled()){
    // The button is enabled: click it.
    btn.click();
}else{
    // The button is disabled: report it instead of clicking.
    System.out.println("下一页按钮不可以点击!!!!!!");
}
// Go back one step in the browser history.
driver.navigate().back();
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
import time
from bs4 import BeautifulSoup
import ZhiWang_DB

# Search keyword; also stored with every record passed to ZhiWang_DB.add.
word = "中医药"


def zhiwang2():
    """Open the CNKI search page and submit a fixed keyword search."""
    driver = webdriver.Chrome()
    driver.get('https://kns.cnki.net/kns/brief/default_result.aspx')  # open the url
    search_box = driver.find_element_by_css_selector('input.rekeyword')
    search_box.send_keys('新冠肺炎')
    btn = driver.find_element_by_css_selector('input#btnSearch')
    btn.click()
    time.sleep(4)


def jie_xi_html(driver):
    """Parse the result table of the current page with BeautifulSoup and store each row.

    For every row except the first, prints the title, link, author, source, time and
    database columns and passes them to ``ZhiWang_DB.add`` together with ``word``.
    """
    # Name the parser explicitly so the result does not depend on which optional
    # parser libraries happen to be installed.
    mysoup = BeautifulSoup(driver.page_source, 'html.parser')
    table = mysoup.find('table', attrs={'class': 'GridTableContent'})
    print('table_len:%d' % len(table))
    tbody = table.find('tbody')
    print('tbody_len:%d' % len(tbody))
    trList = tbody.find_all('tr')
    print('trList_len:%d' % len(trList))

    # Skip the first row.
    for tr in trList[1:]:
        tdList = tr.find_all('td')

        # Column 1: title (and its link).
        td1 = tdList[1]
        print('题目%s' % td1.getText().strip())
        print('题目链接%s' % td1.find('a').get("href"))
        title = td1.getText().strip()
        # NOTE(review): stripping single quotes suggests ZhiWang_DB.add builds SQL by
        # string concatenation - confirm, and prefer parameterized queries there.
        title = title.replace('\'', ' ')
        print(title)

        # Column 2: author.
        td2 = tdList[2]
        print('作者%s' % td2.getText().strip())

        # Column 3: source.
        td3 = tdList[3]
        print('来源%s' % td3.getText().strip())

        # Column 4: time.
        td4 = tdList[4]
        print('时间%s' % td4.getText().strip())

        # Column 5: database.
        td5 = tdList[5]
        print('数据库%s' % td5.getText().strip())

        ZhiWang_DB.add(title, td2.getText().strip(), td4.getText().strip(),
                       td5.getText().strip(), td3.getText().strip(), word)
        print('================================')


def go(driver):
    """Parse the current result page, then keep following the next-page link.

    Stops when no link containing '下一页' is found. The previous version called itself
    recursively without a base case, so it always ended with an uncaught
    NoSuchElementException (or a RecursionError on long result lists).
    """
    while True:
        # Parse and output the content of the current page.
        jie_xi_html(driver)
        try:
            next_page = driver.find_element_by_partial_link_text('下一页')
        except NoSuchElementException:
            # No next-page link: the last page has been processed.
            break
        next_page.click()
        time.sleep(6)


def start():
    """Search CNKI for ``word`` and process every result page, then close the browser."""
    driver = webdriver.Chrome()
    try:
        driver.get('https://kns.cnki.net/kns/brief/default_result.aspx')  # open the url
        search_box = driver.find_element_by_css_selector('input.rekeyword')
        search_box.send_keys(word)
        btn = driver.find_element_by_css_selector('input#btnSearch')
        btn.click()
        time.sleep(4)

        iframe = driver.find_element_by_id('iframeResult')
        driver.switch_to.frame(iframe)  # switch into the content iframe
        go(driver)
    finally:
        # Always end the session so Chrome and chromedriver do not stay running.
        driver.quit()


def test_zhiwang():
    """Experiment against the advanced search page; the original notes that the click fails."""
    driver = webdriver.Chrome()
    driver.get('https://kns.cnki.net/kns/brief/result.aspx?dbprefix=scdb')  # open the url
    search_box = driver.find_element_by_css_selector('#txt_1_value1')
    search_box.send_keys('中医')
    time.sleep(2)
    btn = driver.find_element_by_css_selector('#btnSearch')
    btn.click()  # click event; reported as raising an error
    time.sleep(14)


start()
import java.util.Set; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.openqa.selenium.By; import org.openqa.selenium.WebDriver; import org.openqa.selenium.WebElement; import org.openqa.selenium.chrome.ChromeDriver; import org.openqa.selenium.phantomjs.PhantomJSDriver; import org.openqa.selenium.support.ui.ExpectedConditions; import org.openqa.selenium.support.ui.Wait; import org.openqa.selenium.support.ui.WebDriverWait; public class YsfTest_20180725{ public static void main(String[] args) throws InterruptedException{ WebElement search = null; System.setProperty("webdriver.chrome.driver","C:/Program Files (x86)/Google/Chrome/Application/chromedriver.exe"); WebDriver driver = new ChromeDriver(); driver.get("http://www.w3school.com.cn/html/html_forms.asp"); //获得父级窗口的id String parentId = driver.getWindowHandle(); //获取父级窗口的标题名 String parentTitle = driver.getTitle(); //找到“亲自试一试”按钮 search = driver.findElement(By.xpath("/html/body/div/div[4]/div[6]/p[2]/a")); //点击“亲自试一试”按钮 search.click(); //获取父级窗口id和子级窗口id,放在chirldids数组里 Set<String> childids = driver.getWindowHandles(); // System.out.println("list:"+chirldids); for(String s:childids){ if(driver.switchTo().window(s).getTitle().contains("W3School在线测试工具 V2")){ driver.switchTo().window(s); String titleChild = driver.getTitle(); System.out.println("name:"+titleChild); Thread.sleep(5000); driver.close(); break; } } driver.switchTo().window(parentId); String titleParent = driver.getTitle(); System.out.println("name:"+titleParent); driver.close(); } }
站长微信:xiaomao0055
站长QQ:14496453