|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | +""" |
| 3 | + |
| 4 | +@description: selenium用于爬虫,主要是用来解决javascript渲染的问题 |
| 5 | +""" |
| 6 | + |
| 7 | +import time |
| 8 | + |
| 9 | +from selenium import webdriver |
| 10 | +from selenium.webdriver.common.by import By |
| 11 | +from selenium.webdriver.common.keys import Keys |
| 12 | +from selenium.webdriver.support import expected_conditions |
| 13 | +from selenium.webdriver.support.wait import WebDriverWait |
| 14 | + |
| 15 | + |
def search_baidu_and_fetch(url='https://www.baidu.com', query='姚明老婆是谁?'):
    """Submit a search on Baidu and print basic facts about the result page.

    Uses the module-level ``browser`` WebDriver instance.

    Args:
        url: Page to open before searching.
        query: Text typed into the search box (element id ``kw``).

    Raises:
        selenium.common.exceptions.TimeoutException: If the element with id
            ``content_left`` is not present within 3 seconds.
    """
    browser.get(url)
    # find_element(By.ID, ...) replaces find_element_by_id, which was removed
    # in Selenium 4.3.
    search_box = browser.find_element(By.ID, 'kw')
    search_box.send_keys(query)
    search_box.send_keys(Keys.ENTER)
    # Block (up to 3 s) until the result container exists in the DOM.
    WebDriverWait(browser, 3).until(
        expected_conditions.presence_of_element_located((By.ID, 'content_left'))
    )
    print('current_url:', browser.current_url)
    print('get_cookies:', browser.get_cookies())
    print('page_source:', browser.page_source[:100])
    # Short pause before returning; presumably keeps the page visible briefly.
    time.sleep(1)
| 27 | + |
| 28 | + |
def get_page_source():
    """Load a fixed Baidu URL and print the first 100 characters of its HTML.

    Uses the module-level ``browser`` WebDriver instance.
    """
    target = 'https://www.baidu.cn'
    browser.get(target)
    # Only a short prefix of the rendered source is shown.
    head = browser.page_source[:100]
    print('url:{}, page_source:{}'.format(target, head))
| 33 | + |
| 34 | + |
def get_page_element():
    """Open Taobao, print its HTML, then print the first ``li`` element twice.

    Uses the module-level ``browser`` WebDriver instance. The element is
    looked up two separate times and both handles are printed.
    """
    browser.get('http://www.taobao.com')
    print(browser.page_source)
    # The legacy find_element_by_css_selector helper was removed in
    # Selenium 4.3, so both lookups go through find_element(By.CSS_SELECTOR, ...).
    lst = browser.find_element(By.CSS_SELECTOR, 'li')
    lst_c = browser.find_element(By.CSS_SELECTOR, 'li')
    print(lst, lst_c)
| 41 | + |
| 42 | + |
def get_page_search_element():
    """Call interaction methods on a located element.

    Opens Baidu and searches for ``iphone`` and then ``ipad``. After each
    search it prints the current URL and the length of the page source, then
    pauses for 5 seconds. Uses the module-level ``browser`` WebDriver instance.
    """
    browser.get('https://www.baidu.com')
    for keyword in ('iphone', 'ipad'):
        # Re-locate the search box for every query: a handle obtained before
        # a search was submitted may be stale once the page has re-rendered.
        # find_element(By.ID, ...) replaces find_element_by_id, which was
        # removed in Selenium 4.3.
        search_box = browser.find_element(By.ID, 'kw')
        # Drop any text left over from the previous query.
        search_box.clear()
        search_box.send_keys(keyword)
        search_box.send_keys(Keys.ENTER)
        print(browser.current_url)
        print(len(browser.page_source))
        time.sleep(5)
| 60 | + |
| 61 | + |
def add_action_source():
    """Drag the ``draggable`` element onto ``droppable`` with an action chain.

    Opens the runoob jQuery UI droppable demo, performs the drag-and-drop,
    waits 2 seconds and prints the current URL. Uses the module-level
    ``browser`` WebDriver instance.
    """
    from selenium.webdriver import ActionChains

    url = 'https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
    browser.get(url)
    # Switch into the 'iframeResult' frame before locating the two elements.
    browser.switch_to.frame('iframeResult')
    # find_element(By.ID, ...) replaces find_element_by_id, which was removed
    # in Selenium 4.3.
    source = browser.find_element(By.ID, 'draggable')
    target = browser.find_element(By.ID, 'droppable')
    # Actions are only queued by drag_and_drop(); perform() executes them.
    ActionChains(browser).drag_and_drop(source, target).perform()
    # Notes on handling a JavaScript alert, should one pop up:
    #   1. Get the open alert via ``browser.switch_to.alert`` (this replaces
    #      the old ``switch_to_alert()`` method).
    #   2. Read its ``text`` attribute to get the message shown.
    #   3. ``accept()`` clicks the confirm button.
    #   4. ``dismiss()`` is like clicking the "x" in the corner: it cancels
    #      the alert.
    time.sleep(2)
    print(browser.current_url)
| 81 | + |
| 82 | + |
def exe_script():
    """Run two JavaScript snippets on the Zhihu explore page.

    The first snippet scrolls the window to the bottom of the document and
    the second opens an alert dialog. Uses the module-level ``browser``
    WebDriver instance.
    """
    browser.get('https://www.zhihu.com/explore')
    snippets = (
        'window.scrollTo(0,document.body.scrollHeight)',
        'alert("To button")',
    )
    # Execute the snippets in order, one call per snippet.
    for js in snippets:
        browser.execute_script(js)
| 87 | + |
| 88 | + |
def get_text():
    """Print details of the ``Popover1-toggle`` element on Zhihu explore.

    Prints the element handle, its text, its ``class`` attribute, and then
    its id, location, tag name and size. Uses the module-level ``browser``
    WebDriver instance.
    """
    browser.get('https://www.zhihu.com/explore')
    # find_element(By.ID, ...) replaces find_element_by_id, which was removed
    # in Selenium 4.3.
    logo = browser.find_element(By.ID, "Popover1-toggle")
    print(logo)
    print(logo.text)
    print(logo.get_attribute("class"))
    print('logo id, location, tag_name, size:')
    print(logo.id, logo.location, logo.tag_name, logo.size)
| 97 | + |
| 98 | + |
if __name__ == '__main__':
    # Prerequisites: 1. in Safari's preferences, enable the Develop menu under
    # Advanced; 2. in the Develop menu, turn on "Allow Remote Automation".
    browser = webdriver.Safari()
    try:
        # Uncomment whichever demo should run.
        # search_baidu_and_fetch()
        # get_page_search_element()
        # add_action_source()
        # exe_script()
        get_text()
    finally:
        # quit() ends the whole WebDriver session (close() only closes the
        # current window); the finally clause runs it even if a demo raises.
        browser.quit()
0 commit comments