-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathwebdriver_js.py
More file actions
86 lines (76 loc) · 2.79 KB
/
webdriver_js.py
File metadata and controls
86 lines (76 loc) · 2.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# coding: utf-8
"""Scrape today's iciba daily sentence with PhantomJS.

Fetches the daily-sentence detail page, extracts the English sentence,
its Chinese translation, and the banner image URL, then:
  * downloads the banner image to ``img-day.jpg``
  * appends ``date / english / chinese`` lines to ``read_day.txt``

Python 2 script (uses ``reload(sys)`` / ``sys.setdefaultencoding`` and
``urllib.urlretrieve``).
"""
from selenium import webdriver
import urllib
from bs4 import BeautifulSoup
import sys
import datetime

# Python 2 hack so unicode text can be written to a byte-mode file without
# explicit .encode() calls. NOTE(review): consider io.open(..., encoding="utf-8")
# instead, which would make this hack unnecessary.
reload(sys)
sys.setdefaultencoding("utf-8")

# Headless PhantomJS browser; path is machine-specific.
driver = webdriver.PhantomJS(executable_path="C:/Python27/phantomjs-2.1.1-windows/bin/phantomjs.exe")

try:
    now = datetime.datetime.now()
    today = now.strftime("%Y-%m-%d")

    # Bug fix: the URL previously hard-coded the date 2018-05-25 while the
    # file entry below was stamped with the current date, so the saved date
    # and the fetched sentence could disagree. Build the URL from today's
    # date so both stay consistent.
    driver.get('http://news.iciba.com/views/dailysentence/daily.html#!/detail/title/' + today)

    # Extract the English sentence, the Chinese translation, and the
    # banner image source from the rendered page.
    en = driver.find_elements_by_class_name('sentence-en')[0].text
    ch = driver.find_elements_by_class_name('sentence-ch')[0].text
    img_src = driver.find_elements_by_xpath('//div[@class="sentence-banner"]/a/img')[0].get_attribute("src")

    # Download the banner image (Python 2 API; urllib.request.urlretrieve in 3).
    urllib.urlretrieve(img_src, 'img-day.jpg')

    # Append today's entry: date, English line, Chinese line.
    # Bug fix: the file handle was previously never closed.
    f = open("read_day.txt", "a+")
    try:
        f.write(today)
        f.write("\n")
        f.write(en)
        f.write("\n")
        f.write(ch)
        f.write("\n")
    finally:
        f.close()

    print("Download picture successfully!")
except Exception as e:
    # Best-effort script: report the failure and fall through to cleanup.
    print(e)
finally:
    # Bug fix: quit the browser unconditionally (previously ran outside any
    # finally, so a KeyboardInterrupt etc. could leave PhantomJS running).
    driver.quit()