-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgetPagesource_with request.py
More file actions
62 lines (50 loc) · 1.91 KB
/
getPagesource_with request.py
File metadata and controls
62 lines (50 loc) · 1.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import sys
import re
from PyQt4.QtGui import *
from PyQt4.QtCore import *
from PyQt4.QtWebKit import *
from PyQt4.QtNetwork import *
# -*- coding: utf-8 -*-
class Render(QWebPage):
    """QtWebKit page that loads a single URL with cookies read from a file.

    Constructing an instance blocks: ``__init__`` starts the Qt event loop
    and returns only after the page's ``loadFinished`` signal has fired.
    Afterwards ``self.frame`` is the page's main frame, so the rendered
    source is available via ``self.frame.toHtml()``.
    """

    # Matches a double-quoted run so the quotes can be dropped from a field.
    _QUOTED = re.compile(r'"(.*?)"')

    def __init__(self, url, cookie_file="cookies_sina.txt"):
        """Load ``url`` and wait for the load to finish.

        Args:
            url: Percent-encoded URL as a byte string; it is handed to
                ``QUrl.fromEncoded`` unchanged.
            cookie_file: Path of a text file with ``name: ...`` /
                ``value: ...`` lines describing the cookies to send.
                Defaults to the path the original script hard-coded.

        Raises:
            IOError: If ``cookie_file`` cannot be opened.
        """
        # Reuse an already-running application object so that a second
        # Render in the same process does not try to construct another one.
        app = QApplication.instance()
        if app is None:
            app = QApplication(sys.argv)
        self.app = app
        QWebPage.__init__(self)
        url = QUrl.fromEncoded(url)
        # NOTE(review): this attribute is stored but nothing in this class
        # applies it to outgoing requests (no userAgentForUrl override) --
        # confirm whether the custom user agent was meant to be sent.
        self.useragent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)"

        # Build the cookie list for this web page from the cookie file.
        pairs = self._read_cookie_pairs(cookie_file)
        self.cookies = [
            QNetworkCookie(QByteArray(name), QByteArray(value))
            for name, value in pairs.items()
        ]

        # Attach the cookies to a jar and make the page's network manager
        # use that jar for every request it issues.
        self.cookiejar = QNetworkCookieJar()
        self.cookiejar.setCookiesFromUrl(self.cookies, url)
        self.network_manager = QNetworkAccessManager()
        self.network_manager.setCookieJar(self.cookiejar)
        self.setNetworkAccessManager(self.network_manager)

        self.loadFinished.connect(self._loadFinished)
        self.mainFrame().load(url)
        # Blocks until _loadFinished() calls quit() on the application.
        self.app.exec_()

    @classmethod
    def _read_cookie_pairs(cls, path):
        """Return a ``{name: value}`` dict parsed from the file at ``path``.

        A line whose key part (the text before the first ``:``) contains
        ``name`` starts a cookie; the next line whose key part contains
        ``value`` completes it. Every other line is ignored.
        """
        pairs = {}
        pending_name = ""
        # ``with`` guarantees the handle is closed even if parsing fails.
        with open(path) as handle:
            for line in handle:
                # Split on the first colon only, so a value that itself
                # contains ':' is kept whole.
                key, sep, raw = line.partition(":")
                if not sep:
                    continue
                if "name" in key:
                    pending_name = cls._clean_field(raw)
                elif "value" in key:
                    # A value with no preceding name cannot form a cookie.
                    if pending_name:
                        pairs[pending_name] = cls._clean_field(raw)
                    pending_name = ""
        return pairs

    @classmethod
    def _clean_field(cls, raw):
        """Strip whitespace, a trailing comma and double quotes from ``raw``."""
        text = raw.strip().rstrip(",").rstrip()
        return cls._QUOTED.sub(r"\1", text)

    def _loadFinished(self, result):
        """Slot for ``loadFinished``: keep the main frame and stop the loop.

        ``result`` (the value delivered by the signal) is not inspected, so
        ``self.frame`` is set whatever the signal reported.
        """
        self.frame = self.mainFrame()
        self.app.quit()
# Driver: render one Weibo search-result page and print its HTML as UTF-8.
# The query segment is percent-encoded twice ("%25" is an encoded "%"), and
# the literal is split only for readability -- the pieces join into the same
# single string.
url = (
    'http://s.weibo.com/weibo/'
    '%25E6%2595%25B0%25E5%25AD%25A6%25E6%25BB%259A%25E5%2587%25BA%25E9%25AB%2598%25E8%2580%2583'
    '&page=2'
)
# Constructing Render blocks until the page has finished loading.
r = Render(url)
html = r.frame.toHtml()
print(html.toUtf8())