I would like to get the pop-up data from a website.
As shown in the first figure, I need to click a link.
After that, a pop-up appears, as shown in the second figure.
The content of this pop-up is what I want.
I tried to follow the example using PyQt5
to get the data.
However, the program keeps running and never exits.
How can I solve this problem?
Thank you very much.
import sys
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl
from PyQt5.QtWebEngineWidgets import QWebEngineView
from bs4 import BeautifulSoup
class Render(QWebEngineView):
    """Load ``url`` in a QWebEngineView and store the page HTML in ``self.html``.

    Constructing an instance creates the QApplication, starts the load and
    enters the Qt event loop, so the constructor returns only once
    ``self.app.quit()`` has been called (see ``callable``).
    """

    def __init__(self, url):
        self.html = None  # filled in by callable() with the page HTML
        self.first_pass = True  # True until the first loadFinished has been handled
        self.app = QApplication(sys.argv)
        QWebEngineView.__init__(self)
        self.loadFinished.connect(self._load_finished)
        self.load(QUrl(url))
        # Blocks here in the Qt event loop until self.app.quit() is called.
        self.app.exec_()

    def _load_finished(self, result):
        """Handle loadFinished: first emission runs the script, later ones grab the HTML.

        ``result`` is the value delivered by the signal; it is not inspected.
        """
        # NOTE(review): _second_finished() is the only path that leads to
        # callable() and therefore to app.quit(); it runs only if loadFinished
        # is emitted a second time.
        if self.first_pass:
            self._first_finished()
            self.first_pass = False
        else:
            self._second_finished()

    def _first_finished(self):
        """Run the JavaScript snippet against the freshly loaded page."""
        # NOTE(review): this script only looks the element up; it does not
        # click it, and no callback is passed to receive the result.
        self.page().runJavaScript("document.getElementById('auto-header-citypop-citylist');")

    def _second_finished(self):
        """Request the page HTML; it is delivered to callable()."""
        self.page().toHtml(self.callable)

    def callable(self, data):
        """Store the HTML passed back by toHtml() and stop the event loop."""
        self.html = data
        self.app.quit()
# Render the home page (Render() returns once its event loop has exited),
# then write the captured markup to data2.html.
url = r'https://www.autohome.com.cn'
web = Render(url)
with open('data2.html', mode='w', encoding='utf-8-sig') as out_file:
    out_file.write(web.html)
There was this JavaScript on the page:
// Excerpt from the page's own script: redirect small-screen mobile browsers
// to the mobile site. `rf` and `browser` are defined outside this excerpt
// (`rf` is presumably the referrer -- confirm against the full page script).
// The redirect is only considered when `rf` is empty or does not contain
// ".autohome.com.cn".
if (rf === "" || rf.toLocaleLowerCase().indexOf(".autohome.com.cn") === -1) {
    // ...and the screen is missing or narrower than 810 pixels...
    if (screen == undefined || screen.width < 810) {
        // ...and at least one of the mobile flags on browser.versions is set.
        if (browser.versions.mobile == true || browser.versions.iPhone == true || browser.versions.ucweb == true || browser.versions.android == true || browser.versions.Symbian == true) {
            window.location.href = "//m.autohome.com.cn/?from=pc";
            return
        }
    }
}
This script redirected you to https://m.autohome.com.cn/?from=pc, as you can see by printing self.url(). To get around this, I set the Referer header like this:
import sys
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl, QByteArray
from PyQt5.QtWebEngineWidgets import QWebEngineView
from PyQt5.QtWebEngineCore import QWebEngineHttpRequest
from bs4 import BeautifulSoup
class Render(QWebEngineView):
    """Load a page, click the area switcher and capture the resulting HTML.

    Constructing an instance creates the QApplication, sends the request and
    enters the Qt event loop; the constructor returns once the HTML has been
    stored in ``self.html`` and the event loop has been stopped. The view is
    never shown.

    Args:
        url: Address of the page to load.
        referer: Value sent in the ``Referer`` request header. The default is
            the value this class has always sent; an autohome.com.cn referer
            is what stops the page script from redirecting to the mobile
            site.
    """

    def __init__(self, url, referer='https://www.autohome.com.cn/beijing/'):
        self.html = None  # filled in by callable() with the page HTML
        # The QApplication has to exist before the widget is initialised.
        self.app = QApplication(sys.argv)
        QWebEngineView.__init__(self)
        self.loadFinished.connect(self._load_finished)
        self.request = QWebEngineHttpRequest(QUrl(url))
        self.request.setHeader(QByteArray().append('Referer'), QByteArray().append(referer))
        self.load(self.request)
        # Blocks here in the Qt event loop until callable() calls quit().
        self.app.exec_()

    def _load_finished(self, result):
        """Click the area switcher once the page has loaded.

        ``result`` is the success flag delivered by loadFinished; it is not
        inspected, so the click is attempted even after a failed load.
        """
        # runJavaScript() is asynchronous. Requesting the HTML from its
        # result callback guarantees the snapshot is taken after the click
        # has run, instead of relying on the ordering of two independent
        # asynchronous calls.
        self.page().runJavaScript(
            "document.getElementById('auto-header-switcharea').click();",
            self._click_finished,
        )

    def _click_finished(self, _js_result):
        """Request the page HTML after the click script has finished.

        ``_js_result`` is the value of the script's last statement; it is
        ignored.
        """
        self.page().toHtml(self.callable)

    def callable(self, data):
        """Store the HTML passed back by toHtml() and stop the event loop."""
        self.html = data
        self.app.quit()
# Render the Beijing page, parse the captured HTML and print every city
# link found in the pop-up markup.
url = 'https://www.autohome.com.cn/beijing/'
web = Render(url)
soup = BeautifulSoup(web.html, 'html.parser')
city_links = soup.find_all('a', attrs={'name': 'auto-header-citypop-city'})
for link in city_links:
    print(link)
Outputs:
<a data-info="[110100, 646, '北京', 'beijing']" data-key="110100" href="javascript:void(0);" name="auto-header-citypop-city" target="_self">北京</a>
<a data-info="[440100, 62, '广州', 'guangzhou']" data-key="440100" href="javascript:void(0);" name="auto-header-citypop-city" target="_self">广州</a>
<a data-info="[440300, 670, '深圳', 'shenzhen']" data-key="440300" href="javascript:void(0);" name="auto-header-citypop-city" target="_self">深圳</a>
<a data-info="[320100, 335, '南京', 'nanjing']" data-key="320100" href="javascript:void(0);" name="auto-header-citypop-city" target="_self">南京</a>
<a data-info="[310100, 649, '上海', 'shanghai']" data-key="310100" href="javascript:void(0);" name="auto-header-citypop-city" target="_self">上海</a>
....
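There is no page load after the click event, so loadFinished is emitted only once and there is no need for two _load_finished methods. This is also why the original program never exited: its second handler, the only one that led to app.quit(), was never called.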