Issue
I'm trying to get a crawler for an online chess site running; it is available at this GitHub link:
https://github.com/Rseiji/ChessCommentaryGeneration (a fork I created from the original repo)
It uses Python 2 and PyQt4, whose QtWebKit module is no longer available.
So, I found this link:
Python 2.7.11 - ImportError: cannot import name QtWebKit - Kali Linux / Debian 8
I didn't understand it well (what is sparta?), but I understood there is a library called PySide which has a module QtWebKit which could be used.
So I tried to modify the crawler's code by simply changing the import lines:
import sys
from PyQt4.QtGui import *
from PyQt4.QtCore import *
#from PyQt4.QtWebKit import *
from PySide.QtWebKit import *
from lxml import html
import pickle
import time
from PyQt4 import QtGui, QtCore
import functools
import sys
import argparse
# Build the command-line parser and return the parsed options.
# Both -i and -num are converted to int and exposed as args.i and args.num.
def parseArguments():
parser = argparse.ArgumentParser()
#parser.add_argument("-typ", dest="typ", help="home or subsequent", default='home')
parser.add_argument("-i", type=int, dest="i", help="i")
parser.add_argument("-num", type=int, dest="num", help="num")
args = parser.parse_args()
return args
# The command line is parsed once, at module level; save_all() reads params.i and params.num.
params = parseArguments()
#typ = params.typ
#Take this class for granted.Just use result of rendering.
# Loads `url` in a QWebPage and runs the Qt event loop until loadFinished fires.
# NOTE: with the imports above, QWebPage comes from PySide.QtWebKit while
# QApplication, QUrl and QTimer come from PyQt4. This mix of bindings is what
# the TypeError quoted below the listing is about (see the answer).
class Render(QWebPage):
def __init__(self, url):
self.app = QApplication(sys.argv)
QWebPage.__init__(self)
self.loadFinished.connect(self._loadFinished)
qurl = QUrl(url)
# The load is not started directly: the timer below calls
# mainFrame().load(qurl) each time its 10000 ms interval elapses.
func = functools.partial(self.mainFrame().load, qurl )
timer = QtCore.QTimer()
timer.timeout.connect(func)
timer.start(10000)
# Blocks here until _loadFinished() calls self.app.quit().
self.app.exec_()
# Slot for loadFinished: keeps the main frame in self.frame and leaves the
# event loop. The `result` argument is not inspected.
def _loadFinished(self, result):
self.frame = self.mainFrame()
self.app.quit()
# Render the page selected by the -i / -num options and write its HTML to
# ./saved_files/. Also publishes the URL and the HTML through the globals
# cur_url and html_doc.
def save_all():
global cur_url
global html_doc
# The list of links is read from a pickle file.
all_links = pickle.load( open("./saved_files/saved_links.p", "r") )
#extra_links = pickle.load( open("extra_pages.p", "r") )
print "len(all_links) = ",len(all_links)
# NOTE(review): this assignment is overwritten by `num = params.num` below.
num = sys.argv[1]
i = params.i
print "i = ",type(i)
num = params.num
url = all_links[i]
# A non-zero num is appended to the URL as the "pg" query parameter.
if num!=0:
url+="&pg="+str(num)
print "i, url = ",i,url
#This step is important.Converting QString to Ascii for lxml to process
#archive_links = html.fromstring(str(result.toAscii()))
cur_url = url
error_count = 0
try:
r = Render(cur_url)
result = r.frame.toHtml()
html_doc = result.toAscii()
# Output file name: saved<i>.html when num is 0, saved<i>_<num>.html otherwise.
if num==0:
fw = open("./saved_files/saved"+str(i)+".html", "w")
else:
fw = open("./saved_files/saved"+str(i)+"_" + str(num) + ".html", "w")
fw.write(html_doc)
fw.close()
print "---- SLEEPING ---- "
time.sleep(10)
# Bare except: any failure above is reported only as "ERROR!!".
except:
print "ERROR!!"
error_count+=1
print "error_count = ",error_count
##if i>4:
## break
# Script entry point.
if __name__=="__main__":
save_all()
Before, when executing the code with python run_all.py 0 11577 1, the error was that the QtWebKit module could not be imported, but now I get:
TypeError: 'PySide.QtWebKit.QWebFrame.load' called with wrong argument types:
PySide.QtWebKit.QWebFrame.load(QUrl)
Supported signatures:
PySide.QtWebKit.QWebFrame.load(PySide.QtNetwork.QNetworkRequest, PySide.QtNetwork.QNetworkAccessManager.Operation = QNetworkAccessManager.GetOperation, PySide.QtCore.QByteArray = QByteArray())
PySide.QtWebKit.QWebFrame.load(PySide.QtCore.QUrl)
It doesn't indicate any code line, and repeats this message continuously.
What can I do?
Thank you!
Solution
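Although PyQt4 and PySide are both Qt4 bindings, they are not compatible with each other: the QUrl created by PyQt4 is not the PySide.QtCore.QUrl that PySide.QtWebKit.QWebFrame.load expects, and that is the reason for the error. The solution is to use either PyQt4 or PySide, not both. In this case, the code for PySide is: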
import argparse
import functools
from lxml import html
import pickle
import sys
import time
# from PyQt4 import QtCore, QtGui, QtWebKit
from PySide import QtCore, QtGui, QtWebKit
def parseArguments():
    """Parse the crawler's command-line options.

    Returns:
        argparse.Namespace with two integer attributes:
        ``i``   -- value of ``-i`` (``None`` when the option is omitted);
        ``num`` -- value of ``-num``; defaults to 0 when omitted, so that a
                   missing option is not mistaken for a page number.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", type=int, dest="i", help="i")
    # default=0: without it an omitted -num is None, which compares unequal
    # to 0 and would end up in the URL / file name as the text "None".
    parser.add_argument("-num", type=int, dest="num", help="num", default=0)
    return parser.parse_args()
# The command line is parsed once, at module level; save_all() reads params.i and params.num.
params = parseArguments()
# typ = params.typ
# Take this class for granted.Just use result of rendering.
class Render(QtWebKit.QWebPage):
    """Load a URL in an off-screen QWebPage and block until loading finishes.

    Constructing an instance runs the Qt event loop. When the constructor
    returns, ``self.frame`` is the page's main frame (call ``toHtml()`` on
    it) and ``self.ok`` holds the success flag reported by ``loadFinished``.

    Args:
        url: Address of the page to load.
        delay_ms: Milliseconds to wait before the load is started. Defaults
            to 10000, the value that used to be hard-coded.
    """

    def __init__(self, url, delay_ms=10000):
        # Qt permits a single QApplication per process, so reuse a running
        # one instead of failing when Render is instantiated a second time.
        app = QtGui.QApplication.instance()
        if app is None:
            app = QtGui.QApplication(sys.argv)
        QtWebKit.QWebPage.__init__(self)
        self.app = app
        self.frame = None
        self.ok = False
        self.loadFinished.connect(self._loadFinished)

        start_load = functools.partial(self.mainFrame().load, QtCore.QUrl(url))
        # Single shot: a repeating timer would call load() again every
        # delay_ms, restarting any page that needs longer than that to load.
        timer = QtCore.QTimer()
        timer.setSingleShot(True)
        timer.timeout.connect(start_load)
        timer.start(delay_ms)
        # Blocks until _loadFinished() quits the event loop.
        self.app.exec_()

    def _loadFinished(self, result):
        """Record the outcome of the load and leave the event loop.

        Args:
            result: Success flag passed by the ``loadFinished`` signal.
        """
        self.ok = bool(result)
        self.frame = self.mainFrame()
        self.app.quit()
def _html_to_ascii(markup):
    """Return *markup* as ASCII-only bytes, keeping non-ASCII text as HTML character references."""
    if hasattr(markup, "toUtf8"):
        # A QString (PyQt4 with its default string API): convert to Python text first.
        markup = bytes(markup.toUtf8()).decode("utf-8")
    # PySide has no QString, so toHtml() already returns a Python string and
    # there is no toAscii() method to call on it.
    return markup.encode("ascii", "xmlcharrefreplace")


def save_all():
    """Render the page selected on the command line and save its HTML.

    Reads the list of links from ``./saved_files/saved_links.p``, picks entry
    ``params.i``, appends ``&pg=<num>`` when ``params.num`` is non-zero,
    renders the page with ``Render`` and writes the markup to
    ``./saved_files/saved<i>.html`` (or ``saved<i>_<num>.html``).

    Side effects: sets the module globals ``cur_url`` and ``html_doc``,
    prints progress to stdout and sleeps 10 seconds after a successful save.
    Rendering or writing failures are reported on stdout, not raised.
    """
    global cur_url
    global html_doc

    # NOTE(security): pickle.load can execute arbitrary code from the file;
    # only load a saved_links.p that was produced by this project.
    with open("./saved_files/saved_links.p", "rb") as links_file:
        all_links = pickle.load(links_file)
    # Single pre-formatted strings print the same on Python 2 and 3
    # (a multi-argument print(...) shows a tuple on Python 2).
    print("len(all_links) =  %d" % len(all_links))

    i = params.i
    print("i =  %s" % (type(i),))
    num = params.num

    url = all_links[i]
    # Truthiness test: neither 0 nor a missing (None) value is a page number.
    if num:
        url += "&pg=" + str(num)
        out_path = "./saved_files/saved" + str(i) + "_" + str(num) + ".html"
    else:
        out_path = "./saved_files/saved" + str(i) + ".html"
    print("i, url =  %s %s" % (i, url))

    cur_url = url
    error_count = 0
    try:
        r = Render(cur_url)
        html_doc = _html_to_ascii(r.frame.toHtml())
        # Binary mode because html_doc is bytes; `with` closes the file even
        # when the write fails.
        with open(out_path, "wb") as fw:
            fw.write(html_doc)
        print("---- SLEEPING ---- ")
        time.sleep(10)
    except Exception as exc:
        # Best effort, as before, but say what went wrong instead of hiding it.
        print("ERROR!! %r" % (exc,))
        error_count += 1
    print("error_count =  %d" % error_count)


if __name__ == "__main__":
    save_all()
Answered By - eyllanesc
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.