Skip to content

Commit b0bd7be

Browse files
Final version added
1 parent f534a7a commit b0bd7be

File tree

1 file changed

+308
-0
lines changed

1 file changed

+308
-0
lines changed
Lines changed: 308 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,308 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# Form implementation generated from reading ui file 'H:\Github\PythonScripts\PyQtDesigner\WebScrapper\WebScrapper.ui'
4+
#
5+
# Created by: PyQt5 UI code generator 5.10.1
6+
#
7+
# WARNING! All changes made in this file will be lost!
8+
9+
from PyQt5 import QtCore, QtGui, QtWidgets
10+
from PyQt5.QtWidgets import QSystemTrayIcon, QAction
11+
from PyQt5.QtGui import QIcon
12+
from datetime import datetime
13+
from requests_html import HTMLSession
14+
import requests.exceptions
15+
import os
16+
import sys
17+
import multiprocessing
18+
19+
class Ui_MainWindow(object):
    """Widgets and behaviour of the WebScrapper main window.

    The window offers a URL field, an XPath field, a "first page / all pages"
    choice and two list widgets: ``listWidgetMain`` records the XPath commands
    that were run and ``listWidgetLogs`` records the scraped values and any
    error messages.

    State kept on the instance:
        scriptDir: directory of this file; icons and saved logs live there.
        oldUrl:    URL of the page currently held in ``r`` ("_" = none yet).
        r:         last successfully fetched response, or None.
        xpathSrc:  XPath expression of the current/last run.
        Stop:      run flag. NOTE the inverted name: True means "keep
                   scraping", False means the user asked to stop.
    """

    # Seconds to wait for a server before giving up, so an unresponsive
    # site cannot freeze the GUI forever.
    REQUEST_TIMEOUT = 30

    def setupUi(self, MainWindow):
        """Create every widget on ``MainWindow`` and connect the buttons.

        Args:
            MainWindow: the QMainWindow instance to populate.
        """
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(1366, 768)
        self.scriptDir = os.path.dirname(os.path.realpath(__file__))
        MainWindow.setWindowIcon(QIcon(os.path.join(self.scriptDir, 'icon.png')))
        MainWindow.setAutoFillBackground(False)

        # The background image is resolved next to this script (like the
        # icon) instead of through an absolute path that only exists on the
        # original author's machine. Qt style sheets want forward slashes.
        # NOTE(review): assumes bg.png ships alongside icon.png - confirm.
        backgroundPath = os.path.join(self.scriptDir, 'bg.png').replace(os.sep, '/')
        MainWindow.setStyleSheet(
            "QMainWindow#MainWindow\n"
            "{\n"
            "    /*background-color:rgb(195,223,255);*/\n"
            "    background-image: url('" + backgroundPath + "');\n"
            "    background-position: center; /* Center the image */\n"
            "    background-repeat: no-repeat;\n"
            "}\n"
            "\n"
            "QWidget#centralwidget\n"
            "{\n"
            "\n"
            "}\n"
            "\n"
            "QLabel\n"
            "{\n"
            "    color:rgb(255,255,255);\n"
            "}\n"
            "\n"
            "QRadioButton\n"
            "{\n"
            "    color:rgb(255,255,255);\n"
            "}\n"
        )

        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setStyleSheet("")
        self.centralwidget.setObjectName("centralwidget")
        parent = self.centralwidget

        # Widgets are created in the same order as before so that stacking
        # and tab order are unchanged.
        self.listWidgetMain = self._place(
            QtWidgets.QListWidget(parent), "listWidgetMain", (30, 60, 481, 400), 10)
        self.listWidgetLogs = self._place(
            QtWidgets.QListWidget(parent), "listWidgetLogs", (860, 60, 461, 541), 10)
        self.buttonGetInput = self._place(
            QtWidgets.QPushButton(parent), "buttonGetInput", (40, 630, 80, 30), 10)
        self.textEditUrl = self._place(
            QtWidgets.QTextEdit(parent), "textEditUrl", (30, 490, 481, 31), 12)
        self.textEditSource = self._place(
            QtWidgets.QTextEdit(parent), "textEditSource", (30, 570, 481, 31), 12)
        self.buttonClearLogs = self._place(
            QtWidgets.QPushButton(parent), "buttonClearLogs", (980, 630, 90, 30), 10)
        self.buttonSaveLogs = self._place(
            QtWidgets.QPushButton(parent), "buttonSaveLogs", (860, 630, 90, 30), 10)
        self.radioButtonFirst = self._place(
            QtWidgets.QRadioButton(parent), "radioButtonFirst", (120, 540, 82, 17), 10)
        self.buttonStop = self._place(
            QtWidgets.QPushButton(parent), "buttonStop", (150, 630, 80, 30), 10)
        self.radioButtonAll = self._place(
            QtWidgets.QRadioButton(parent), "radioButtonAll", (230, 540, 71, 20), 10)

        self.labelMain = self._place(
            QtWidgets.QLabel(parent), "labelMain", (170, 20, 200, 31), 12)
        self.labelMain.setAutoFillBackground(False)
        self.labelMain.setFrameShape(QtWidgets.QFrame.StyledPanel)
        self.labelMain.setFrameShadow(QtWidgets.QFrame.Plain)
        self.labelMain.setAlignment(QtCore.Qt.AlignCenter)

        self.labelLog = self._place(
            QtWidgets.QLabel(parent), "labelLog", (1000, 20, 200, 31), 12)
        self.labelLog.setFrameShape(QtWidgets.QFrame.StyledPanel)
        self.labelLog.setFrameShadow(QtWidgets.QFrame.Sunken)
        self.labelLog.setAlignment(QtCore.Qt.AlignCenter)

        self.label = self._place(
            QtWidgets.QLabel(parent), "label", (30, 470, 91, 21), 10)
        self.label_2 = self._place(
            QtWidgets.QLabel(parent), "label_2", (30, 540, 71, 31), 10)
        self.buttonClear = self._place(
            QtWidgets.QPushButton(parent), "buttonClear", (260, 630, 80, 30), 10)
        MainWindow.setCentralWidget(self.centralwidget)

        self.buttonGetInput.clicked.connect(self.getInput)
        self.buttonSaveLogs.clicked.connect(self.saveLogs)
        self.buttonClearLogs.clicked.connect(self.clearLogs)
        self.buttonClear.clicked.connect(self.clear)
        self.buttonStop.clicked.connect(self.stop)
        # Stop only makes sense while a scrape is running.
        self.buttonStop.setEnabled(False)

        # Scraper state. It is initialised here rather than in
        # retranslateUi() so that re-translating the UI never resets it.
        self.oldUrl = "_"
        self.r = None
        self.xpathSrc = ""
        self.Stop = True

        self.retranslateUi(MainWindow)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)

    @staticmethod
    def _place(widget, objectName, geometry, pointSize):
        """Apply geometry (x, y, w, h), font size and object name; return widget."""
        widget.setGeometry(QtCore.QRect(*geometry))
        font = QtGui.QFont()
        font.setPointSize(pointSize)
        widget.setFont(font)
        widget.setObjectName(objectName)
        return widget

    def retranslateUi(self, MainWindow):
        """Set every user-visible caption, including the window title.

        Args:
            MainWindow: the window whose title is set.
        """
        _translate = QtCore.QCoreApplication.translate
        # Previously this reset the title to the placeholder "MainWindow",
        # undoing the real title that setupUi() had just applied.
        MainWindow.setWindowTitle(
            _translate("MainWindow", "WebScrapper by [@Nishant Ghanate]"))
        self.buttonGetInput.setText(_translate("MainWindow", "Get "))
        self.buttonClearLogs.setText(_translate("MainWindow", "Clear logs"))
        self.buttonSaveLogs.setText(_translate("MainWindow", "Save Logs"))
        self.radioButtonFirst.setText(_translate("MainWindow", "First page"))
        self.buttonStop.setText(_translate("MainWindow", "Stop"))
        self.radioButtonAll.setText(_translate("MainWindow", "All pages"))
        self.labelMain.setText(_translate("MainWindow", "Command Logs"))
        self.labelLog.setText(_translate("MainWindow", "Source logs"))
        self.label.setText(_translate("MainWindow", "Website url :"))
        self.label_2.setText(_translate("MainWindow", "Xpath :"))
        self.buttonClear.setText(_translate("MainWindow", "Clear"))

    def getTimeStamp(self):
        """Return the current local time, e.g. 'Monday 01. October 2018 09:15:30 AM'."""
        return datetime.now().strftime("%A %d. %B %Y %I:%M:%S %p")

    def _logWithTimestamp(self, message):
        """Append a timestamp line followed by ``message`` to the source log."""
        self.listWidgetLogs.addItem(self.getTimeStamp())
        self.listWidgetLogs.addItem(message)

    def stop(self):
        """Handle the Stop button: request that the running scrape ends.

        The flag is read by getSource() between pages; getInput() sets it
        back to True when the next run starts.
        """
        self.buttonGetInput.setEnabled(True)
        self.Stop = False

    def validateUrl(self, url):
        """Make sure ``self.r`` holds the response for ``url``.

        The page is downloaded only when ``url`` differs from the one already
        loaded, so changing just the XPath re-uses the fetched page.

        Args:
            url: address to fetch.

        Returns:
            True when a response for ``url`` is available, False when the
            request failed (the error text is appended to the source log).
        """
        if url == self.oldUrl and self.r is not None:
            return True
        try:
            session = HTMLSession()
            response = session.get(url, timeout=self.REQUEST_TIMEOUT)
        except requests.exceptions.RequestException as e:
            self.listWidgetLogs.addItem(str(e))
            return False
        # Remember the URL only after the fetch succeeded; otherwise a retry
        # of a failed URL would be treated as "already loaded".
        self.r = response
        self.oldUrl = url
        return True

    def getSource(self):
        """Run the XPath on the loaded page and log each match.

        When "All pages" is checked the scraper keeps following the page's
        "next" link until there is no next page, a page yields no match, a
        page fails to load (or is not HTTP 200), a link leads back to a page
        already visited, or the user pressed Stop.
        """
        if self.r is None:
            return
        visited = {self.oldUrl}
        try:
            while self.Stop:
                # xpath() returns a list, also for a single match.
                matches = self.r.html.xpath(self.xpathSrc)
                if not matches:
                    break
                for match in matches:
                    # The list widget only accepts strings.
                    self.listWidgetLogs.addItem(str(match))
                if not self.radioButtonAll.isChecked():
                    break

                page = self.r.html
                # NOTE(review): the original relied on the private _next();
                # newer requests_html releases appear to expose it as the
                # public next() - confirm against the installed version.
                findNext = getattr(page, '_next', None) or page.next
                nextUrl = findNext()
                # A "next" link that points at a page already scraped would
                # otherwise loop forever.
                if not nextUrl or nextUrl in visited:
                    break
                print(nextUrl)
                visited.add(nextUrl)
                if not self.validateUrl(nextUrl) or self.r.status_code != 200:
                    break
                # Let Qt deliver pending clicks (notably Stop) between pages.
                QtWidgets.QApplication.processEvents()
        except Exception as error:
            self.listWidgetLogs.addItem('invalid xpath format')
            # The cause is not always the XPath, so keep the real reason.
            self.listWidgetLogs.addItem(str(error))

    def getInput(self):
        """Handle the Get button: validate the inputs and start a scrape."""
        url = self.textEditUrl.toPlainText().strip()
        self.xpathSrc = self.textEditSource.toPlainText().strip()
        print(self.xpathSrc)

        if not (url and self.xpathSrc):
            if len(url) < 5:
                self._logWithTimestamp('Url cannot be empty ')
            else:
                self._logWithTimestamp('xpath input where is it ?')
            return

        if not self.validateUrl(url):
            return

        self.listWidgetLogs.addItem(self.getTimeStamp())
        self.listWidgetMain.addItem(self.xpathSrc)

        # Start a fresh run: clear any earlier stop request, and block a
        # second Get while this one is still scraping.
        self.Stop = True
        self.buttonGetInput.setEnabled(False)
        self.buttonStop.setEnabled(True)
        try:
            self.getSource()
        finally:
            self.buttonGetInput.setEnabled(True)
            self.buttonStop.setEnabled(False)

    def saveLogs(self):
        """Write both logs to a timestamped text file next to this script.

        The outcome (success or the OS error text) is appended to the source
        log; it is not part of the file that was just written.
        """
        timestampDay = datetime.now().strftime("%A %d %B %Y %I %M %S%p")
        fileName = os.path.join(
            self.scriptDir, 'WebScrappingLogs ' + timestampDay + '.txt')

        commands = [self.listWidgetMain.item(i).text()
                    for i in range(self.listWidgetMain.count())]
        sources = [self.listWidgetLogs.item(i).text()
                   for i in range(self.listWidgetLogs.count())]
        lines = ['Command Logs ' + timestampDay]
        lines.extend(commands)
        # Blank separator, then the second section header on its own line.
        lines.extend(['', 'Source Logs'])
        lines.extend(sources)

        try:
            with open(fileName, 'a', encoding='utf-8') as logFile:
                logFile.write('\n'.join(lines) + '\n')
        except OSError as error:
            self.listWidgetLogs.addItem('Could not save logs: ' + str(error))
            return
        self.listWidgetLogs.addItem('Logs saved Successfully')

    def clearLogs(self):
        """Handle the "Clear logs" button: empty the source log list."""
        self.listWidgetLogs.clear()

    def clear(self):
        """Handle the "Clear" button: empty the command log list."""
        self.listWidgetMain.clear()
292+
293+
294+
295+
296+
if __name__ == "__main__":
    # Script entry point: build the Qt application, populate one main window
    # with the scraper UI, show it and hand control to the Qt event loop.
    application = QtWidgets.QApplication(sys.argv)
    window = QtWidgets.QMainWindow()

    # Keep the UI object bound to a name for the lifetime of the event loop;
    # its bound methods are the button handlers.
    ui = Ui_MainWindow()
    ui.setupUi(window)

    window.setWindowTitle('WebScrapper by [@Nishant Ghanate]')
    window.show()

    # exec_() blocks until the application quits; its return value becomes
    # the process exit status.
    exitCode = application.exec_()
    sys.exit(exitCode)
308+

0 commit comments

Comments
 (0)