Skip to content

Commit 4aeaef6

Browse files
Documentation added and changes in getInput
1 parent c6cb6e4 commit 4aeaef6

File tree

2 files changed

+31
-29
lines changed

2 files changed

+31
-29
lines changed

PyQtDesigner/WebScrapper/WebScapper.py

Lines changed: 29 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import os
77
import sys
88

9+
#Init PythonUI
910
class Web(QtWidgets.QMainWindow):
1011
def __init__(self):
1112
super(Web,self).__init__()
@@ -16,36 +17,48 @@ def __init__(self):
1617
self.buttonGetInput.clicked.connect(self.getInput)
1718
self.buttonSaveLogs.clicked.connect(self.saveLogs)
1819
self.buttonClearLogs.clicked.connect(self.clearLogs)
20+
# self.buttonClear.clicked.connect()
21+
# self.buttonStop.clicked.connect()
22+
1923
self.oldUrl = "_"
20-
24+
25+
# Activate UI functions
2126
QtCore.pyqtSlot()
27+
28+
# Returns the current time formatted like "Monday, 01. October 2018 09:30:00 AM"
2229
def getTimeStamp(self):
2330
return datetime.now().strftime("%A, %d. %B %Y %I:%M:%S %p")
2431

32+
# Validate the input url; on a request error, log it and return False
2533
def validateUrl(self,url):
2634
try:
27-
# if new url found
35+
# Create new session only if new url
2836
if self.oldUrl != url:
2937
self.oldUrl = url
3038
session = HTMLSession()
3139
self.r = session.get(url)
3240
return True
41+
# Otherwise return True: only xpathSrc changed, so continue scraping from the given url
3342
else :
34-
return False
43+
return True
44+
# On a request exception, log it and return False
3545
except requests.exceptions.RequestException as e:
3646
self.listWidgetLogs.addItem(str(e))
3747
return False
3848

39-
40-
def getSource(self):
49+
# Scrapes the page for the given XPath; calls itself again for the next page when one exists
50+
def getSource(self):
51+
# returns list for single Xpath item
4152
src = self.r.html.xpath(self.xpathSrc)
42-
53+
# proceed further if list is not empty
4354
if src :
4455
timeStamp = self.getTimeStamp()
4556
for s in src:
4657
print(s)
4758
s = str(s)
59+
# List widget only supports string
4860
self.listWidgetLogs.addItem(s)
61+
4962
if self.r.html._next():
5063
print(self.r.html._next())
5164
url = self.validateUrl(self.r.html._next())
@@ -54,22 +67,23 @@ def getSource(self):
5467
self.getSource()
5568

5669

57-
70+
# Handler connected to the Get button
5871
def getInput(self):
5972
url = self.textEditUrl.toPlainText()
6073
self.xpathSrc = self.textEditSource.toPlainText()
61-
if url is None or len(url) < 5:
74+
75+
if url and self.xpathSrc is not None:
76+
source = self.validateUrl(url)
77+
if source:
78+
self.getSource()
79+
elif url is None or len(url) < 5:
6280
timestampDay = self.getTimeStamp()
6381
self.listWidgetLogs.addItem(timestampDay)
6482
self.listWidgetLogs.addItem('Url cannot be empty ¯\_(ツ)_/¯ \n')
6583
elif self.xpathSrc is None:
6684
self.listWidgetLogs.addItem('xpath input \_(ʘ_ʘ)_/ ? ')
67-
else :
68-
source = self.validateUrl(url)
69-
if source:
70-
self.getSource()
71-
72-
85+
86+
# Saves logs from Preserved logs
7387
def saveLogs(self):
7488
timestampDay = datetime.now().strftime("%A, %d. %B %Y %I:%M:%S %p")
7589
file = open(self.scriptDir + os.path.sep +'WebScrappingLogs_'+timestampDay+'.txt','a+')
@@ -80,7 +94,7 @@ def saveLogs(self):
8094
file.close()
8195
self.listWidgetLogs.addItem('Logs saved Sucessfully')
8296

83-
97+
# clear preservedLogs
8498
def clearLogs(self):
8599
self.listWidgetLogs.clear()
86100

@@ -92,15 +106,3 @@ def clearLogs(self):
92106

93107
# https://www.reddit.com/r/ProgrammerHumor/
94108
# r = r.html.xpath('//*[@class="y8HYJ-y_lTUHkQIc1mdCq"]//h2//text()')
95-
96-
97-
# if self.r.html._next():
98-
# url = validateUrl(self.r.html._next())
99-
# if url:
100-
# if self.r.status_code == 200:
101-
# self.getSource()
102-
# else:
103-
# self.listWidgetLogs.addItem('Sorry we are unable to get next page')
104-
105-
# else:
106-
# self.listWidgetLogs.addItem('Could not find given xpath, (•◡•) please try again diffrent')

PyQtDesigner/WebScrapper/WebScrapper.ui

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ QWidget#centralwidget
156156
<string>Save Logs</string>
157157
</property>
158158
</widget>
159-
<widget class="QRadioButton" name="radioButton">
159+
<widget class="QRadioButton" name="radioButtonFirst">
160160
<property name="geometry">
161161
<rect>
162162
<x>120</x>
@@ -192,7 +192,7 @@ QWidget#centralwidget
192192
<string>Stop</string>
193193
</property>
194194
</widget>
195-
<widget class="QRadioButton" name="radioButton_2">
195+
<widget class="QRadioButton" name="radioButtonAll">
196196
<property name="geometry">
197197
<rect>
198198
<x>230</x>

0 commit comments

Comments
 (0)