forked from schemes-ohyeah/Hacktech2017
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtester.py
More file actions
101 lines (88 loc) · 3.19 KB
/
tester.py
File metadata and controls
101 lines (88 loc) · 3.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import scraper
import json
import config
import requests
import requests.auth
import praw
import weight
from urllib.parse import urlparse
import time
# getTagImage(), getCelebrity(), getOCR()
def print_comments(thread):
    """Fetch a reddit thread's JSON listing and print each top-level comment body.

    Parameters
    ----------
    thread : str
        URL of a reddit thread's ``.json`` endpoint (a two-element listing:
        index 0 is the submission, index 1 the comment tree).
    """
    COMMENTS_SECTION = 1  # listing index of the comment tree
    print("Thread:", "", thread)
    data_2 = None  # stays None when the request/parse fails, so the dump below can be guarded
    try:
        print("Before requests")
        req = requests.get(thread)
        data_2 = req.json()
        print("after requests")
        # kind "t1" marks a real comment node (as opposed to "more" stubs etc.)
        comments = [value['data']['body']
                    for value in data_2[COMMENTS_SECTION]['data']['children']
                    if value['kind'] == "t1"]
        print("Filled array")
        for val in comments:  # This is where it prints stuff
            print(val)
    except Exception as e:
        print(e)
        print("Error in print_comments")
    print("json from print_comments")
    # Bug fix: the original referenced data_2 unconditionally here, raising
    # NameError whenever the request inside the try block failed.
    if data_2 is not None:
        print(json.dumps(data_2, indent=4))
def get_urls():
    """Collect top image posts from a set of subreddits plus their top comments.

    Pulls the top ``LIMIT`` threads per subreddit via praw, keeps only direct
    ``.jpg``/``.png`` links and imgur page links (rewritten to direct image
    URLs), and gathers every comment body on each kept thread.

    Returns
    -------
    dict
        Maps a direct image URL -> list of comment body strings.
    """
    LIMIT = 20  # threads fetched per subreddit
    url_count = 0
    subreddits = ["earthporn"]  # add more, e.g. "aww", as desired
    images = {}
    reddit = praw.Reddit('bot1')
    for subreddit in subreddits:
        for thread in reddit.subreddit(subreddit).top(limit=LIMIT):
            if thread.url.endswith((".jpg", ".png")):
                image_url = thread.url
            elif urlparse(thread.url).netloc == "imgur.com":
                # Rewrite an imgur page link into a direct i.imgur.com image URL.
                # Bug fix: urlparse().path already starts with "/", so the
                # original template's extra "/" produced "i.imgur.com//xyz.png".
                parsed = urlparse(thread.url)
                image_url = "{0.scheme}://i.{0.netloc}{0.path}.png".format(parsed)
            else:
                continue
            # Shared per-thread comment collection (previously duplicated
            # verbatim in both branches above).
            submission = reddit.submission(thread.id)
            submission.comment_sort = 'top'
            submission.comments.replace_more(limit=0)  # makes sure it has a body (Not the "more" button)
            print("Thread ", url_count + 1, " loaded")
            comms = [comment.body for comment in submission.comments.list()]
            print("Comments added")
            images[image_url] = comms
            print("Image added")
            url_count = url_count + 1
    print(url_count)
    return images
def main():
    """Entry point: scrape image threads and their comments."""
    get_urls()


# Bug fix: the original called main() unconditionally, so merely importing
# this module kicked off a full network scrape. Guard the entry point.
if __name__ == "__main__":
    main()