forked from abhishek-ch/MachineLearning-using-R
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathRedditCommentWordcloud.R
More file actions
93 lines (72 loc) · 3.24 KB
/
RedditCommentWordcloud.R
File metadata and controls
93 lines (72 loc) · 3.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# Load required packages. library() (not require()) so a missing package
# fails loudly here rather than producing confusing errors downstream.
library(RSQLite)
library(dplyr)
library(wordcloud)
library(tm)
library(RColorBrewer)
# Set up a repeatable function for making an unlimited number of subreddit wordclouds
#
# Builds a word cloud PNG named "<subreddit>.png" from the bodies of all
# May 2015 comments in the given subreddit: lowercase, strip punctuation,
# drop English stopwords plus a manual list of uninteresting words, weight
# terms by raw TF-IDF, prune sparse terms, then plot up to 75 words.
# Prints the subreddit name when finished.
#
# Args:
#   subreddit: character scalar, subreddit name (matched against the
#              `subreddit` column of the May2015 table in
#              ../input/database.sqlite).
makewordcloud <- function(subreddit) {
  # Copy into a differently-named local so the dplyr filter below compares
  # the DB column `subreddit` against this value instead of against itself.
  # (Avoids the name `sub`, which also shadows base::sub.)
  target_sub <- subreddit
  max_words <- 75
  # NOTE(review): src_sqlite() is deprecated in current dplyr in favour of
  # DBI::dbConnect() + tbl(); kept for compatibility with this script's
  # original environment. Use FALSE, never the reassignable shorthand F.
  db <- src_sqlite('../input/database.sqlite', create = FALSE)
  db_subset <- db %>%
    tbl('May2015') %>%
    filter(subreddit == target_sub)  # target_sub is inlined by dbplyr's partial evaluation
  db_subset <- data.frame(db_subset)
  corp <- Corpus(VectorSource(db_subset$body))
  # content_transformer() is required for non-tm functions in tm >= 0.6;
  # a bare tolower would strip document metadata and break later tm_map calls.
  corp <- tm_map(corp, content_transformer(tolower))
  corp <- tm_map(corp, PlainTextDocument)
  corp <- tm_map(corp, removePunctuation)
  corp <- tm_map(corp, removeWords, stopwords("english"))
  # Removing Common Uninteresting Words Manually
  corp <- tm_map(corp, removeWords, c("like", "dont", "people", "deleted", "think", "cant", "back", "one", "will", "pretty", "better", "need", "got", "time", "thats", "always", "even", "youre", "look", "never", "just", "way", "see", "though", "thing", "still", "new", "best", "sure", "ever", "going", "make", "work", "really", "something", "things", "good", "get", "can", "maybe", "great", "different", "actually", "isnt", "doesnt", "use", "lot", "around", "take", "now", "fuck", "real", "two", "theyre", "mean", "someone", "years", "since", "fucking", "say", "made", "know", "find", "little", "point", "yeah", "said", "day", "getting", "looks", "many", "theres", "yes", "long", "old", "right", "shit", "used", "every", "bad", "first", "want", "can", "man", "probably", "everyone", "much", "buy", "hey", "thanks", "means", "open", "important", "top", "ive", "help", "less", "quite", "least", "also", "send", "tried", "bit", "usually", "havent", "decide", "soon", "youll", "usual"))
  # Raw (unnormalized) TF-IDF weighting; stopwords = TRUE is a harmless
  # second pass since stopwords were already removed above.
  dtm <- DocumentTermMatrix(corp,
    control = list(weighting = function(x) weightTfIdf(x, normalize = FALSE), stopwords = TRUE))
  # Keep only terms appearing in at least 0.5% of documents.
  dtm_sparse <- removeSparseTerms(dtm, 0.995)
  post_words <- as.data.frame(as.matrix(dtm_sparse))
  total_words <- data.frame(words = colnames(post_words),
                            counts = colSums(post_words))
  # Set up output so it is in a named png; on.exit guarantees the device is
  # closed even if wordcloud() errors, so we never leak an open device.
  png(paste0(subreddit, ".png"))
  on.exit(dev.off(), add = TRUE)
  wordcloud(words = total_words$words,
            freq = total_words$counts,
            max.words = max_words,
            colors = brewer.pal(8, "Dark2"))  # full arg name, not partial match `color`
  #print(total_words$words)
  print(subreddit)
}
# List of Wordclouds to make here: one PNG per subreddit, generated in order.
subreddit_list <- c(
  "PlantsVSZombies", "needforspeed", "DevilMayCry", "JC2", "myst",
  "Spore", "killzone", "WC3", "counter_strike", "Doom",
  "Rockband", "ptcgo", "AgeofMythology", "AceAttorney", "SaintsRow",
  "GearsOfWar", "residentevil", "kotor", "silenthill", "mariokart",
  "StreetFighter", "neopets", "Madden", "splatoon", "TeraOnline",
  "Portal", "dwarffortress", "Bioshock", "halo", "civ",
  "zelda", "starcraft", "DotA2", "hearthstone"
)
for (sub_name in subreddit_list) {
  makewordcloud(sub_name)
}