-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathFinanceAnalysis.R
More file actions
104 lines (72 loc) · 3.61 KB
/
FinanceAnalysis.R
File metadata and controls
104 lines (72 loc) · 3.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
library(ggplot2)
library(splines)
library(reshape2)
# Read the ICICI statement CSV into `data`. The file location depends on the
# login name: one path is used for the user "achoudhary", the other path for
# everybody else.
data <- read.csv(
  if (Sys.info()["user"] == "achoudhary") {
    "D:/Work/RWorkSpace/icici.csv"
  } else {
    "/Users/abhishekchoudhary/Google Drive/BIG/FinancialAnalyst/icici.csv"
  }
)
# First look at the raw statement: two histograms, then overall totals.
qplot(Deposit, data = data, geom = "histogram")
# NOTE(review): `population` is not used anywhere else in this script --
# confirm that the CSV really contains a column with that name.
qplot(population, data = data, geom = "histogram")

# Column totals; NA entries are skipped.
sum(data$Deposit, na.rm = TRUE)
sum(data$Withdrawal, na.rm = TRUE)

# Number of rows whose CLEANSE value is "PAYMENT".
sum(data$CLEANSE == "PAYMENT", na.rm = TRUE)

# Totals split by whether a row carries a given CLEANSE tag (FALSE / TRUE).
aggregate(
  cbind(Deposit, Withdrawal) ~ CLEANSE == "PAYMENT",
  data = data,
  FUN = sum
)
aggregate(Deposit ~ (CLEANSE == "PAYMENT"), data = data, FUN = sum)
aggregate(Deposit ~ (CLEANSE == "BHOPAL"), data = data, FUN = sum)
aggregate(Deposit ~ (CLEANSE == "IRCTC"), data = data, FUN = sum)
# Per the author's note the first two columns hold the same values, so one of
# them (Value.Date) is dropped. This is done with plain column indexing:
# update() only works on model calls / formulas and cannot be applied to a
# data frame. drop = FALSE keeps the result a data frame whatever the number
# of remaining columns.
cdata <- data[, !(names(data) %in% "Value.Date"), drop = FALSE]

# Total withdrawals, split by whether the row is tagged "ATM/CASH".
aggregate(Withdrawal ~ (CLEANSE == "ATM/CASH"), data = cdata, FUN = sum)
# Split the data month-wise.
# Transaction.Date is parsed as day/month/two-digit-year and reduced to a
# two-digit month key, one entry per row of cdata.
datef <- as.Date(cdata$Transaction.Date, format = "%d/%m/%y")
mo <- strftime(datef, "%m")

# Month-wise deposit and withdrawal totals. The formula interface of
# aggregate() omits rows with NA by default, so the two results are not
# guaranteed to contain the same set of months.
month.dep <- aggregate(Deposit ~ mo, cdata, FUN = sum)
month.with <- aggregate(Withdrawal ~ mo, cdata, FUN = sum)

# Add the withdrawals to the deposit table by matching on the month key
# instead of relying on row position. all = TRUE keeps months that appear in
# only one of the two tables (the missing side becomes NA). The column keeps
# its existing name "Withdrawl" so downstream output is unchanged.
month.dep <- merge(
  month.dep,
  setNames(month.with, c("mo", "Withdrawl")),
  by = "mo",
  all = TRUE
)
month.dep

# Deposits per month as a line with points. `mo` is discrete, so every month
# would otherwise be its own one-point group and no line would be drawn;
# group = 1 joins the points into a single path.
ggplot(month.dep, aes(x = mo, y = Deposit, colour = mo, group = 1)) +
  geom_line() +
  geom_point()

# Deposits per month as bars whose height is the Deposit value itself.
# Written with ggplot() + geom_bar(stat = "identity") because current
# versions of qplot() no longer accept a `stat` argument.
ggplot(month.dep, aes(x = mo, y = Deposit, fill = as.factor(Deposit))) +
  geom_bar(stat = "identity")
# Why melt()?
# ggplot2 generally prefers data in long format. The non-`mo` columns of
# month.dep (the monthly deposit and withdrawal totals) are two categories of
# currency values. melt() stacks them: the category name goes into a column
# called `variable`, which is mapped to the colour aesthetic, and the amounts
# go into a column called `value`, each next to its category.
month.dep.m <- melt(month.dep, id.vars = "mo")

# One line plus markers per category. The marker size is a fixed setting, so
# it is given outside aes(); a constant inside aes() is treated as data, which
# sends it through the size scale and adds a meaningless legend entry.
ggplot(month.dep.m, aes(x = mo, y = value, colour = variable)) +
  ylab("Transactions") +
  xlab("Month-Wise") +
  geom_point(size = 4.5, shape = 21) +
  geom_line(aes(group = variable), size = 1)
############## Playing With Prediction #################################
# Goal: model the withdrawal amount from the other statement columns.

# The spreadsheet-style viewer is only opened in an interactive session so the
# script can also be run in batch mode.
if (interactive()) {
  View(cdata)
}

# Full model: withdrawal explained by transaction date, deposit and balance.
datef <- as.Date(cdata$Transaction.Date, format = "%d/%m/%y")
model1 <- lm(Withdrawal ~ datef + Deposit + Balance, data = cdata)

# Stepwise term selection on the full model.
# NOTE(review): step() needs every candidate model to use the same rows; if
# the predictors have NAs in different rows it stops with an error -- confirm
# against the actual data.
step(model1)

# Reduced model without the date term; per the author's note these are the
# terms that actually matter.
model2 <- lm(Withdrawal ~ Deposit + Balance, data = cdata)
# Scatter of Withdrawal against Deposit + Balance with a fitted trend.
# Author's note: the points are spread very poorly, so the data is not really
# suited for prediction -- the fits below are exploratory ("something is
# better than nothing").
#
# model.frame(model2) supplies the rows and columns the model was fitted on as
# a plain data frame, and the smoothing options are given to geom_smooth()
# only, so the point layer does not receive parameters it cannot use.

# Straight-line fit.
ggplot(model.frame(model2), aes(x = Withdrawal, y = Deposit + Balance)) +
  geom_point() +
  geom_smooth(method = "lm")

# Natural cubic spline fit with 3 degrees of freedom (splines::ns).
ggplot(model.frame(model2), aes(x = Withdrawal, y = Deposit + Balance)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ ns(x, 3))
### Try the predictions.
# Keep only the rows of cdata that have no NA in any column.
noNadata <- cdata[complete.cases(cdata), ]

# smooth.spline() fits a curve in ONE predictor. Given a formula it hands it
# to xy.coords(), which simply evaluates the right-hand side, so
# `Deposit + Balance` is the arithmetic sum of the two columns, not two
# separate predictors. The call is written with explicit x / y to make that
# visible.
model_smooth <- smooth.spline(
  x = noNadata$Deposit + noNadata$Balance,
  y = noNadata$Withdrawal,
  spar = 0.75,
  nknots = 30
)
#y.loess <- loess(y ~ x, span=0.75, data.frame(x=noNadata$Withdrawal, y=noNadata$Deposit+noNadata$Balance))

# A single hypothetical transaction to predict the withdrawal for.
newplotdata <- data.frame(Deposit = 1800, Balance = 351893)

# predict() for a smoothing spline expects numeric x values on the same scale
# as the fit (Deposit + Balance), not a data frame.
predict(model_smooth, x = newplotdata$Deposit + newplotdata$Balance)

# The linear model looks its predictors up by column name in the data frame.
predict(model2, newplotdata)
# Regression diagnostics for model2: a quantile-comparison plot to judge how
# close the residuals are to the reference distribution, followed by two
# influence plots. These functions live in the `car` package, which this
# script does not attach, so they are called through the namespace.
# NOTE(review): `id.n` is the point-labelling argument of older car releases;
# newer releases appear to use `id = list(n = 3)` -- confirm against the
# installed version.
car::qqPlot(model2, id.n = 3)
car::influenceIndexPlot(model2, id.n = 3)
car::influencePlot(model2, id.n = 3)