-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_analysis.R
More file actions
137 lines (101 loc) · 5.67 KB
/
run_analysis.R
File metadata and controls
137 lines (101 loc) · 5.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#####################################
#Getting and Cleaning Data Course Project
#Date Created: 9-17-2014
#Created by: Samia Askari
# Project Objective:
## 1. Merge the training and the test sets to create one data set.
## 2. Extract only the measurements on the mean and standard deviation for each measurement.
## 3. Use descriptive activity names to name the activities in the data set
## 4. Appropriately label the data set with descriptive variable names.
## 5. Create a second, independent tidy data set with the average of each variable for each activity and each subject.
#run_analysis.R: This script performs steps to achieve objectives above
# Prerequisites to run the script:
## 1. Download data from site: https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip
## 2. Downloaded file has folder "UCI HAR Dataset"
## 3. Find the path to folder "UCI HAR Dataset" on your computer
## 4. Source this file, then call the function run_analysis() with this path as its input parameter
#####################################
# Build the tidy per-activity, per-subject averages from the UCI HAR Dataset.
#
# Arguments:
#   workingDirectory  Path to the unzipped "UCI HAR Dataset" folder. It must
#                     contain features.txt, activity_labels.txt and the
#                     train/ and test/ sub-folders.
#
# Side effects:
#   Writes the tab-separated file "TidyData.txt" into workingDirectory.
#   The session's working directory and the caller's objects are not touched.
#
# Returns:
#   The tidy data frame that was written, invisibly.
run_analysis <- function(workingDirectory) {
  stopifnot(is.character(workingDirectory), length(workingDirectory) == 1L)

  # Resolve every file against the dataset folder instead of calling setwd(),
  # so the caller's working directory is never changed.
  dataPath <- function(...) file.path(workingDirectory, ...)

  if (!file.exists(dataPath("features.txt"))) {
    stop("features.txt not found in: ", workingDirectory, call. = FALSE)
  }

  ## Pre-work to create one data set
  features <- read.table(dataPath("features.txt"), header = FALSE)
  activityType <- read.table(dataPath("activity_labels.txt"), header = FALSE)
  colnames(activityType) <- c("activityId", "activityType")
  featureNames <- as.character(features[, 2])

  # Read one split ("train" or "test") and return its activityId, subjectId
  # and measurement columns bound side by side, in that order.
  readSplit <- function(split) {
    readPart <- function(prefix) {
      fileName <- paste0(prefix, "_", split, ".txt")
      read.table(dataPath(split, fileName), header = FALSE)
    }
    x <- readPart("X")
    y <- readPart("y")
    subject <- readPart("subject")
    colnames(x) <- featureNames
    colnames(y) <- "activityId"
    colnames(subject) <- "subjectId"
    cbind(y, subject, x)
  }

  #############
  ## 1: Merge the training and the test sets to create one data set.
  #############
  Final_Data <- rbind(readSplit("train"), readSplit("test"))

  #############
  ## 2: Extract only the measurements on the mean and standard deviation
  ##    for each measurement.
  #############
  colNames <- colnames(Final_Data)
  isId <- grepl("activity..", colNames) | grepl("subject..", colNames)
  # The "mean..-" and "-std()..-" exclusions drop every column whose name
  # continues with "-" after mean()/std() (e.g. "...-mean()-X"), so only
  # names that end in mean() or std() survive.
  # NOTE(review): confirm that excluding the per-axis columns is intended.
  isMean <- grepl("-mean..", colNames) &
    !grepl("-meanFreq..", colNames) &
    !grepl("mean..-", colNames)
  isStd <- grepl("-std..", colNames) & !grepl("-std()..-", colNames)
  Final_Data <- Final_Data[isId | isMean | isStd]

  #############
  ## 3: Use descriptive activity names to name the activities in the data set
  #############
  Final_Data <- merge(Final_Data, activityType, by = "activityId", all.x = TRUE)
  colNames <- colnames(Final_Data)

  #############
  ## 4: Appropriately label the data set with descriptive variable names.
  #############
  # Each rule is c(pattern, replacement). The rules are applied in this exact
  # order because later patterns rely on the output of earlier ones.
  renameRules <- list(
    c("\\()", ""),
    c("-std$", "StdDev"),
    c("-mean", "Mean"),
    c("^(t)", "time"),
    c("^(f)", "freq"),
    c("([Gg]ravity)", "Gravity"),
    c("([Bb]ody[Bb]ody|[Bb]ody)", "Body"),
    c("[Gg]yro", "Gyro"),
    c("AccMag", "AccMagnitude"),
    c("([Bb]odyaccjerkmag)", "BodyAccJerkMagnitude"),
    c("JerkMag", "JerkMagnitude"),
    c("GyroMag", "GyroMagnitude")
  )
  for (rule in renameRules) {
    # gsub() is vectorised over the names, so no per-name loop is needed.
    colNames <- gsub(rule[1], rule[2], colNames)
  }
  colnames(Final_Data) <- colNames

  #############
  ## 5: Create a second, independent tidy data set with the average of each
  ##    variable for each activity and each subject.
  #############
  idCols <- c("activityId", "subjectId")
  # Drop the label column before averaging; it is merged back in afterwards.
  newData <- Final_Data[names(Final_Data) != "activityType"]
  # %in% is a real membership test. Comparing names with != against a
  # two-element vector recycles it element by element and only gives the
  # right answer when the id columns happen to be in positions 1 and 2.
  measures <- newData[!(names(newData) %in% idCols)]
  tidyData <- aggregate(
    measures,
    by = list(
      activityId = newData[["activityId"]],
      subjectId = newData[["subjectId"]]
    ),
    FUN = mean
  )
  tidyData <- merge(tidyData, activityType, by = "activityId", all.x = TRUE)

  # Export the tidy data set next to the input data
  write.table(tidyData, dataPath("TidyData.txt"), row.names = FALSE, sep = "\t")
  invisible(tidyData)
} #function end