-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_analysis.R
102 lines (68 loc) · 3.17 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
## Build a tidy summary of the UCI HAR (Human Activity Recognition) dataset.
##
## Steps:
##   1. Download and unzip the dataset if it is not already available.
##   2. Read the test and train splits and attach subject IDs, activity IDs,
##      activity names and feature names.
##   3. Stack both splits and keep only the mean() and std() measurements.
##   4. Give the measurement columns descriptive names.
##   5. Average every kept measurement per activity and subject and write the
##      result to "./dataset.txt" in the working directory.
##
## The script works relative to the current working directory; it no longer
## forces a machine-specific path with setwd().

# dplyr supplies arrange() and select()
library(dplyr)

# Download and unzip the dataset ----
file <- "project_dataset.zip"
# Use the working directory itself when it already is the extracted dataset
# folder (the layout the original script assumed); otherwise use the
# "UCI HAR Dataset" folder that the archive unpacks to.
data_dir <- if (file.exists("features.txt")) "." else "UCI HAR Dataset"
if (!file.exists(file.path(data_dir, "features.txt"))) {
  if (!file.exists(file)) {
    Url <- "https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip"
    # mode = "wb": the archive is binary and must not be written in text mode
    download.file(Url, file, method = "libcurl", mode = "wb")
  }
  unzip(file)
}

# Read in data ----
# Feature names: one row per measurement column of the X files
features <- read.table(
  file.path(data_dir, "features.txt"),
  stringsAsFactors = FALSE
)
# Lookup table mapping activity IDs 1-6 (as used in the y files) to names
activity_labels <- read.table(
  file.path(data_dir, "activity_labels.txt"),
  stringsAsFactors = FALSE
)

# Test split (roughly 30% of the observations) ----
# Subject ID for each observation
subject_test <- read.table(
  file.path(data_dir, "test", "subject_test.txt"),
  header = FALSE
)
# Measurements; the file name starts with a capital "X" in the archive, which
# matters on case-sensitive file systems
x_test <- read.table(file.path(data_dir, "test", "X_test.txt"), header = FALSE)
# Activity ID (1-6) for each row of x_test
y_test <- read.table(file.path(data_dir, "test", "y_test.txt"), header = FALSE)
# Attach the activity name to every activity ID. A match() lookup is used
# instead of merge() because merge() sorts its result by the key, which would
# break the row-by-row alignment with subject_test and x_test.
y_test1 <- data.frame(
  Activity_ID = y_test[[1]],
  Activity_Type = activity_labels[[2]][match(y_test[[1]], activity_labels[[1]])],
  stringsAsFactors = FALSE
)
# Add column labels
colnames(subject_test) <- c("ID")
colnames(x_test) <- features[, 2]
# Combine subject, activity and measurements column-wise
test_data <- cbind(subject_test, y_test1, x_test)

# Train split (the remaining observations, roughly 70%) ----
subject_train <- read.table(
  file.path(data_dir, "train", "subject_train.txt"),
  header = FALSE
)
x_train <- read.table(
  file.path(data_dir, "train", "X_train.txt"),
  header = FALSE
)
y_train <- read.table(
  file.path(data_dir, "train", "y_train.txt"),
  header = FALSE
)
# Same order-preserving lookup as for the test split
y_train1 <- data.frame(
  Activity_ID = y_train[[1]],
  Activity_Type = activity_labels[[2]][match(y_train[[1]], activity_labels[[1]])],
  stringsAsFactors = FALSE
)
# Add column labels
colnames(subject_train) <- c("ID")
colnames(x_train) <- features[, 2]
# Combine subject, activity and measurements column-wise
train_data <- cbind(subject_train, y_train1, x_train)

# Merge test and train data ----
df <- rbind(test_data, train_data)
# features.txt repeats some names; keep the first column of each name. This is
# done before any dplyr verb because dplyr does not accept duplicate names.
data <- df[, !duplicated(colnames(df))]
# Arrange dataset by subject ID
data <- arrange(data, ID)

# Keep only the mean() and std() value of each measurement ----
extract_data <- select(
  data,
  ID, Activity_ID, Activity_Type,
  contains("mean()"), contains("std()")
)

# Give the measurement columns descriptive names ----
names(extract_data) <- gsub("^t", "time", names(extract_data))
names(extract_data) <- gsub("^f", "frequency", names(extract_data))
names(extract_data) <- gsub("Acc", "Accelerometer", names(extract_data))
names(extract_data) <- gsub("Gyro", "Gyroscope", names(extract_data))
names(extract_data) <- gsub("Mag", "Magnitude", names(extract_data))
names(extract_data) <- gsub("BodyBody", "Body", names(extract_data))

# Summarize by Activity_ID and subject ID ----
# Activity_Type is text and cannot be averaged, so it is dropped first
tidy_data <- select(extract_data, -Activity_Type)
aggdata <- aggregate(
  tidy_data,
  by = list(tidy_data$Activity_ID, tidy_data$ID),
  FUN = mean,
  na.rm = TRUE
)
# The grouping columns duplicate Activity_ID and ID (whose group means equal
# the group values), so they are removed from the final summarized dataset
final_output <- select(aggdata, -Group.1, -Group.2)

# Create table for final data output ----
write.table(final_output, file = "./dataset.txt", row.names = FALSE)