-
Notifications
You must be signed in to change notification settings - Fork 1
/
model1.Rmd
66 lines (56 loc) · 2.62 KB
/
model1.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
---
title: "final project"
author: "Yulin Lei"
date: "4/25/2017"
output: html_document
---
```{r}
library(data.table)
library(fields)
library(dplyr)
library(lubridate)
library(raster)
library(stringr)
library(ggplot2)
library(sf)
trip <- fread("201508_trip_data.csv")
station <- fread("201508_station_data.csv")
df = trip %>%
mutate(date = floor_date(mdy_hm(`Start Date`),'day')) %>%
mutate(hour_of_day = hour(mdy_hm(`Start Date`))) %>%
group_by(`Start Terminal`, date, hour_of_day) %>%
summarise(`Number of Trips` = n())
agg = merge(df, station, by.x = 'Start Terminal', by.y = 'station_id') %>% mutate(day_of_week = wday(date))
agg_hour = agg %>% group_by(`Start Terminal`, hour_of_day, lat, long) %>% summarise(avg = mean(`Number of Trips`))
agg_year = agg %>% group_by(`Start Terminal`, lat, long) %>% summarise(avg = sum(`Number of Trips`))
agg_day = agg %>% group_by(`Start Terminal`,date, day_of_week, lat, long) %>% summarise(total = sum(`Number of Trips`)) %>% group_by(`Start Terminal`, day_of_week, lat, long) %>% summarise(avg = mean(total))
counties <- map_data("county")
ca_county <- subset(counties, region == "california")
bay_name <- c("san francisco", "san mateo","santa clara")
bay_county <- subset(ca_county,subregion %in% bay_name)
sf <- subset(ca_county,subregion %in% "san francisco") %>% dplyr::select(long, lat) %>% as.matrix()
sm <- subset(ca_county,subregion %in% "san mateo") %>% dplyr::select(long, lat) %>% as.matrix()
sc <- subset(ca_county,subregion %in% "santa clara") %>% dplyr::select(long, lat) %>% as.matrix()
sf_poly <- st_polygon(list(sf))
sf_sample <- st_sample(sf_poly,100)
sm_poly <- st_polygon(list(sm))
sm_sample <- st_sample(sm_poly,1000)
sc_poly <- st_polygon(list(sc))
sc_sample <- st_sample(sc_poly,3000)
r = raster(nrows=30, ncol=60, xmn = min(bay_county$long), xmx = max(bay_county$long),ymn = min(bay_county$lat), ymx = max(bay_county$lat))
sf_points <- rbind(matrix(unlist(sf_sample),ncol = 2, byrow = TRUE), matrix(unlist(sf_poly),ncol = 2, byrow = TRUE))
sm_points <- rbind(matrix(unlist(sm_sample),ncol = 2, byrow = TRUE), matrix(unlist(sm_poly),ncol = 2, byrow = TRUE))
sc_points <- rbind(matrix(unlist(sc_sample),ncol = 2, byrow = TRUE), matrix(unlist(sc_poly),ncol = 2, byrow = TRUE))
bay_points <- rasterize(rbind(sf_points,sm_points,sc_points),r)
cells = which(!is.na(bay_points[]))
pred_coords = xyFromCell(r, cells)
agg1 = agg_hour %>% filter(hour_of_day==23)
coords = agg1[,c('long','lat')] %>% as.matrix()
tps = Tps(x = coords, Y=agg1$avg)
trip_pred = r
pred <- predict(tps, pred_coords)
pred[pred<0] <- 0
trip_pred[cells] = pred
plot(trip_pred,cex = 0.8)
points(unique(coords), pch=16, cex=0.5)
```