# data.table walkthrough on flights_2014.csv
# (line-number gutter and table delimiter left over from the original web
# listing removed; they are not R code)
library(data.table)
# Read the flights CSV into a data.table named `dt`.
dt <- fread("flights_2014.csv")

# Quick inspection: row count, column count, column names, first rows.
# Each call is on its own line; two calls on one line with no `;` between
# them are a syntax error in R.
nrow(dt)
ncol(dt)
names(dt)
head(dt)
# Column selection through the `j` argument of dt[i, j].
# Each statement is on its own line; several assignments on one line with no
# separator do not parse.

# Three ways to pick `origin`; dt1 is overwritten each time, so it ends up
# holding the result of the last form.
# 1. Bare column name: the result is a plain vector, not a data.table.
dt1 <- dt[, origin]
# 2. Wrapped in .(): the result is a one-column data.table.
dt1 <- dt[, .(origin)]
# 3. Character vector of names; with = FALSE makes `j` be treated as column
#    names/positions rather than an expression to evaluate.
dt1 <- dt[, c("origin"), with = FALSE]

# Select by position: the second column only.
dt2 <- dt[, 2, with = FALSE]

# Several columns by name, then columns 2 through 4 by position.
dt3 <- dt[, .(origin, year, month, hour)]
dt4 <- dt[, 2:4, with = FALSE]

# Drop columns: `!` in front of the name vector keeps everything else.
dt5 <- dt[, !c("origin"), with = FALSE]
dt6 <- dt[, !c("origin", "year"), with = FALSE]

# Keep every column whose name matches the pattern "dep".
dt7 <- dt[, names(dt) %like% "dep", with = FALSE]
# Rename columns of dt in place with setnames() (no assignment needed).

# First call: dest -> Destination.
setnames(dt, old = "dest", new = "Destination")

# Second call: Destination goes back to dest, and origin becomes
# origin.of.flighr.
# NOTE(review): "flighr" looks like a typo for "flight". The name is kept
# as-is because the filtering and sorting statements further down refer to
# this exact column name.
setnames(
  dt,
  old = c("Destination", "origin"),
  new = c("dest", "origin.of.flighr")
)
# Row filtering through the `i` argument of dt[i, j].
# Each assignment is on its own line; two assignments on one line with no
# separator do not parse.

# Rows whose origin is exactly "JFK".
dt8 <- dt[origin.of.flighr == "JFK"]

# Rows whose origin is one of "JFK" or "LGA".
dt9 <- dt[origin.of.flighr %in% c("JFK", "LGA")]

# The complement: rows whose origin is neither "JFK" nor "LGA".
dt10 <- dt[!origin.of.flighr %in% c("JFK", "LGA")]

# Two conditions combined element-wise: origin "JFK" and carrier "AA".
dt11 <- dt[origin.of.flighr == "JFK" & carrier == "AA"]
# Sorted versions of the flights table.
#
# setorder() reorders dt by reference and returns that same table. Assigning
# its result directly therefore makes every name just another handle on dt,
# and all of them end up showing whichever ordering was applied last.
# copy() takes a snapshot right after each sort so the three results stay
# distinct. dt itself is still sorted in place, exactly as before, and ends
# in the origin-ascending / carrier-descending order.

# Ascending by origin.
dt12 <- copy(setorder(dt, origin.of.flighr))

# Descending by origin (leading `-`).
dt13 <- copy(setorder(dt, -origin.of.flighr))

# Ascending by origin, then descending by carrier within each origin.
dt14 <- copy(setorder(dt, origin.of.flighr, -carrier))
# Derived columns, added to dt by reference with `:=`.

# dep_sch = dep_time - dep_delay.
# NOTE(review): if dep_time is stored as an HHMM integer and dep_delay is in
# minutes, plain subtraction does not always give a valid clock time --
# confirm the column encoding.
dt[, dep_sch := dep_time - dep_delay]

# Add (or overwrite) two columns in one call using the functional form of
# `:=`; dep_sch is recomputed with the same expression as above.
dt[, `:=`(
  dep_sch = dep_time - dep_delay,
  arr_sch = arr_time - arr_delay
)]

# flag is 1 where the `min` column is below 50, otherwise 0.
# NOTE(review): `min` is expected to be a column of dt here (it shadows
# base::min inside `j`) -- confirm it exists in the CSV.
dt[, flag := ifelse(min < 50, 1, 0)]

# Chaining: recompute dep_sch, then immediately show the three related
# columns from the updated table.
dt[, dep_sch := dep_time - dep_delay][
  , .(dep_time, dep_delay, dep_sch)
]
# Whole-table summaries computed in `j`; each returns a one-row data.table.

# Named summary statistics, ignoring missing values.
# NOTE(review): `mean` is taken over dep_delay while median/min/max are
# taken over arr_delay -- confirm the mix is intended.
dt[, .(
  mean = mean(dep_delay, na.rm = TRUE),
  median = median(arr_delay, na.rm = TRUE),
  min = min(arr_delay, na.rm = TRUE),
  max = max(arr_delay, na.rm = TRUE)
)]

# Mean arrival and departure delay (unnamed, so the columns come out as V1
# and V2). na.rm = TRUE matches the summary above; without it a single
# missing delay would turn the whole mean into NA.
dt[, .(
  mean(arr_delay, na.rm = TRUE),
  mean(dep_delay, na.rm = TRUE)
)]
# (stray table delimiter from the original web listing removed)