dplyr高级函数
join(数据集之间的连接)
left_join: left_join(x1,x2,by = "name") #保留全部x1表
inner_join: inner_join(x1,x2,by = "name") #保留x1,x2的交集
semi_join: semi_join(x1,x2,by = "name") #保留x1表中全部匹配到的集合
anti_join: anti_join(x1,x2,by = "name") #保留x1中没有匹配到的集合
group by 分组汇总
# Group the rows of `tbl` by `color`, then reduce each group to a single row
# holding the sum of its `value` column.
by_color <- group_by(tbl, color)
summarise(by_color, total = sum(value))
案例:与summarise同用
# Worked example: a small table of colours and numbers, summed per colour.
t <- data.frame(
  color = c("blue", "blue", "black", "red", "blue"),
  nu = c(2, 5, 6, 9, 1)
)
# Attach the grouping first, then compute one total per colour.
by_color <- t %>% group_by(color)
by_color %>% summarise(total = sum(nu))
%>% 管道函数
%>%(旧版写法 %.% 已废弃)将上一个函数的输出作为下一个函数的输入
# Load the order data and count the number of orders in each month,
# one step at a time with named intermediates.
order <- read.csv("D:/R/dplyr-data/order.csv")
# as_tibble() replaces tbl_df(), which is deprecated in current dplyr.
order <- as_tibble(order)
class(order)
# Characters 6-7 of `orderdate` are used as the month
# (assumes dates look like "YYYY-MM-DD" -- TODO confirm against the file).
order_new <- mutate(order, month = substr(orderdate, 6, 7))
order_month <- group_by(order_new, month)
summarise(order_month, count = n())
# The same monthly count written as one pipeline; it has to be run as a
# single statement. Each line ends with the pipe operator so that R keeps
# reading the expression instead of stopping after the first line.
# `%>%` is used because dplyr no longer supports the older `%.%` operator.
order %>%
  mutate(month = substr(orderdate, 6, 7)) %>%
  group_by(month) %>%
  summarise(count = n())
其他函数 do colwise
do 筛选数据记录
do(data,fun(.))
# Sort by descending `totalprice`, then keep the first two rows of each `year`
# group. Inside do(), `.` stands for the rows of the current group.
order %>%
  group_by(year) %>%
  arrange(desc(totalprice)) %>%
  do(head(., 2))
colwise(自动对每一列调用函数)
library(plyr)
colwise(function)(data.frame)
# Apply round() to each of the first four iris columns and show the first rows.
head(colwise(round)(iris[, 1:4]))
MySQL数据库的连接(dplyr)
# Connect to the "sqlbook" MySQL database.
# NOTE(review): src_mysql() comes from older dplyr releases -- confirm it is
# still available in the installed version.
src <- src_mysql("sqlbook")
# Reference the "orders" table through that connection.
orderSQL <- tbl(src, from = "orders")
转载请注明原文地址: https://ju.6miu.com/read-1309610.html