Язык R Отчет (Скворцов) / Анализ данных на языке R
.pdfd <- as.data.frame(matrix(rnorm(30), nrow = 5))
my_fun <- function(x) x * 2
d[1, 1] <- NA
my_list <- list()
for (i in seq_along(d)){ temp_col <- d[, i]
neg_numbers <- temp_col[temp_col < 0] my_list[[i]] <- neg_numbers
}
names(my_list) <- colnames(d)
my_list <- apply(d, 2, function(x) x[x < 0])
find_negative <- function(x){ x[x < 0]
}
find_positive <- function(x){ x[x > 0]
}
apply(d, 2, find_positive)
# step 4
apply(array, margin, ...) head(iris)
aov_result <- apply(iris[, 1:4], 2, function(x) aov(x ~ iris$Species))
norm_test <- apply(iris[, 1:4], 2,
function(x) shapiro.test(x))
norm_test_p <- apply(iris[, 1:4], 2, function(x)
shapiro.test(x)$p.value)
str(aov_result) aov_result$Petal.Length norm_test$Petal.Width
# step 1 lapply apply(array, margin, ...)
lapply(list, function)
my_list <- list(x = c(rnorm(30), NA), y = rnorm(10)) str(my_list)
lapply(my_list, mean) lapply(my_list, mean, na.rm = T) lapply(my_list, function(x) x * 2)
sapply(my_list, range, na.rm = T, simplify = F)
# step 2
cars <- c("Mazda", "Volga", "Merc") car <- "Mazda RX4"
sapply(cars, function(x) grepl(x, car))
lapply(cars, function(x) grepl(x, car))
# step 3 by tapply
tapply(mtcars$mpg, mtcars$am, function(x) mean(x)) aggregate(mpg ~ am, mtcars, function(x) mean(x))
by(iris[1:4], iris$Species, function(x) sapply(x,
function(col) shapiro.test(col)$p.value))
aggregate(. ~ Species, iris, function(x) shapiro.test(x)$p.value)
# step 4 vapply,
vapply(list, function, FUN.VALUE = type, ...) vapply(mtcars, mean, FUN.VALUE = numeric(1)) sapply(mtcars, mean)
mapply(rep, c(1, 2, 3, 4), c(1, 2, 3, 4))
rep(1, 3)
x <- c(20, 25, 13) m <- c(0, 1, 2)
s <- c(3, 5, 6) mapply(rnorm, x, m, s)
14. Работа с данными при помощи dplyr
# steps 3 - 4 data_frame
install.packages("dplyr")
library(dplyr)
my_data <- data_frame(x = rnorm(10000), y = rnorm(10000),
f = factor(rep(1:2, 5000)))
my.data <- data.frame(x = rnorm(10000), y = rnorm(10000),
f = factor(rep(1:2, 5000)))
library(ggplot2)
diamonds <- as_data_frame(diamonds) diamonds
glimpse(diamonds)
my_data_2 <- data_frame(x = rnorm(10), y = abs(x)) my.data.2 <- data.frame(x = rnorm(10), y = abs(x))
# step 5 select columns select(diamonds, 1, 2, 3) diamonds[c("cut", "price", "color")]
select(diamonds, contains("t"))
#step 6 slice rows slice(diamonds, c(1, 4, 5)) diamonds[c(1, 4, 5)]
#step 7 filter observations filter(diamonds, carat > 0.3 | color == "J")
diamonds[diamonds$carat > 0.3 & diamonds$color == "J",
]
subset(diamonds, carat > 0.3 & color == "J")
# steps 8 - 9 arrange and mutate arrange(diamonds, desc(price)) diamonds[order(diamonds$price, diamonds$depth), ]
m <- mutate(diamonds,
sqrt_price = sqrt(price), log_carat = log(carat))
mutate(mtcars, am = factor(am), vs = factor(vs))
# step 2 mutate_each library(ggplot2) library(dplyr)
d <- as_data_frame(matrix(rnorm(30), ncol = 5))
mutate_each(d, funs(ifelse(. < 0, 0, .)))
col_1 <- d$V1 col_2 <- d$V2
ifelse(col_1 < 0, 0, col_1) ifelse(col_2 < 0, 0, col_2)
my_fun <- function(x) ifelse(x < 0, 0, x) sapply(d, function(z) abs(z))