# Source slides: "R Language (Сковорцов) / Анализ данных на языке R.pdf"
# (the title translates to "Data analysis in R").

# NOTE(review): `products` is not created in this snippet; it is presumably a
# data.table loaded on an earlier slide — confirm before running standalone.

# Build a display label "<name> ( <price> руб.)" for products priced below
# 1000. `:=` assigns by reference, and only for the rows selected by the filter.
products[
  price < 1000,
  name.with.price := paste0(name, " ( ", price, " руб.)")
]

# Show the products ordered by descending price.
products[order(-price)]

# Divide every price by the maximum price within its brand. This overwrites
# the `price` column in place.
# NOTE(review): if `price` is stored as an integer column, the fractional
# result may be coerced on assignment — confirm the column type.
products[, price := price / max(price), by = brand]
products
library(data.table)
# Load the two tables used throughout the join examples.
purchases <- fread("purchases.csv")
products <- fread("products.csv")

# Key both tables; product_id is the leading key column in each.
setkey(purchases, product_id, externalsessionid)
setkey(products, product_id, brand)
key(purchases)
key(products)

# merge(): join columns given explicitly via `by`, or via `by.x` / `by.y`.
merge(purchases, products, by = "product_id")
# merge(purchases, products, by = c("col1", "col2"))
merge(purchases, products, by.x = "product_id", by.y = "product_id")

# all.x / all.y control whether unmatched rows of each side are kept.
merge(purchases, products, all.x = TRUE, all.y = FALSE)

# No `by`: the join columns are chosen by merge() itself.
merge(purchases, products)

# X[Y] joins: with an explicit `on` column, then relying on the keys.
purchases[products, on = "product_id"]
purchases[products]

# Re-key both tables with a different second key column and join again.
setkey(products, product_id, price)
setkey(purchases, product_id, ordernumber)
purchases[products]

# Restore the original keys and join in the opposite direction.
setkey(purchases, product_id, externalsessionid)
setkey(products, product_id, brand)
products[purchases]
# J, SJ, CJ

# Four spellings of the same lookup of `products` rows by a vector of ids:
# J(), an explicit data.table, the `.()` alias, and a plain list.
products[J(c(158, 208, 10001, 826355, 958238))]
products[data.table(
  c(158, 208, 10001, 826355, 958238)
)]
products[.(c(158, 208, 10001, 826355, 958238))]
products[list(c(158, 208, 10001, 826355, 958238))]

# SJ(): print the table it builds and inspect its key.
print(SJ(c(158, 208, 10001, 826355, 958238)))
key(SJ(c(158, 208, 10001, 826355, 958238)))

# CJ(): print the table built from the two vectors and inspect its key.
print(CJ(c(158, 826355, 958238), c("Supra", "Func")))
key(CJ(c(158, 826355, 958238), c("Supra", "Func")))
library(data.table)
# Brand loyalty: for the 20 brands with the most distinct sessions, compute
# the share of sessions that bought more than one item, all of a single brand.

purchases <- fread("purchases.csv")
products <- fread("products.csv")

# Attach the brand of each purchased product.
purchases.with.brands <- merge(
  purchases,
  products[, list(product_id, brand)],
  by = "product_id"
)

# Top 20 brands by number of distinct sessions that purchased them.
pop.20.brands <- head(
  purchases.with.brands[
    ,
    list(total.brand.users = length(unique(externalsessionid))),
    by = brand
  ][order(-total.brand.users)],
  20
)

# Per session: number of distinct brands, number of purchase rows, and the
# brand of the first row in the group.
users <- purchases.with.brands[
  ,
  list(
    unique.brands = length(unique(brand)),
    items = .N,
    brand = brand[1]
  ),
  by = externalsessionid
]

# Sessions with more than one item and exactly one brand, counted per brand.
# With a single distinct brand, `brand[1]` is that brand.
brand.loyal.users <- users[items > 1][unique.brands == 1][
  ,
  list(total.loyal.users = .N),
  by = brand
]

# Combine both counts and compute the loyal share per brand.
brand.stats <- merge(pop.20.brands, brand.loyal.users, by = "brand")
brand.stats[, loyal := total.loyal.users / total.brand.users]
brand.stats[order(-loyal)]
# 16. Подробнее о визуализации: qplot, ggplot, geoms. ----
library(ggplot2)
# qplot() examples on the `diamonds` dataset.
# NOTE(review): qplot() is reported as deprecated in recent ggplot2 releases —
# confirm against the installed version before reusing these calls.
data("diamonds")

# One variable, then two variables taken from a data frame.
qplot(x = price, data = diamonds)
qplot(x = price, y = carat, data = diamonds)
qplot(x = cut, y = carat, data = diamonds)

# The same function also accepts bare vectors instead of `data =`.
v <- diamonds$carat
qplot(v)
qplot(diamonds$carat)
qplot(diamonds$carat, diamonds$price)

# A plot can be stored in a variable instead of being drawn immediately.
my_plot <- qplot(x = price, y = carat, data = diamonds)

# Map colour and shape to columns and request two geoms at once.
qplot(
  x = price, y = carat,
  color = color, shape = cut, data = diamonds,
  geom = c("point", "smooth")
)
qplot(mpg,
hp,
color = I("blue"),