# Session setup for the mlr3 workflow in this script.

# Clear the global environment so objects from an earlier session cannot
# leak into this run.
rm(list = ls())

# Each library() call is its own statement and must sit on its own line;
# fused on one line they do not parse.
library(mlr3verse)
library(tidyverse)
library(paradox)

# Restore previously saved objects from a machine-specific absolute path.
# NOTE(review): the statements visible after this point read their inputs from
# text files instead; confirm whether this .RData file is still required.
load("/home/lixiang/temp_file/all.data.RData")
# Sample metadata. Every row receives a synthetic id "s1", "s2", ... in file
# order. The ids are generated from the actual number of rows instead of a
# fixed 1:119, so the script no longer fails when the metadata file has a
# different number of samples.
df_sample <- data.table::fread("matedata.txt", header = TRUE) %>%
  dplyr::mutate(sample = paste0("s", dplyr::row_number()))

# OTU table: read, transpose, attach the class label (`Type`) and the
# synthetic sample id through the shared `Name` key, then use the sample id as
# row names.
# NOTE(review): assumes the input file has OTUs in rows and samples in columns,
# so that after t() the row names match `df_sample$Name` - confirm.
df_otu <- data.table::fread("otu_taxon_F.txt", header = TRUE) %>%
  tibble::column_to_rownames(var = "#OTU ID") %>%
  t() %>%
  as.data.frame() %>%
  dplyr::mutate(Name = rownames(.)) %>%
  dplyr::left_join(
    dplyr::select(df_sample, dplyr::all_of(c("Name", "Type", "sample"))),
    by = "Name"
  ) %>%
  dplyr::select(-Name) %>%
  tibble::column_to_rownames(var = "sample") %>%
  # Rows labelled "Ck" are excluded from the classification problem
  # (presumably the control group - verify against the metadata).
  dplyr::filter(Type != "Ck") %>%
  # Levels follow order of first appearance, so the removed "Ck" level is gone.
  dplyr::mutate(Type = factor(Type, levels = unique(Type)))

# Lookup table between the original OTU names (`Feature`) and the short
# aliases (`temp`) used as model feature names: "M<position>" for every
# feature column, "Type" left untouched.
df_colnames <- data.frame(Feature = colnames(df_otu)) %>%
  dplyr::mutate(temp = dplyr::case_when(
    Feature == "Type" ~ "Type",
    Feature != "Type" ~ paste0("M", dplyr::row_number())
  ))

# Rename the columns straight from the lookup table so the aliases in the data
# and in `df_colnames` cannot drift apart. With `Type` as the last column this
# yields M1..M(n-1), "Type", exactly as before.
colnames(df_otu) <- df_colnames$temp
# Wrap the prepared table in an mlr3 classification task with `Type` as the
# target; every other column is treated as a feature.
task <- as_task_classif(df_otu, target = "Type")

# Show the task summary (separate statement; it cannot share a line with the
# assignment above).
task

# Default task plot.
autoplot(task)
# ranger learner; permutation importance is requested up front because
# ranger$importance() is called later in the script.
ranger <- lrn("classif.ranger", importance = "permutation")
ranger

# Reproducible train/test partition with ratio = 0.7. The seed and the
# partition call are separate statements and need separate lines.
set.seed(707)
split <- partition(task, ratio = 0.7)

# List the learner's hyperparameters.
ranger$param_set
# Hyperparameter search space.
#   num.trees     : sampled as an integer in [1, 50] and multiplied by 20 by
#                   the trafo, i.e. 20, 40, ..., 1000 trees.
#   min.node.size : integer in [3, 30], used as sampled.
search_space <- ps(
  num.trees = p_int(
    lower = 1,
    upper = 50,
    trafo = function(k) 20 * k
  ),
  min.node.size = p_int(
    lower = 3,
    upper = 30
  )
)

# Auto-tuner around the ranger learner: random search, 10 evaluations, each
# scored by classification accuracy under 10-fold cross-validation.
# NOTE(review): newer mlr3tuning releases appear to expect
# `tuner = tnr("random_search")` instead of `method` - confirm against the
# installed version.
at <- auto_tuner(
  method = "random_search",
  term_evals = 10,
  learner = ranger,
  search_space = search_space,
  resampling = rsmp("cv", folds = 10),
  measure = msr("classif.acc")
)

# Fix the RNG state before tuning so the run is repeatable.
set.seed(1102)

# Tune on the training rows only; the test rows stay untouched.
at$train(task, row_ids = split$train)

# Best configuration found.
at$tuning_result
# Copy the tuned parameter values onto the plain learner and refit it on the
# training rows. Assignment and training are separate statements and need
# separate lines.
ranger$param_set$values <- at$tuning_result$learner_param_vals[[1]]
ranger$train(task, row_ids = split$train)

# Predict the held-out rows, then show the prediction object and its
# confusion matrix.
predictions <- ranger$predict(task, row_ids = split$test)
predictions
predictions$confusion

# Per-row predictions as a table.
predictions %>%
  as.data.table()

# Accuracy on the test rows.
predictions$score(msr("classif.acc"))
# Switch the learner to probability output and refit, so the prediction object
# carries class probabilities for the curve plots below. Each statement now
# sits on its own line.
ranger$predict_type <- "prob"
ranger$train(task, row_ids = split$train)
predictions <- ranger$predict(task, row_ids = split$test)
predictions

# Accuracy of the refitted model on the test rows.
predictions$score(msr("classif.acc"))

# ROC and precision-recall curves.
# NOTE(review): these plot types presumably require a two-class target -
# confirm that `Type` has exactly two levels after the "Ck" rows are dropped.
autoplot(predictions, type = "roc")
autoplot(predictions, type = "prc")
# Horizontal bar chart of the ten features with the highest importance
# (permutation importance, as configured when the learner was created).
ranger$importance() %>%
  # Named vector -> two-column table: feature alias and importance score.
  as.data.table(keep.rownames = TRUE) %>%
  stats::setNames(c("temp", "Importance")) %>%
  # Translate the "M<k>" aliases back to the original OTU names.
  dplyr::left_join(df_colnames, by = "temp") %>%
  dplyr::select(-temp) %>%
  dplyr::arrange(-Importance) %>%
  dplyr::slice(1:10) %>%
  # reorder() sorts the bars by importance; after coord_flip() the most
  # important feature is drawn at the top.
  ggplot(aes(x = reorder(Feature, Importance), y = Importance)) +
  geom_col() +
  scale_y_continuous(expand = c(0, 0)) +
  coord_flip() +
  xlab("") +
  theme_bw()