-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcheck_runtime.R
More file actions
102 lines (88 loc) · 3.41 KB
/
check_runtime.R
File metadata and controls
102 lines (88 loc) · 3.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# Check runtimes
library(batchtools)
library(ggplot2)
# Open the benchmark registry read-only: this script only inspects job
# metadata and must not modify the registry.
reg_name <- "rpf_batchmark_binary_auc"
reg_dir <- here::here("registry", reg_name)
loadRegistry(reg_dir, writeable = FALSE)

# Load task metadata (supplies task_name, dim, n and p for the join below)
task_summary <- readRDS("task_summary.rds")

# Get results: one row per job with its task, learner and running time
jobs <- unwrap(getJobTable())[, c("job.id", "task_id", "learner_id", "time.running")]

# time.running is a difftime. Converting through its units yields days
# regardless of the unit the column is stored in, whereas a bare as.numeric()
# returns the value in whatever unit the difftime happens to carry.
jobs[, time_days := as.numeric(time.running, units = "days")]
jobs[, time.running := NULL]

# Shorten task and learner identifiers so they work as plot labels
jobs[, task_id := gsub(" \\(Supervised Classification\\)", "", gsub("Task \\d+: ", "", task_id))]
jobs[, learner_id := gsub("\\.tuned", "", learner_id)]
jobs[, learner_id := gsub("^(encode\\.)?classif\\.", "", learner_id)]
#jobs[, job.id := NULL]

# Average runtime per learner and task. Jobs without a recorded runtime
# (presumably those that have not finished yet -- confirm against the registry)
# have NA here; na.rm keeps one such job from blanking the whole cell.
runtimes_avg <- jobs[, .(mean_days = mean(time_days, na.rm = TRUE)), keyby = .(learner_id, task_id)]

# Attach task size information; ijoin keeps only jobs whose task_id matches a
# task_name in the task summary.
jobs <- ijoin(jobs, task_summary[, c("task_name", "dim", "n", "p")], by = c("task_id" = "task_name"))
# One fixed colour per learner, keyed by the cleaned-up learner id, so a
# learner is drawn in the same colour in every plot.
learner_cols <- c(
  ranger           = "#2171B5",
  xgboost          = "#3BA99C",
  xgboost_fixdepth = "#256A62",
  rpf              = "#F73098",
  rpf_fixmax       = "#CA1694"
)
# Preview the palette with: scales::show_col(learner_cols)
# Extend every task label with its size, "<task> (<n> x <p>)", so the axis
# text shows how large each task is.
jobs[, task_id := sprintf(fmt = "%s (%d x %d)", task_id, n, p)]

# Boxplots of job runtime per task and learner, runtime on a log10 axis.
# To look at large tasks only, pass subset(jobs, dim > 24195) as data.
ggplot(
  data = jobs,
  mapping = aes(
    x = reorder(task_id, dim), # order tasks by dim
    y = time_days,
    colour = learner_id,
    fill = learner_id
  )
) +
  geom_boxplot(
    alpha = 0.5,
    position = position_dodge(width = 0.5),
    width = 1
  ) +
  coord_flip() +
  scale_y_log10() +
  scale_color_manual(
    values = learner_cols,
    aesthetics = c("color", "fill")
  ) +
  labs(
    title = "Job Runtime (on BigBertha)",
    subtitle = "Across all learners and tasks.\nTasks are ordered by n * p, decreasing",
    x = "Task (n x p)",
    y = "Runtime (days, log10 scale)",
    color = NULL,
    fill = NULL
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # plot.title.position = "plot",
    legend.position = "bottom"
  )
# Same boxplots of job runtime per task and learner, but with runtime on the
# untransformed (linear) axis.
# To look at large tasks only, pass subset(jobs, dim > 24195) as data.
ggplot(
  data = jobs,
  mapping = aes(
    x = reorder(task_id, dim), # order tasks by dim
    y = time_days,
    colour = learner_id,
    fill = learner_id
  )
) +
  geom_boxplot(
    alpha = 0.5,
    position = position_dodge(width = 0.5),
    width = 1
  ) +
  coord_flip() +
  scale_color_manual(
    values = learner_cols,
    aesthetics = c("color", "fill")
  ) +
  labs(
    title = "Job Runtime (on BigBertha)",
    subtitle = "Across all learners and tasks.\nTasks are ordered by n * p, decreasing",
    x = "Task (n x p)",
    y = "Runtime (days)",
    color = NULL,
    fill = NULL
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # plot.title.position = "plot",
    legend.position = "bottom"
  )
# Scatter plot of job runtime against task size (dim), both on log10 axes,
# with one loess trend line per learner (no confidence band).
jobs |>
  ggplot(aes(x = dim, y = time_days, color = learner_id, fill = learner_id)) +
  geom_point() +
  # span > 1 gives a very smooth trend line
  geom_smooth(method = "loess", se = FALSE, span = 1.5) +
  scale_x_log10() +
  scale_y_log10() +
  scale_color_manual(values = learner_cols, aesthetics = c("color", "fill")) +
  labs(
    title = "Job Runtime (on BigBertha) and Task Size",
    subtitle = "Across all learners and tasks",
    # Axis label spelling corrected (was "Dimensionanility")
    x = "Task Dimensionality (n x p, log10 scale)", y = "Runtime (days, log10 scale)",
    color = NULL, fill = NULL
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # plot.title.position = "plot",
    legend.position = "bottom"
  )
# Relative runtimes might be interesting
# Wide table: one row per task, one column per learner, each cell holding the
# median runtime in days. na.rm = TRUE is forwarded to median() so that a job
# without a recorded runtime does not turn the whole task/learner cell (and
# every ratio derived from it) into NA.
job_median_wide <- data.table::dcast(
  jobs,
  task_id + dim + n + p ~ learner_id,
  value.var = "time_days",
  fun.aggregate = median,
  na.rm = TRUE
)

# Largest tasks first
data.table::setorder(job_median_wide, -dim)

# Runtime ratios between learners (> 1 means the first learner is slower)
job_median_wide[, rpf_xgb := rpf / xgboost]
job_median_wide[, rpf_ranger := rpf / ranger]
job_median_wide[, xgb_ranger := xgboost / ranger]

# Print the resulting table
job_median_wide