Chapter 5 Static branching

5.1 Why static branching?

Static branching helps us write large plans compactly. Instead of typing out every single target by hand, we use a special shorthand to declare entire batches of similar targets. To practice static branching in a controlled setting, try the interactive exercises at https://wlandau.shinyapps.io/learndrakeplans (from the workshop at https://github.com/wlandau/learndrake).

Without static branching, plans like this one become too cumbersome to type by hand.

# Without static branching:

drake_plan(
  data = get_data(),
  analysis_fast_1_main = main(data, mean = 1, tuning = "fast"),
  analysis_slow_1_main = main(data, mean = 1, tuning = "slow"),
  analysis_fast_2_main = main(data, mean = 2, tuning = "fast"),
  analysis_slow_2_main = main(data, mean = 2, tuning = "slow"),
  analysis_fast_3_main = main(data, mean = 3, tuning = "fast"),
  analysis_slow_3_main = main(data, mean = 3, tuning = "slow"),
  analysis_fast_4_main = main(data, mean = 4, tuning = "fast"),
  analysis_slow_4_main = main(data, mean = 4, tuning = "slow"),
  analysis_fast_1_altv = altv(data, mean = 1, tuning = "fast"),
  analysis_slow_1_altv = altv(data, mean = 1, tuning = "slow"),
  analysis_fast_2_altv = altv(data, mean = 2, tuning = "fast"),
  analysis_slow_2_altv = altv(data, mean = 2, tuning = "slow"),
  analysis_fast_3_altv = altv(data, mean = 3, tuning = "fast"),
  analysis_slow_3_altv = altv(data, mean = 3, tuning = "slow"),
  analysis_fast_4_altv = altv(data, mean = 4, tuning = "fast"),
  analysis_slow_4_altv = altv(data, mean = 4, tuning = "slow"),
  summary_analysis_fast_1_main = summarize_model(analysis_fast_1_main),
  summary_analysis_slow_1_main = summarize_model(analysis_slow_1_main),
  summary_analysis_fast_2_main = summarize_model(analysis_fast_2_main),
  summary_analysis_slow_2_main = summarize_model(analysis_slow_2_main),
  summary_analysis_fast_3_main = summarize_model(analysis_fast_3_main),
  summary_analysis_slow_3_main = summarize_model(analysis_slow_3_main),
  summary_analysis_fast_4_main = summarize_model(analysis_fast_4_main),
  summary_analysis_slow_4_main = summarize_model(analysis_slow_4_main),
  summary_analysis_fast_1_altv = summarize_model(analysis_fast_1_altv),
  summary_analysis_slow_1_altv = summarize_model(analysis_slow_1_altv),
  summary_analysis_fast_2_altv = summarize_model(analysis_fast_2_altv),
  summary_analysis_slow_2_altv = summarize_model(analysis_slow_2_altv),
  summary_analysis_fast_3_altv = summarize_model(analysis_fast_3_altv),
  summary_analysis_slow_3_altv = summarize_model(analysis_slow_3_altv),
  summary_analysis_fast_4_altv = summarize_model(analysis_fast_4_altv),
  summary_analysis_slow_4_altv = summarize_model(analysis_slow_4_altv),
  model_summary_altv = dplyr::bind_rows(
    summary_analysis_fast_1_altv,
    summary_analysis_slow_1_altv,
    summary_analysis_fast_2_altv,
    summary_analysis_slow_2_altv,
    summary_analysis_fast_3_altv,
    summary_analysis_slow_3_altv,
    summary_analysis_fast_4_altv,
    summary_analysis_slow_4_altv
  ),
  model_summary_main = dplyr::bind_rows(
    summary_analysis_fast_1_main,
    summary_analysis_slow_1_main,
    summary_analysis_fast_2_main,
    summary_analysis_slow_2_main,
    summary_analysis_fast_3_main,
    summary_analysis_slow_3_main,
    summary_analysis_fast_4_main,
    summary_analysis_slow_4_main
  )
)

Static branching makes it easier to write and understand plans. To activate static branching, use the transform argument of target().

# With static branching:

model_functions <- rlang::syms(c("main", "altv")) # We need symbols.

model_functions # List of symbols.
#> [[1]]
#> main
#> 
#> [[2]]
#> altv

plan <- drake_plan(
  data = get_data(),
  analysis = target(
    model_function(data, mean = mean_value, tuning = tuning_setting),
    # Define an analysis target for each combination of
    # tuning_setting, mean_value, and model_function.
    transform = cross( 
      tuning_setting = c("fast", "slow"),
      mean_value = !!(1:4), # Why `!!`? See "Tidy Evaluation" below.
      model_function = !!model_functions # Why `!!`? See "Tidy Evaluation" below.
    )
  ),
  # Define a new summary target for each analysis target defined previously.
  summary = target(
    summarize_model(analysis),
    transform = map(analysis)
  ),
  # Group together the summary targets by the corresponding value
  # of model_function.
  model_summary = target(
    dplyr::bind_rows(summary),
    transform = combine(summary, .by = model_function) 
  )
)

plan
#> # A tibble: 35 x 2
#>    target                command                               
#>    <chr>                 <expr_lst>                            
#>  1 analysis_fast_1L_main main(data, mean = 1L, tuning = "fast")
#>  2 analysis_slow_1L_main main(data, mean = 1L, tuning = "slow")
#>  3 analysis_fast_2L_main main(data, mean = 2L, tuning = "fast")
#>  4 analysis_slow_2L_main main(data, mean = 2L, tuning = "slow")
#>  5 analysis_fast_3L_main main(data, mean = 3L, tuning = "fast")
#>  6 analysis_slow_3L_main main(data, mean = 3L, tuning = "slow")
#>  7 analysis_fast_4L_main main(data, mean = 4L, tuning = "fast")
#>  8 analysis_slow_4L_main main(data, mean = 4L, tuning = "slow")
#>  9 analysis_fast_1L_altv altv(data, mean = 1L, tuning = "fast")
#> 10 analysis_slow_1L_altv altv(data, mean = 1L, tuning = "slow")
#> # … with 25 more rows

Always check the graph to make sure the plan makes sense.

plot(plan) # a quick and dirty alternative to vis_drake_graph()

If the graph is too complicated to look at or too slow to load, downsize the plan with max_expand. Then, when you are done debugging and testing, remove max_expand to scale back up to the full plan.

model_functions <- rlang::syms(c("main", "altv"))

plan <- drake_plan(
  max_expand = 2,
  data = get_data(),
  analysis = target(
    model_function(data, mean = mean_value, tuning = tuning_setting),
    transform = cross(
      tuning_setting = c("fast", "slow"),
      mean_value = !!(1:4), # Why `!!`? See "Tidy Evaluation" below.
      model_function = !!model_functions # Why `!!`? See "Tidy Evaluation" below.
    )
  ),
  summary = target(
    summarize_model(analysis),
    transform = map(analysis)
  ),
  model_summary = target(
    dplyr::bind_rows(summary),
    transform = combine(summary, .by = model_function) # defined in "analysis" 
  )
)

# Click and drag the nodes in the graph to improve the view.
plot(plan)

5.2 Grouping variables

A grouping variable contains iterated values for a single instance of map() or cross(). mean_value and tuning_par are grouping variables below. Notice how they are defined inside cross(). Grouping variables are not targets, and they must be declared inside static transformations.

drake_plan(
  data = get_data(),
  model = target(
    fit_model(data, mean_value, tuning_par),
    transform = cross(
      mean_value = c(1, 2),
      tuning_par = c("fast", "slow")
    )
  )
)
#> # A tibble: 5 x 2
#>   target       command                   
#>   <chr>        <expr_lst>                
#> 1 data         get_data()                
#> 2 model_1_fast fit_model(data, 1, "fast")
#> 3 model_2_fast fit_model(data, 2, "fast")
#> 4 model_1_slow fit_model(data, 1, "slow")
#> 5 model_2_slow fit_model(data, 2, "slow")

Each model has its own mean_value and tuning_par. To see this correspondence, set trace = TRUE.

drake_plan(
  trace = TRUE,
  data = get_data(),
  model = target(
    fit_model(data, mean_value, tuning_par),
    transform = cross(
      mean_value = c(1, 2),
      tuning_par = c("fast", "slow")
    )
  )
)
#> # A tibble: 5 x 5
#>   target       command                    mean_value tuning_par model       
#>   <chr>        <expr_lst>                 <chr>      <chr>      <chr>       
#> 1 data         get_data()                 <NA>        <NA>      <NA>        
#> 2 model_1_fast fit_model(data, 1, "fast") 1          "\"fast\"" model_1_fast
#> 3 model_2_fast fit_model(data, 2, "fast") 2          "\"fast\"" model_2_fast
#> 4 model_1_slow fit_model(data, 1, "slow") 1          "\"slow\"" model_1_slow
#> 5 model_2_slow fit_model(data, 2, "slow") 2          "\"slow\"" model_2_slow

If we summarize those models, each summary has its own mean_value and tuning_par. In other words, grouping variables have a natural nesting, and they propagate forward so we can use them in downstream targets. Notice how mean_value and tuning_par appear in summarize_model() and combine() below.

plan <- drake_plan(
  trace = TRUE,
  data = get_data(),
  model = target(
    fit_model(data, mean_value, tuning_par),
    transform = cross(
      mean_value = c(1, 2),
      tuning_par = c("fast", "slow")
    )
  ),
  summary = target(
    # mean_value and tuning_par are old grouping variables from the models
    summarize_model(model, mean_value, tuning_par),
    transform = map(model)
  ),
  summary_by_tuning = target(
    dplyr::bind_rows(summary),
    # tuning_par is an old grouping variable from the models.
    transform = combine(summary, .by = tuning_par)
  )
)

plot(plan)

5.2.1 Limitations of grouping variables

Each grouping variable should be defined only once. In the plan below, there are multiple conflicting definitions of a1, a2, and a3 in the dependencies of c1, so drake does not know which definitions to use.

# Intentionally invalid plan: each grouping variable is defined in two
# different map() calls (a1 in b1/b2, a2 in b1/b3, a3 in b2/b3).
drake_plan(
  b1 = target(1, transform = map(a1 = 1, a2 = 1, .id = FALSE)),
  b2 = target(1, transform = map(a1 = 1, a3 = 1, .id = FALSE)),
  b3 = target(1, transform = map(a2 = 1, a3 = 1, .id = FALSE)),
  # c1 reuses a1, a2, and a3 by name; the error below is reported for c1.
  c1 = target(1, transform = map(a1, a2, a3, .id = FALSE)),
  trace = TRUE
)
#> Warning in min(vapply(out, length, FUN.VALUE = integer(1))): no non-missing
#> arguments to min; returning Inf
#> Error: A grouping variable for target c1 is either undefined or improperly invoked. Details: https://books.ropensci.org/drake/static.html#grouping-variables

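The most direct fix is to define each grouping variable only once, e.g. by giving the conflicting variables distinct names. Other workarounds include bind_plans() (on separate sub-plans) and dynamic branching. Always check your plans before you run them (vis_drake_graph() etc.).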

5.3 Tidy evaluation

In earlier plans, we used the “bang-bang” operator !! from tidy evaluation, e.g. model_function = !!model_functions in cross(). But why? Why not just type model_function = model_functions? Consider the following incorrect plan.

model_functions <- rlang::syms(c("main", "altv"))

plan <- drake_plan(
  data = get_data(),
  analysis = target(
    model_function(data, mean = mean_value, tuning = tuning_setting),
    transform = cross(
      tuning_setting = c("fast", "slow"),
      mean_value = 1:4, # without !!
      model_function = model_functions # without !!
    )
  )
)

drake_plan_source(plan)
#> drake_plan(
#>   analysis_fast_1_model_functions = model_functions(data, mean = 1, tuning = "fast"),
#>   analysis_slow_1_model_functions = model_functions(data, mean = 1, tuning = "slow"),
#>   analysis_fast_4_model_functions = model_functions(data, mean = 4, tuning = "fast"),
#>   analysis_slow_4_model_functions = model_functions(data, mean = 4, tuning = "slow"),
#>   data = get_data()
#> )

Because we omit !!, we create two problems:

  1. The commands use model_functions() instead of the desired main() and altv().
  2. We are missing the targets with mean = 2 and mean = 3.

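Why? To make static branching work properly, drake does not actually evaluate the arguments to cross(). It just uses the raw symbols and expressions. So model_functions is taken literally as the symbol model_functions, and 1:4 is taken as the unevaluated call to `:` whose only arguments are 1 and 4, which is why the targets with mean = 2 and mean = 3 never appear. To force drake to use the values instead, we need !!.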

model_functions <- rlang::syms(c("main", "altv"))

plan <- drake_plan(
  data = get_data(),
  analysis = target(
    model_function(data, mean = mean_value, tuning = tuning_setting),
    transform = cross(
      tuning_setting = c("fast", "slow"),
      mean_value = !!(1:4), # with !!
      model_function = !!model_functions # with !!
    )
  )
)

drake_plan_source(plan)
#> drake_plan(
#>   analysis_fast_1L_main = main(data, mean = 1L, tuning = "fast"),
#>   analysis_slow_1L_main = main(data, mean = 1L, tuning = "slow"),
#>   analysis_fast_2L_main = main(data, mean = 2L, tuning = "fast"),
#>   analysis_slow_2L_main = main(data, mean = 2L, tuning = "slow"),
#>   analysis_fast_3L_main = main(data, mean = 3L, tuning = "fast"),
#>   analysis_slow_3L_main = main(data, mean = 3L, tuning = "slow"),
#>   analysis_fast_4L_main = main(data, mean = 4L, tuning = "fast"),
#>   analysis_slow_4L_main = main(data, mean = 4L, tuning = "slow"),
#>   analysis_fast_1L_altv = altv(data, mean = 1L, tuning = "fast"),
#>   analysis_slow_1L_altv = altv(data, mean = 1L, tuning = "slow"),
#>   analysis_fast_2L_altv = altv(data, mean = 2L, tuning = "fast"),
#>   analysis_slow_2L_altv = altv(data, mean = 2L, tuning = "slow"),
#>   analysis_fast_3L_altv = altv(data, mean = 3L, tuning = "fast"),
#>   analysis_slow_3L_altv = altv(data, mean = 3L, tuning = "slow"),
#>   analysis_fast_4L_altv = altv(data, mean = 4L, tuning = "fast"),
#>   analysis_slow_4L_altv = altv(data, mean = 4L, tuning = "slow"),
#>   data = get_data()
#> )

5.4 Static transformations

There are four transformations in static branching: map(), cross(), split(), and combine(). They are not actual functions, just special language to supply to the transform argument of target() in drake_plan(). Each transformation is similar to a function from the Tidyverse.

| drake     | Tidyverse analogue     |
|-----------|------------------------|
| map()     | pmap() from purrr      |
| cross()   | crossing() from tidyr  |
| split()   | group_map() from dplyr |
| combine() | summarize() from dplyr |

5.4.1 map()

map() creates a new target for each row in a grid.

drake_plan(
  x = target(
    simulate_data(center, scale),
    transform = map(center = c(2, 1, 0), scale = c(3, 2, 1))
  )
)
#> # A tibble: 3 x 2
#>   target command            
#>   <chr>  <expr_lst>         
#> 1 x_2_3  simulate_data(2, 3)
#> 2 x_1_2  simulate_data(1, 2)
#> 3 x_0_1  simulate_data(0, 1)

You can supply the grid directly with the .data argument. Note the use of !! below. (See the tidy evaluation section.)

# Grid of grouping variables for map(): one row per generated target.
# tibble() is namespace-qualified (like rlang::syms() below) so the snippet
# runs without attaching the tibble package first.
my_grid <- tibble::tibble(
  sim_function = c("rnorm", "rt", "rcauchy"),
  title = c("Normal", "Student t", "Cauchy")
)
# Convert the function names from strings to symbols so they appear in the
# generated commands as rnorm, rt, and rcauchy rather than as quoted strings
# (compare `title`, which stays a string).
my_grid$sim_function <- rlang::syms(my_grid$sim_function)

drake_plan(
  x = target(
    simulate_data(sim_function, title, center, scale),
    transform = map(
      center = c(2, 1, 0),
      scale = c(3, 2, 1),
      .data = !!my_grid,
      # In `.id`, you can select one or more grouping variables
      # for pretty target names.
      # Set to FALSE to use short numeric suffixes.
      .id = sim_function # Try `.id = c(sim_function, center)` yourself.
    )
  )
)
#> # A tibble: 3 x 2
#>   target    command                               
#>   <chr>     <expr_lst>                            
#> 1 x_rnorm   simulate_data(rnorm, "Normal", 2, 3)  
#> 2 x_rt      simulate_data(rt, "Student t", 1, 2)  
#> 3 x_rcauchy simulate_data(rcauchy, "Cauchy", 0, 1)

5.4.2 cross()

cross() creates a new target for each combination of argument values.

drake_plan(
  x = target(
    simulate_data(nrow, ncol),
    transform = cross(nrow = c(1, 2, 3), ncol = c(4, 5))
  )
)
#> # A tibble: 6 x 2
#>   target command            
#>   <chr>  <expr_lst>         
#> 1 x_1_4  simulate_data(1, 4)
#> 2 x_2_4  simulate_data(2, 4)
#> 3 x_3_4  simulate_data(3, 4)
#> 4 x_1_5  simulate_data(1, 5)
#> 5 x_2_5  simulate_data(2, 5)
#> 6 x_3_5  simulate_data(3, 5)

5.4.3 split()

The split() transformation distributes a dataset as uniformly as possible across multiple targets.

plan <- drake_plan(
  large_data = get_data(),
  slice_analysis = target(
    # In each generated command, large_data is replaced by a drake_slice()
    # call that selects one of the 4 slices (index = 1, ..., 4).
    large_data %>%
      analyze(),
    transform = split(large_data, slices = 4)
  ),
  results = target(
    # combine() expands slice_analysis into all of the slice targets.
    dplyr::bind_rows(slice_analysis),
    transform = combine(slice_analysis)
  )
)

plan
#> # A tibble: 6 x 2
#>   target         command                                                        
#>   <chr>          <expr_lst>                                                     
#> 1 large_data     get_data()                                                    …
#> 2 results        dplyr::bind_rows(slice_analysis_1, slice_analysis_2, slice_ana…
#> 3 slice_analysi… drake_slice(data = large_data, slices = 4, index = 1) %>% anal…
#> 4 slice_analysi… drake_slice(data = large_data, slices = 4, index = 2) %>% anal…
#> 5 slice_analysi… drake_slice(data = large_data, slices = 4, index = 3) %>% anal…
#> 6 slice_analysi… drake_slice(data = large_data, slices = 4, index = 4) %>% anal…
plot(plan)

At runtime, drake_slice() takes a single subset of the data. It supports data frames, matrices, and arbitrary arrays.

drake_slice(mtcars, slices = 32, index = 1)
#>           mpg cyl disp  hp drat   wt  qsec vs am gear carb
#> Mazda RX4  21   6  160 110  3.9 2.62 16.46  0  1    4    4

drake_slice(mtcars, slices = 32, index = 2)
#>               mpg cyl disp  hp drat    wt  qsec vs am gear carb
#> Mazda RX4 Wag  21   6  160 110  3.9 2.875 17.02  0  1    4    4

5.4.4 combine()

combine() aggregates targets. The closest comparison is the unquote-splice operator !!! from tidy evaluation.

plan <- drake_plan(
  # Two independent groups of targets, each created with map().
  data_group1 = target(
    sim_data(mean = x, sd = y),
    transform = map(x = c(1, 2), y = c(3, 4))
  ),
  data_group2 = target(
    pull_data(url),
    transform = map(url = c("example1.com", "example2.com"))
  ),
  larger = target(
    # In the generated command, data_group1 and data_group2 are each
    # replaced by the full list of targets in that group.
    bind_rows(data_group1, data_group2, .id = "id") %>%
      arrange(sd) %>%
      head(n = 400),
    transform = combine(data_group1, data_group2)
  )
)

drake_plan_source(plan)
#> drake_plan(
#>   data_group1_1_3 = sim_data(mean = 1, sd = 3),
#>   data_group1_2_4 = sim_data(mean = 2, sd = 4),
#>   data_group2_example1.com = pull_data("example1.com"),
#>   data_group2_example2.com = pull_data("example2.com"),
#>   larger = bind_rows(data_group1_1_3, data_group1_2_4, data_group2_example1.com,
#>     data_group2_example2.com,
#>     .id = "id"
#>   ) %>%
#>     arrange(sd) %>%
#>     head(n = 400)
#> )

To create multiple combined groups, use the .by argument.

plan <- drake_plan(
  # 2 x 2 x 2 = 8 data targets, one per combination of x, y, and z.
  data = target(
    sim_data(mean = x, sd = y, skew = z),
    transform = cross(x = c(1, 2), y = c(3, 4), z = c(5, 6))
  ),
  combined = target(
    bind_rows(data, .id = "id") %>%
      arrange(sd) %>%
      head(n = 400),
    # One combined target per (x, y) pair; each gathers the data targets
    # that share that pair and differ only in z.
    transform = combine(data, .by = c(x, y))
  )
)

drake_plan_source(plan)
#> drake_plan(
#>   combined_1_3 = bind_rows(data_1_3_5, data_1_3_6, .id = "id") %>%
#>     arrange(sd) %>%
#>     head(n = 400),
#>   combined_2_3 = bind_rows(data_2_3_5, data_2_3_6, .id = "id") %>%
#>     arrange(sd) %>%
#>     head(n = 400),
#>   combined_1_4 = bind_rows(data_1_4_5, data_1_4_6, .id = "id") %>%
#>     arrange(sd) %>%
#>     head(n = 400),
#>   combined_2_4 = bind_rows(data_2_4_5, data_2_4_6, .id = "id") %>%
#>     arrange(sd) %>%
#>     head(n = 400),
#>   data_1_3_5 = sim_data(mean = 1, sd = 3, skew = 5),
#>   data_2_3_5 = sim_data(mean = 2, sd = 3, skew = 5),
#>   data_1_4_5 = sim_data(mean = 1, sd = 4, skew = 5),
#>   data_2_4_5 = sim_data(mean = 2, sd = 4, skew = 5),
#>   data_1_3_6 = sim_data(mean = 1, sd = 3, skew = 6),
#>   data_2_3_6 = sim_data(mean = 2, sd = 3, skew = 6),
#>   data_1_4_6 = sim_data(mean = 1, sd = 4, skew = 6),
#>   data_2_4_6 = sim_data(mean = 2, sd = 4, skew = 6)
#> )

5.5 Target names

drake releases after 7.12.0 let you define your own custom names with the optional .names argument of transformations.

analysis_names <- c("experimental", "thorough", "minimal", "naive")

plan <- drake_plan(
  dataset = target(
    get_dataset(data_index),
    # Two data_index values, so .names supplies exactly two target names.
    transform = map(data_index = !!seq_len(2), .names = c("new", "old"))
  ),
  analysis = target(
    apply_method(method_name, dataset),
    transform = cross(
      method_name = c("method1", "method2"),
      dataset,
      # 2 methods x 2 datasets = 4 targets, so analysis_names has 4 entries.
      .names = !!analysis_names
    )
  ),
  summary = target(
    summarize(analysis),
    # One combined target per dataset, hence two names.
    transform = combine(analysis, .by = dataset, .names = c("table1", "table2"))
  )
)

plan
#> # A tibble: 8 x 2
#>   target       command                          
#>   <chr>        <expr_lst>                       
#> 1 experimental apply_method("method1", new)     
#> 2 thorough     apply_method("method2", new)     
#> 3 minimal      apply_method("method1", old)     
#> 4 naive        apply_method("method2", old)     
#> 5 new          get_dataset(1L)                  
#> 6 old          get_dataset(2L)                  
#> 7 table1       summarize(experimental, thorough)
#> 8 table2       summarize(minimal, naive)

plot(plan)

The disadvantage of .names is that you need to know in advance the number of targets a transformation will generate. As an alternative, all transformations have an optional .id argument to control the names of targets. Use it to select the grouping variables that go into the names, as well as the order they appear in the suffixes.

drake_plan(
  data = target(
    get_data(param1, param2),
    transform = map(
      param1 = c(123, 456),
      param2 = c(7, 9),
      # Extra grouping variable. It needs its own name because each
      # grouping variable should be defined only once.
      param3 = c("abc", "xyz"),
      # Only param2 goes into the target name suffixes.
      .id = param2
    )
  )
)
#> # A tibble: 2 x 2
#>   target command         
#>   <chr>  <expr_lst>      
#> 1 data_7 get_data(123, 7)
#> 2 data_9 get_data(456, 9)
drake_plan(
  data = target(
    get_data(param1, param2),
    transform = map(
      param1 = c(123, 456),
      param2 = c(7, 9),
      # Extra grouping variable. It needs its own name because each
      # grouping variable should be defined only once.
      param3 = c("abc", "xyz"),
      # Suffix order follows .id: param2 first, then param1.
      .id = c(param2, param1)
    )
  )
)
#> # A tibble: 2 x 2
#>   target     command         
#>   <chr>      <expr_lst>      
#> 1 data_7_123 get_data(123, 7)
#> 2 data_9_456 get_data(456, 9)
drake_plan(
  data = target(
    get_data(param1, param2),
    transform = map(
      param1 = c(123, 456),
      param2 = c(7, 9),
      # Extra grouping variable. It needs its own name because each
      # grouping variable should be defined only once.
      param3 = c("abc", "xyz"),
      # Suffix order follows .id: param1 first, then param2.
      .id = c(param1, param2)
    )
  )
)
#> # A tibble: 2 x 2
#>   target     command         
#>   <chr>      <expr_lst>      
#> 1 data_123_7 get_data(123, 7)
#> 2 data_456_9 get_data(456, 9)

Set .id to FALSE to ignore the grouping variables altogether.

drake_plan(
  data = target(
    get_data(param1, param2),
    transform = map(
      param1 = c(123, 456),
      param2 = c(7, 9),
      # Extra grouping variable. It needs its own name because each
      # grouping variable should be defined only once.
      param3 = c("abc", "xyz"),
      # No grouping variables in the target names.
      .id = FALSE
    )
  )
)
#> # A tibble: 2 x 2
#>   target command         
#>   <chr>  <expr_lst>      
#> 1 data   get_data(123, 7)
#> 2 data_2 get_data(456, 9)

Finally, drake supports a special .id_chr symbol in commands to let you refer to the name of the current target as a character string.

# Return the expression supplied as `x`, unevaluated, as character text.
# For example, as_chr(keras_model_123) gives "keras_model_123".
as_chr <- function(x) {
  captured <- substitute(x)
  deparse(captured)
}
plan <- drake_plan(
  data = target(
    get_data(param),
    transform = map(param = c(123, 456))
  ),
  keras_model = target(
    # .id_chr stands for the name of the current target as a string, so
    # each keras_model_* target writes to its own "<target name>.h5" file.
    save_model_hdf5(fit_model(data), file_out(!!sprintf("%s.h5", .id_chr))),
    transform = map(data, .id = param)
  ),
  result = target(
    # as_chr(keras_model) yields the name of the upstream keras_model_*
    # target, so file_in() points at the file that target wrote.
    predict(load_model_hdf5(file_in(!!sprintf("%s.h5", as_chr(keras_model))))),
    transform = map(keras_model, .id = param)
  )
)

plan
#> # A tibble: 6 x 2
#>   target          command                                                       
#>   <chr>           <expr_lst>                                                    
#> 1 data_123        get_data(123)                                                …
#> 2 data_456        get_data(456)                                                …
#> 3 keras_model_123 save_model_hdf5(fit_model(data_123), file_out("keras_model_12…
#> 4 keras_model_456 save_model_hdf5(fit_model(data_456), file_out("keras_model_45…
#> 5 result_123      predict(load_model_hdf5(file_in("keras_model_123.h5")))      …
#> 6 result_456      predict(load_model_hdf5(file_in("keras_model_456.h5")))      …
drake_plan_source(plan)
#> drake_plan(
#>   data_123 = get_data(123),
#>   data_456 = get_data(456),
#>   keras_model_123 = save_model_hdf5(fit_model(data_123), file_out("keras_model_123.h5")),
#>   keras_model_456 = save_model_hdf5(fit_model(data_456), file_out("keras_model_456.h5")),
#>   result_123 = predict(load_model_hdf5(file_in("keras_model_123.h5"))),
#>   result_456 = predict(load_model_hdf5(file_in("keras_model_456.h5")))
#> )

5.6 Tags

A tag is a custom grouping variable for a transformation. There are two kinds of tags:

  1. In-tags, which contain the target name you start with, and
  2. Out-tags, which contain the target names generated by the transformations.
drake_plan(
  x = target(
    command,
    # .tag_in adds the in-tag `from` (the original target name, x).
    # .tag_out adds the out-tags `to` and `out` (the generated names).
    transform = map(y = c(1, 2), .tag_in = from, .tag_out = c(to, out))
  ),
  trace = TRUE # show grouping variables and tags as extra columns
)
#> # A tibble: 2 x 7
#>   target command    y     x     from  to    out  
#>   <chr>  <expr_lst> <chr> <chr> <chr> <chr> <chr>
#> 1 x_1    command    1     x_1   x     x_1   x_1  
#> 2 x_2    command    2     x_2   x     x_2   x_2

Subsequent transformations can use tags as grouping variables and add to existing tags.

plan <- drake_plan(
  prep_work = do_prep_work(),
  # local and online share the in-tag data_source and the out-tag data,
  # so downstream transformations can refer to both groups at once.
  local = target(
    get_local_data(n, prep_work),
    transform = map(n = c(1, 2), .tag_in = data_source, .tag_out = data)
  ),
  online = target(
    get_online_data(n, prep_work, port = "8080"),
    transform = map(n = c(1, 2), .tag_in = data_source, .tag_out = data)
  ),
  summary = target(
    summarize(bind_rows(data, .id = "data")),
    # One summary per value of data_source (local, online).
    transform = combine(data, .by = data_source)
  ),
  munged = target(
    munge(bind_rows(data, .id = "data")),
    # One munged target per value of n, mixing local and online targets.
    transform = combine(data, .by = n)
  )
)

drake_plan_source(plan)
#> drake_plan(
#>   local_1 = get_local_data(1, prep_work),
#>   local_2 = get_local_data(2, prep_work),
#>   munged_1 = munge(bind_rows(local_1, online_1, .id = "data")),
#>   munged_2 = munge(bind_rows(local_2, online_2, .id = "data")),
#>   online_1 = get_online_data(1, prep_work, port = "8080"),
#>   online_2 = get_online_data(2, prep_work, port = "8080"),
#>   prep_work = do_prep_work(),
#>   summary_local = summarize(bind_rows(local_1, local_2, .id = "data")),
#>   summary_online = summarize(bind_rows(online_1, online_2, .id = "data"))
#> )

plot(plan)
Copyright Eli Lilly and Company