2024-07-01 15:25:30 -06:00
|
|
|
workflows:
|
|
|
|
# aggregate_workflow: runs the "aggregate" verb ("sum" of "col_multiplied", grouped by "type") on the output of derive_workflow.
- name: aggregate_workflow
|
|
|
|
steps:
|
2024-09-13 03:44:38 +09:00
|
|
|
- verb: "aggregate" # verb implementation: https://github.com/microsoft/datashaper/blob/main/python/datashaper/datashaper/verbs/aggregate.py
|
2024-07-01 15:25:30 -06:00
|
|
|
args:
|
|
|
|
groupby: "type" # column whose values define the groups
|
|
|
|
column: "col_multiplied" # column to aggregate; this is the "to" column written by derive_workflow
|
|
|
|
to: "aggregated_output" # name of the column that receives the aggregated value
|
|
|
|
operation: "sum" # aggregation applied within each group
|
|
|
|
input:
|
|
|
|
source: "workflow:derive_workflow" # take input from derive_workflow, because this workflow requires that one to run first
|
|
|
|
# Note: the two workflows are listed out of dependency order; the indexing engine works out the right order to run them in.
|
|
|
|
|
|
|
|
# derive_workflow: runs the "derive" verb to combine "col1" and "col2" with "*" into a new column, "col_multiplied".
- name: derive_workflow
|
|
|
|
steps:
|
2024-09-13 03:44:38 +09:00
|
|
|
- verb: "derive" # verb implementation: https://github.com/microsoft/datashaper/blob/main/python/datashaper/datashaper/verbs/derive.py
|
2024-07-01 15:25:30 -06:00
|
|
|
args:
|
|
|
|
column1: "col1" # first operand column; presumably supplied by the input dataset (not defined in this file) - confirm
|
|
|
|
column2: "col2" # second operand column; presumably supplied by the input dataset (not defined in this file) - confirm
|
|
|
|
to: "col_multiplied" # name of the new column that receives the result
|
|
|
|
operator: "*" # multiply the two columns
|
|
|
|
# This verb acts on the dataset itself, so there is no need to specify an input explicitly.
|
|
|
|
# "input": { "source": "source" } # would use the dataset as the input to this verb; this is the default, so it can be omitted.
|