import os
from pathlib import Path
from gwf import Workflow, AnonymousTarget
import glob
# GWF workflow to run a notebook
# Generate a notebook executor (inplace and save the output)
r"""
# Usage:

Run a specific notebook with the following command:

    gwf -f run_notebook.py:run.<notebook_name> run

where <notebook_name> is the name of the notebook you want to run, without
the .ipynb extension. For example, to run example_notebook.ipynb, use:

    gwf -f run_notebook.py:run.example_notebook run
"""
# Imports and utility functions
# Template function to run a notebook
def run_notebook(path, memory='8g', walltime='00:10:00', cores=1):
    """
    Build a gwf target that executes a notebook in place and saves the output.

    The task runs ``jupyter nbconvert --execute --inplace`` on the notebook
    inside the ``pymc`` conda environment and, when nbconvert exits
    successfully, touches a hidden sentinel file that serves as the
    target's only declared output.

    Parameters
    ----------
    path : str or path-like
        Notebook to execute. It is the target's only declared input.
    memory : str
        Value passed through as the ``memory`` target option.
    walltime : str
        Value passed through as the ``walltime`` target option.
    cores : int
        Value passed through as the ``cores`` target option.

    Returns
    -------
    AnonymousTarget
        Target carrying the inputs, outputs, options and bash spec.
    """
    # Imported locally so the block does not depend on a file-level import.
    import shlex

    # Path of the output sentinel file: '.<notebook file name>.sentinel'.
    # Only the file name is used, so the sentinel path has no directory part.
    sentinel = f'.{Path(path).name}.sentinel'

    inputs = [path]
    outputs = {'sentinel': sentinel}
    options = {'memory': memory, 'walltime': walltime, 'cores': cores}

    # Quote the file names for the shell so notebooks whose names contain
    # spaces or other special characters still form a valid command. Names
    # made only of shell-safe characters are left unchanged by shlex.quote.
    quoted_path = shlex.quote(str(path))
    quoted_sentinel = shlex.quote(sentinel)

    # Commands to run in the task (bash script). The sentinel is touched only
    # if nbconvert exits with status 0; --allow-errors lets execution continue
    # past failing cells.
    spec = f"""
    source $(conda info --base)/etc/profile.d/conda.sh
    conda activate pymc
    jupyter nbconvert --to notebook --execute --inplace --allow-errors --ExecutePreprocessor.iopub_timeout=600 {quoted_path} && touch {quoted_sentinel}
    """

    # return target
    return AnonymousTarget(inputs=inputs, outputs=outputs, options=options, spec=spec)
# List the available notebooks to run
# list all notebooks in the current directory
notebooks = glob.glob('*.ipynb')

# Set up dynamic workflow generator
# loop through notebooks and generate a workflow object for each
for notebook in notebooks:
    # Define the name: the notebook's file name up to its first '.'.
    nb = os.path.basename(notebook).split('.')[0]
    workflow_name = f"run.{nb}"

    # One workflow per notebook, holding a single target built from the
    # run_notebook template.
    wf = Workflow(defaults={'account': 'hic-spermatogenesis'})
    wf.target_from_template(
        name=f'run_{nb}',
        template=run_notebook(notebook, memory='16g', walltime='04:00:00', cores=8),
    )

    # Publish the workflow as a module-level attribute named 'run.<name>'.
    # The name contains a dot, so it cannot be bound with a plain assignment.
    globals()[workflow_name] = wf