A taxonomy of diffusion, flow and bridge matching through the lens of optimal transport (2023-09-01)
Resource Management with Slurm (2020-11-13)

Slurm (Simple Linux Utility for Resource Management) is a resource manager for running compute jobs across multiple servers. While this gives administrators additional control, it constrains the compute resources available to each job and forces all interaction with the servers through the Slurm interface, which can be a pain. This post aims to be a useful go-to guide to common Slurm commands, with examples of how to use Slurm without the pain.

Background

Common commands

  • Remote bash (debug): typically one can access a command-line interface on a remote machine through its debug partition. This is the equivalent of ssh’ing into the machine.
    • srun --pty -t 0:30:00 --partition=<machine>-debug bash
  • Check partitions
    • sinfo
      greytail{thornton}% sinfo
      PARTITION           AVAIL  TIMELIMIT  NODES  STATE NODELIST
      swan01-debug*          up      30:00      1   idle swan01.cpu.stats.ox.ac.uk
      swan02-debug           up      30:00      1   idle swan02.cpu.stats.ox.ac.uk
      swan03-debug           up      30:00      1    mix swan03.cpu.stats.ox.ac.uk
      swan11-debug           up      30:00      1   idle swan11.cpu.stats.ox.ac.uk
      swan12-debug           up      30:00      1   idle swan12.cpu.stats.ox.ac.uk
      grey01-debug           up      30:00      1   idle grey01.cpu.stats.ox.ac.uk
      greyheron-debug        up      30:00      1   idle greyheron.stats.ox.ac.uk
      greyplover-debug       up      30:00      1   idle greyplover.stats.ox.ac.uk
      greywagtail-debug      up      30:00      1   idle greywagtail.stats.ox.ac.uk
      greypartridge-debug    up      30:00      1   idle greypartridge.stats.ox.ac.uk
      greyostrich-debug      up      30:00      1    mix greyostrich.stats.ox.ac.uk
      grey-standard          up 7-00:00:00      4   idle greyheron.stats.ox.ac.uk,greypartridge.stats.ox.ac.uk,greyplover.stats.ox.ac.uk,greywagtail.stats.ox.ac.uk
      grey-fast              up 7-00:00:00      1   idle grey01.cpu.stats.ox.ac.uk
      grey-gpu               up 7-00:00:00      1    mix greyostrich.stats.ox.ac.uk
      swan-1hr               up    1:00:00      1    mix swan03.cpu.stats.ox.ac.uk
      swan-1hr               up    1:00:00      2   idle swan01.cpu.stats.ox.ac.uk,swan02.cpu.stats.ox.ac.uk
      swan-6hrs              up    6:00:00      1    mix swan03.cpu.stats.ox.ac.uk
      swan-6hrs              up    6:00:00      1   idle swan02.cpu.stats.ox.ac.uk
      swan-2day              up 2-00:00:00      1    mix swan03.cpu.stats.ox.ac.uk
      swan-large             up 7-00:00:00      2   idle swan11.cpu.stats.ox.ac.uk,swan12.cpu.stats.ox.ac.uk
      stats-7day             up 7-00:00:00      1   idle emu.stats.ox.ac.uk
      
  • Check running jobs
    • squeue
      greytail{thornton}% squeue
               JOBID PARTITION     NAME   ST       TIME  NODES NODELIST(REASON)
              845457 swan03-de     bash   R      14:54      1 swan03.cpu.stats.ox.ac.uk
              845455 swan03-de     bash   R      17:22      1 swan03.cpu.stats.ox.ac.uk
              845215 swan-2day      SCI   R    6:33:10      1 swan03.cpu.stats.ox.ac.uk
              845400  grey-gpu    job01   R    3:06:28      1 greyostrich.stats.ox.ac.uk
              845397  grey-gpu    job01   R    3:10:17      1 greyostrich.stats.ox.ac.uk
              841508  grey-gpu eff_n_12   R 1-07:35:22      1 greyostrich.stats.ox.ac.uk
              838246  grey-gpu    eff_n   R 2-18:29:05      1 greyostrich.stats.ox.ac.uk
      
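When scripting around slurm, it is often handy to parse this output. The sketch below parses the default squeue format shown above; the function name is my own, and it assumes job names contain no spaces. For anything robust, squeue's -o/--Format options give machine-friendly output instead.

```python
def parse_squeue(output: str):
    """Parse default-format squeue output into a list of dicts.

    Assumes whitespace-separated columns with no spaces inside fields,
    as in the default squeue layout.
    """
    lines = output.strip().splitlines()
    header = lines[0].split()  # JOBID PARTITION NAME ST TIME NODES NODELIST(REASON)
    return [dict(zip(header, line.split())) for line in lines[1:]]


# Sample taken from the squeue output shown above
sample = """\
JOBID PARTITION     NAME   ST       TIME  NODES NODELIST(REASON)
845457 swan03-de     bash   R      14:54      1 swan03.cpu.stats.ox.ac.uk
845215 swan-2day      SCI   R    6:33:10      1 swan03.cpu.stats.ox.ac.uk"""

jobs = parse_squeue(sample)
# jobs[0]["JOBID"] == "845457", jobs[1]["PARTITION"] == "swan-2day"
```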

Running Scripts

  • Create a file launch.sh on head node
  • Populate file with preamble to specify resources required
    #!/bin/bash
    #SBATCH -A oxwasp
    #SBATCH --time=20:00:00
    #SBATCH --mail-user=[email protected]
    #SBATCH --mail-type=ALL
    #SBATCH --partition=grey-standard
    #SBATCH --nodelist="greyheron.stats.ox.ac.uk"
    #SBATCH --output="/tmp/slurm-JT-output"
    #SBATCH --mem=15G
    #SBATCH --cpus-per-task=10
    #SBATCH --gres=gpu:1
    
    
  • Add the commands to run in the same file, after the preamble (see below for an example)
  • Launch the slurm job: sbatch launch.sh
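When launching jobs programmatically, it is useful to capture the job id from what sbatch prints on submission ("Submitted batch job <id>"). A small sketch; the helper name is my own:

```python
import re


def job_id_from_sbatch(output: str) -> int:
    """Extract the job id from sbatch's 'Submitted batch job <id>' line."""
    match = re.search(r"Submitted batch job (\d+)", output)
    if match is None:
        raise ValueError(f"unexpected sbatch output: {output!r}")
    return int(match.group(1))


# e.g. the captured stdout of `sbatch launch.sh`
job_id = job_id_from_sbatch("Submitted batch job 845457\n")
```

The id can then be fed back into squeue/scancel to monitor or cancel the job.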

Hosting a Jupyter Notebook


#!/bin/bash
#SBATCH -A oxwasp                             # Account to be used, e.g. academic, acadrel, aims, bigbayes, opig, oxcsml, oxwasp, rstudent, statgen, statml, visitors
#SBATCH -J job01                              # Job name; optional but useful
#SBATCH --time=7-00:00:00                     # Walltime: here, seven days
#SBATCH --mail-user=[email protected]         # Email address to notify; change "me" to your own address
#SBATCH --mail-type=ALL                       # Caution: fine for debugging, but not when handling hundreds of jobs!
#SBATCH --partition=grey-gpu                  # Select the grey GPU partition
#SBATCH --nodelist=greyostrich.stats.ox.ac.uk
#SBATCH --output="/tmp/slurm-JT-output"
#SBATCH --mem=20G
#SBATCH --cpus-per-task=5
#SBATCH --gres=gpu:1

cd /data/greyostrich/oxwasp/oxwasp18/thornton

source ./miniconda3/bin/activate bridge
pip install tornado
python -m ipykernel install --user --name=bridge

python -m jupyter notebook --ip greyostrich.stats.ox.ac.uk --no-browser --port 8888

Python Interface with Paramiko

Set-Up

  • Install the paramiko library for ssh utilities
  • Connect to slurm head node e.g. greytail via paramiko
    client = paramiko.SSHClient()
    client.load_system_host_keys()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    client.connect(hostname='greytail')
    

Launch individual commands

    command = 'sinfo'
    stdin, stdout, stderr = client.exec_command(command)
    lines = stdout.readlines()
    

Launch scripts

  • Create sbatch file in Python
    preamble = """#!/bin/bash
    #SBATCH -A oxwasp
    #SBATCH --time=20:00:00
    #SBATCH --mail-user=[email protected]
    #SBATCH --mail-type=ALL
    #SBATCH --partition=grey-standard
    #SBATCH --nodelist="greyheron.stats.ox.ac.uk"
    #SBATCH --output="/tmp/slurm-JT-output"
    #SBATCH --mem=15G
    #SBATCH --cpus-per-task=10
    """
    
    
    command = preamble + "\n" + """
    
    cd /data/localhost/oxwasp/oxwasp18/thornton
    touch test_new_file2.txt
    """
    
  • Create new file on head node and write sbatch commands to file
    slurm_wd = '/data/thornton'
    slurm_file = 'test_batch.sh'
      
    ftp = client.open_sftp()
    ftp.chdir(slurm_wd)
    remote_file = ftp.file(slurm_file, "w", -1)
    remote_file.write(command)
    remote_file.flush()
    ftp.close()
    
  • Launch slurm sbatch remotely
    sbatch_cmd = 'sbatch {0}'.format(os.path.join(slurm_wd, slurm_file))
    
    stdin, stdout, stderr = client.exec_command(sbatch_cmd)
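The steps above can be wrapped into a small helper that assembles the sbatch script text from its pieces before writing it over sftp. A sketch; the function name and parameters are my own, not part of slurm or paramiko:

```python
def make_sbatch_script(account, walltime, partition, commands,
                       mem="15G", cpus=10):
    """Build the text of an sbatch script: #SBATCH preamble plus shell commands."""
    preamble = "\n".join([
        "#!/bin/bash",
        f"#SBATCH -A {account}",
        f"#SBATCH --time={walltime}",
        f"#SBATCH --partition={partition}",
        f"#SBATCH --mem={mem}",
        f"#SBATCH --cpus-per-task={cpus}",
    ])
    return preamble + "\n\n" + "\n".join(commands) + "\n"


script = make_sbatch_script(
    "oxwasp", "20:00:00", "grey-standard",
    ["cd /data/localhost/oxwasp/oxwasp18/thornton",
     "touch test_new_file2.txt"],
)
```

The resulting string can then be written to a remote file and submitted with sbatch exactly as in the snippets above.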
    
Conda Environments (2020-11-08)

Virtual environments are a convenient way to manage library dependencies and environment variables, and to ensure reproducibility. There are a few approaches to this: virtualenv, conda, and docker (see here for a discussion). This post focuses on conda and gives a few practical commands to get up and running.

  • Download and install miniconda
    curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -o miniconda.sh
    sh -x miniconda.sh -b -p "./miniconda3"
    

Note: The default conda set-up edits the .bashrc file and sets environment variables to point to the conda executable. This is a pain when dealing with multiple servers; fortunately there are ways around it, and the commands given here do not rely on editing the .bashrc.

Basic commands

  • Create environment called conda_venv, for Python version 3.8
    ./miniconda3/bin/conda create -n conda_venv python=3.8
  • Activate environment
    source ./miniconda3/bin/activate conda_venv
    This adds the environment executables, such as python, pip and conda, to the executable path.
  • De-activate environment
    conda deactivate
  • Install/ uninstall: (once env is activated)
    • Through conda: conda install -c anaconda numpy
    • Through pip: pip install numpy
      Note: this pip executable will be installed when installing python, and the libraries installed via pip will be specific to the conda environment and not the global environment
  • Export installed dependencies to file
    conda env export > environment.yml

  • Install dependencies from file: this actually creates an environment from the yaml file, so there is no need to create an empty environment first: conda env create -f environment.yml

Conda environments with jupyter notebook

  • Make sure jupyter is installed
    pip install jupyter
  • Add a kernel for the environment
    python -m ipykernel install --user --name=conda_venv
  • For Windows: conda install pywin32
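A quick way to confirm that an activated environment's executables are the ones actually being picked up (the point of the pip note above) is to ask Python itself. A minimal, conda-agnostic sketch using only the standard library:

```python
import shutil
import sys

# The running interpreter lives under the active environment's prefix.
print("python executable:", sys.executable)
print("environment prefix:", sys.prefix)

# shutil.which resolves names the same way the shell does, so with an
# activated environment its own pip should come first on the PATH.
print("pip on PATH:", shutil.which("pip"))
```

If the printed paths point at the miniconda3/envs directory for your environment, installs via pip and conda will land in that environment rather than the global one.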
Speeding up Python with C++ and Pybind11 (2020-08-01)

A how-to set-up guide for using C++ with Python via Pybind11.

Instructions are based on this guide with some of the kinks worked out for common problems.

I am using conda to manage dependencies and using the MS Visual Studio 2019 IDE for C++.

Code here

1) Set-Up

  • Create a project environment. I will use conda, but other environment managers are available. Go to a command line and enter the following:
     conda create -n venv
     conda activate venv
     conda install pip
     pip install pybind11
    
  • Configure MS Visual Studio C++ settings
    • Create new C++ project called “superfastcode”
    • Configure project properties, go to ribbon, Project > superfastcode Properties
      • General > Target Name: the name of the module as you want to refer to it from Python in from…import statements. This is the same name used in the C++ code when defining the module for Python. To use the project name as the module name, leave the default value of $(ProjectName).
      • General (or Advanced) > Target Extension: .pyd
      • Project Defaults > Configuration Type: Dynamic Library (.dll)
      • C/C++ > General > Additional Include Directories: add the Python include folder as appropriate for your installation, for example C:\Users\james\Miniconda3\envs\venv\include.
      • C/C++ > Code Generation > Runtime Library: Multi-threaded DLL (/MD)
      • Linker > General > Additional Library Directories: add the Python libs folder containing .lib files as appropriate for your installation, for example C:\Users\james\Miniconda3\envs\venv\libs. (Be sure to point to the libs folder that contains .lib files, and not the Lib folder that contains .py files.)

2) Write some code

  • Within MS Visual Studio, add a .cpp file called “module.cpp”. To do this, go to Solution Explorer, Source Files, then select ‘add’ and choose the .cpp file
  • Copy the following code
    #include <cmath>
    #include <pybind11/pybind11.h>
    
    const double e = 2.7182818284590452353602874713527;
    
    double sinh_impl(double x) {
        return (1 - pow(e, (-2 * x))) / (2 * pow(e, -x));
    }
    
    double cosh_impl(double x) {
        return (1 + pow(e, (-2 * x))) / (2 * pow(e, -x));
    }
    
    double tanh_impl(double x) {
        return sinh_impl(x) / cosh_impl(x);
    }
    
    namespace py = pybind11;
    
    PYBIND11_MODULE(superfastcode, m) {
        m.def("fast_tanh", &tanh_impl, R"pbdoc(
            Compute a hyperbolic tangent of a single argument expressed in radians.
        )pbdoc");
    
    #ifdef VERSION_INFO
        m.attr("__version__") = VERSION_INFO;
    #else
        m.attr("__version__") = "dev";
    #endif
    }
    
  • Check that the solution builds: in MS VS, press Ctrl+Shift+B or go to Build > Build Solution, ensuring the correct configuration.
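Before wiring up the build, the arithmetic itself can be sanity-checked in pure Python: the functions below mirror the C++ expressions above and should agree with math.tanh to floating-point precision.

```python
import math

# Same constant as in the C++ module
E = 2.7182818284590452353602874713527


def sinh_impl(x):
    # (e^x - e^-x) / 2, written exactly as in the C++ above
    return (1 - E ** (-2 * x)) / (2 * E ** (-x))


def cosh_impl(x):
    # (e^x + e^-x) / 2
    return (1 + E ** (-2 * x)) / (2 * E ** (-x))


def tanh_impl(x):
    return sinh_impl(x) / cosh_impl(x)


assert abs(tanh_impl(0.5) - math.tanh(0.5)) < 1e-12
assert abs(tanh_impl(-2.0) - math.tanh(-2.0)) < 1e-12
```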

3) Allow access to your C++ code from Python

  • Create a setup.py file to expose function to Python
    • Add a .cpp file as above to the Source Files, but rename it to setup.py
    • Copy the following
      import os, sys
      
      from distutils.core import setup, Extension
      from distutils import sysconfig
      
      cpp_args = ['-std=c++11', '-stdlib=libc++', '-mmacosx-version-min=10.7']
      
      sfc_module = Extension(
          'superfastcode', sources=['module.cpp'],
          include_dirs=['pybind11/include'],
          language='c++',
          extra_compile_args=cpp_args,
          )
      
      setup(
          name='superfastcode',
          version='1.0',
          description='Python package with superfastcode C++ extension (PyBind11)',
          ext_modules=[sfc_module],
      )
      
  • Back to the commandline with environment venv activated, install the C++ module using pip
    • Navigate to the directory containing ‘setup.py’ on the terminal
    • Enter the following in the terminal: pip install .
  • Check it works
    • Again on the command line, open a Python interpreter for the venv environment
       python
       >>> from superfastcode import fast_tanh
      

Common Issues

  • 32 bit vs 64 bit
    • Errors such as “fatal error LNK1112: module machine type ‘x64’ conflicts with target machine type ‘X86’” are due to a 32/64-bit configuration mismatch
    • Ensure that the bit version of Python, and hence of the installed pybind11, matches the bit target chosen for C++. At the time of writing I had installed 64-bit Python 3.8 by default, so chose x64 in the MS Visual Studio Configuration Manager.
    • Some more debugging issues here
  • Cannot find pybind11.h
    • Ensure the Python “include” directory, which contains “pybind11.h”, is entered under “Additional include directories” in the Project Properties set-up detailed above. Also ensure the project is being built with the configuration (such as Platform x64) that corresponds to the project properties holding the correct include directories.

Working with C++ and NumPy

NumPy arrays may be accessed through the buffer protocol. See more examples in the pybind11 docs here.
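The buffer protocol is not specific to pybind11 or NumPy: pure Python exposes the same mechanism through memoryview, which is a useful mental model for what py::buffer_info hands to the C++ side. A small stdlib-only sketch:

```python
from array import array

# array('d', ...) stores C doubles contiguously, like a 1-D NumPy array.
a = array("d", [1.0, 2.0, 3.0])

# memoryview takes a zero-copy view of the underlying buffer, analogous
# to input.request() returning a py::buffer_info in the C++ code below.
view = memoryview(a)
view[0] = 10.0  # writes through to the original storage

assert a[0] == 10.0
assert view.format == "d" and view.ndim == 1 and len(view) == 3
```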

  • Copy full C++ code below into “module.cpp”.
    #include <cmath>
    #include <pybind11/pybind11.h>
    #include <pybind11/numpy.h>

    const double e = 2.7182818284590452353602874713527;

    double sinh_impl(double x) {
        return (1 - pow(e, (-2 * x))) / (2 * pow(e, -x));
    }

    double cosh_impl(double x) {
        return (1 + pow(e, (-2 * x))) / (2 * pow(e, -x));
    }

    double tanh_impl(double x) {
        return sinh_impl(x) / cosh_impl(x);
    }

    namespace py = pybind11;

    py::array_t<double> add_arrays(py::array_t<double> input1, py::array_t<double> input2) {
        py::buffer_info buf1 = input1.request(), buf2 = input2.request();

        if (buf1.ndim != 1 || buf2.ndim != 1)
            throw std::runtime_error("Number of dimensions must be one");

        if (buf1.size != buf2.size)
            throw std::runtime_error("Input shapes must match");

        /* No pointer is passed, so NumPy will allocate the buffer */
        auto result = py::array_t<double>(buf1.size);

        py::buffer_info buf3 = result.request();

        double* ptr1 = (double*)buf1.ptr,
            * ptr2 = (double*)buf2.ptr,
            * ptr3 = (double*)buf3.ptr;

        for (size_t idx = 0; idx < buf1.shape[0]; idx++)
            ptr3[idx] = ptr1[idx] + ptr2[idx];

        return result;
    }

    PYBIND11_MODULE(superfastcode, m) {
        m.def("fast_tanh", &tanh_impl, R"pbdoc(
            Compute a hyperbolic tangent of a single argument expressed in radians.
        )pbdoc");
        m.def("add_arrays", &add_arrays, "Add two NumPy arrays");

    #ifdef VERSION_INFO
        m.attr("__version__") = VERSION_INFO;
    #else
        m.attr("__version__") = "dev";
    #endif
    }
  • Test in Python as follows, from terminal or otherwise
    python
    >>> import numpy as np
    >>> from superfastcode import add_arrays
    >>> a = np.array([1.,2.,3.])
    >>> b = a.copy()
    >>> add_arrays(a,b)
    array([2., 4., 6.])
    
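For reference, the shape checks and the loop inside add_arrays correspond to this pure-Python equivalent. It is a sketch for understanding the C++ code above, not a replacement for the compiled version:

```python
def add_arrays_ref(input1, input2):
    """Pure-Python mirror of the C++ add_arrays above: element-wise sum of 1-D sequences."""
    if len(input1) != len(input2):
        raise RuntimeError("Input shapes must match")
    # Allocate the result, as the C++ does with py::array_t<double>(buf1.size)
    result = [0.0] * len(input1)
    for idx in range(len(input1)):
        result[idx] = input1[idx] + input2[idx]
    return result


assert add_arrays_ref([1.0, 2.0, 3.0], [1.0, 2.0, 3.0]) == [2.0, 4.0, 6.0]
```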