#!/bin/bash
# entrypoint.sh
#
# Container entrypoint for a Slurm node: prints permission diagnostics,
# prepares the spool directory, starts munged, then replaces itself with
# the requested Slurm daemon.
#
# Usage: entrypoint.sh slurmctld|slurmd
#
# The shebang must be the very first line of the file to take effect, so the
# file name is kept as a comment below it rather than as a bare title line
# (which the shell would try to run as a command).

# Abort on the first command that fails outside of a condition.
set -e

# === DIAGNOSTICS ===
# Print the current user and the ownership/mode of the munge directories.
# This output is informational only. The script runs under 'set -e', so a
# failing 'ls' (for example a directory that does not exist) is caught here
# and reported as a warning instead of aborting startup.
print_diagnostics() {
    local dir
    echo "--- Permission diagnostics ---"
    echo "User: $(whoami)"
    for dir in /etc/munge /var/log/munge; do
        echo "Permissions for ${dir}:"
        # '|| ...' keeps errexit from treating a diagnostic failure as fatal.
        ls -ld "${dir}" || echo "Warning: could not inspect ${dir}; continuing." >&2
    done
    echo "------------------------------"
}
print_diagnostics

# === CORRECT STARTUP ===
# Create the spool directory required by the requested service and give it
# to the 'slurm' user. The service name ($1) is validated here so that a
# missing or unknown argument stops the script before any daemon is started.
case "${1:-}" in
    slurmctld)
        echo "Creating spool directories for slurmctld..."
        mkdir -p /var/spool/slurm/ctld_state
        chown -R slurm:slurm /var/spool/slurm/ctld_state
        ;;
    slurmd)
        echo "Creating spool directories for slurmd..."
        # The directory name is built from the output of 'hostname'.
        # NOTE(review): presumably this must match SlurmdSpoolDir in
        # slurm.conf; Slurm documents %h as the hostname and %n as the
        # NodeName there -- confirm which pattern the config uses.
        # A dedicated variable is used instead of overwriting HOSTNAME,
        # which bash maintains itself.
        node_spool_dir="/var/spool/slurm/slurmd_node_$(hostname)"
        mkdir -p "${node_spool_dir}"
        chown -R slurm:slurm "${node_spool_dir}"
        ;;
    *)
        # Fail fast: nothing has been started yet at this point.
        echo "Error: Must specify 'slurmctld' or 'slurmd' as argument." >&2
        exit 1
        ;;
esac

# Start the munge daemon as the 'munge' user.
# 'su -s /bin/bash -c "COMMAND" USER' runs a command as another user.
echo "Starting munged as the 'munge' user..."
su -s /bin/bash -c "/usr/sbin/munged" munge

# Wait until munged actually answers requests instead of sleeping for a
# fixed time: 'munge -n' encodes an empty credential, which only succeeds
# once the daemon is reachable. Gives up after roughly 10 seconds.
if command -v munge >/dev/null 2>&1; then
    munge_ready=0
    for _ in 1 2 3 4 5 6 7 8 9 10; do
        if munge -n >/dev/null 2>&1; then
            munge_ready=1
            break
        fi
        sleep 1
    done
    if [[ "${munge_ready}" -ne 1 ]]; then
        echo "Error: munged did not become ready within 10 seconds." >&2
        exit 1
    fi
else
    # No munge client available to probe with; keep the fixed two-second
    # grace period.
    sleep 2
fi

# Dispatch on the first argument: 'slurmctld' selects the controller daemon,
# 'slurmd' the compute-node daemon. 'exec' replaces this shell with the
# daemon, which runs in the foreground (-D) with the given config file (-f).
# No user switch happens here, so the daemon keeps this script's user.
# NOTE(review): the original comments state that both daemons must run as
# root to manage processes -- confirm this for slurmctld.
case "$1" in
    slurmctld)
        echo "Starting slurmctld..."
        exec /usr/sbin/slurmctld -D -f /etc/slurm/slurm.conf
        ;;
    slurmd)
        echo "Starting slurmd..."
        exec /usr/sbin/slurmd -D -f /etc/slurm/slurm.conf
        ;;
    *)
        # Anything else (including no argument at all) is a usage error.
        echo "Error: Must specify 'slurmctld' or 'slurmd' as argument."
        exit 1
        ;;
esac