LTO9-Optimizer/optimizer.sh

186 lines
6.6 KiB
Bash
Raw Normal View History

2023-07-04 20:23:48 +02:00
#!/bin/bash
# Bash is required: the script uses [[ ]], (( )), local, $RANDOM and BASH_SOURCE.
# A direct interpreter path (rather than `env bash`) keeps the process name equal
# to the script name, which the documented `killall optimizer.sh` kill switch needs.
###
### Usage: ./optimizer.sh </dev/sgX> <parallelism>
###
### -h | --help Shows this message
###
### optimizer.sh
### Necessary parameters:
### - sg device of the library
### - parallelism
###
### The script checks the parallelism with the available drives of the library/partition, then
### proceeds with loading *all* the tapes found in the slots. Mailboxes are ignored.
### It launches several child processes. The `timeout` command tries to unload the drives,
### it succeeds when the tape optimization is done.
###
### Kill switch to stop the running processes: `killall optimizer.sh`
###
# Print the usage text: every line of this script that begins with three hash
# characters, with that marker and one optional following space stripped.
# Outputs: the usage text on stdout.
help() {
  # Portable form: `-n` plus the `p` flag prints only the lines on which the
  # substitution matched, so the GNU-only `-r` option and `T` command are not needed.
  sed -n 's/^### \{0,1\}//p' "$0"
}
# Show the usage text and stop when help is requested or no argument is given.
case "${1:-}" in
  -h | --help | "")
    help
    exit 1
    ;;
esac

# <parallelism> is used in numeric comparisons and as a loop bound later in the
# script: reject anything that is not a positive integer before touching the
# library (an empty value breaks the arithmetic, 0 would wait forever).
if [[ ! "${2:-}" =~ ^[1-9][0-9]*$ ]]; then
  printf 'ERROR. <parallelism> must be a positive integer (got: "%s").\n' "${2:-}" >&2
  help
  exit 1
fi
# Print a timestamped message on stdout and append it to the log file.
# Globals:   LOG_FILE (read) - log file path; when unset or empty the message
#            is only printed, nothing is appended.
# Arguments: $1 - message; backslash escapes such as \n are interpreted.
# Outputs:   "YYYY-MM-DD HH:MM:SS <message>" on stdout (and in LOG_FILE).
log() {
  local line
  line="$(date +'%Y-%m-%d %H:%M:%S') $1"
  if [[ -n "${LOG_FILE:-}" ]]; then
    # Quoted so that a log path containing spaces is not split into several files.
    printf '%b\n' "${line}" | tee -a -- "${LOG_FILE}"
  else
    printf '%b\n' "${line}"
  fi
}
#######################################
# Watchdog for one loaded tape: periodically tries to unload the drive and,
# once the unload succeeds, marks the barcode as done and frees the drive.
# Meant to be started in the background, one instance per busy drive.
# Globals:
#   TMP                (read) directory holding the .driveN.lock/.ready markers
#   PROCESSED_BARCODES (read) file whose "<barcode>;driveN;WIP" entry is
#                      rewritten in place to "<barcode>;OK"
# Arguments:
#   $1 - sg device of the library
#   $2 - slot the tape goes back to
#   $3 - drive number
#   $4 - barcode of the tape
# Returns:
#   0 when the drive was unloaded and released, or when a .ready marker for
#     the drive shows up after a failed unload;
#   1 when the barcode is not listed as WIP after a successful unload, or
#     when the .lock marker does not contain the expected barcode.
#######################################
alive() {
  local lib_sg="$1" slot="$2" drive="$3" barcode="$4"
  local lock_file="${TMP}/.drive${drive}.lock"
  local ready_file="${TMP}/.drive${drive}.ready"
  local wip_entry="${barcode};drive${drive};WIP"
  # 3600 + (0..7199) seconds: between 1 hour and just under 3 hours.
  local WAIT=$((3600 + RANDOM % 7200))
  local TIMEOUT="600" # 10 minutes wait before giving up on the unload

  # The lock marker records which tape currently owns the drive.
  echo "${barcode}" > "${lock_file}"

  # NOTE: $$ is the PID of the main script, even when this runs in the background.
  while true; do
    log "ALIVE [$$] - Waiting ${WAIT} secondi..."
    sleep "${WAIT}"
    log "ALIVE [$$] - Trying to unload drive ${drive} into slot ${slot} (Timeout: ${TIMEOUT} seconds)..."

    # Test the command directly so that nothing can clobber its exit status.
    if timeout "${TIMEOUT}" mtx -f "${lib_sg}" unload "${slot}" "${drive}"; then
      log "ALIVE [$$] - OK. Drive ${drive} unloaded."
      # -F: the barcode is data, not a regular expression.
      if grep -qF -- "${wip_entry}" "${PROCESSED_BARCODES}"; then
        log "ALIVE [$$] - Barcode ${barcode} processed."
        sed -i "s/${barcode};drive${drive};WIP/${barcode};OK/g" "${PROCESSED_BARCODES}"
        rm -f -- "${lock_file}"
        touch -- "${ready_file}"
        log "ALIVE [$$] - Drive ${drive} newly available. Exit."
        return 0
      fi
      log "ALIVE [$$] - ERROR. Can't find the barcode among the WIP. Exit."
      return 1
    fi

    # Unload failed.
    if [[ -f "${lock_file}" ]]; then # Drive in use
      log "ALIVE [$$] - Unmount drive ${drive} failed, still in use (file .lock). Checking the content."
      if grep -qF -- "${barcode}" "${lock_file}"; then
        log "ALIVE [$$] - File content OK. Waiting."
        continue
      fi
      log "ALIVE [$$] - ERROR. File content of ${TMP}/.drive${drive}.lock not coherent. Exit."
      return 1
    elif [[ -f "${ready_file}" ]]; then # Drive available
      log "ALIVE [$$] - Found file ${TMP}/.drive${drive}.ready. Drive available. Exit."
      return 0
    fi
    # Neither marker exists: wait and try again on the next round.
  done
}
# --- Configuration -----------------------------------------------------------
LIB_SG=$1
PARALLELISM=$2
WAIT=$((300)) # seconds the main loop sleeps while every drive is busy
TMP='/tmp'
LIB_CONFIG="${TMP}/lib_config.txt"
PROCESSED_SLOTS="${TMP}/processed-slots.txt"
PROCESSED_BARCODES="/root/processed-barcodes.txt"

# --- Logging -----------------------------------------------------------------
# Set up the log file before the first log() call, otherwise the opening
# message never reaches it.
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
LOG_PATH="${SCRIPT_DIR}/log"
LOG_FILE="${LOG_PATH}/optimizer.log"
if [[ ! -d "${LOG_PATH}" ]]; then
  echo "Creating the log directory: ${LOG_PATH}"
  mkdir -vp -- "${LOG_PATH}"
fi
log "***** New execution *****"

# --- Library status ----------------------------------------------------------
if ! mtx -f "${LIB_SG}" status > "${LIB_CONFIG}"; then
  log "ERROR. Unable to read the library status from ${LIB_SG}. Exit."
  exit 1
fi

# Check if there are enough ($PARALLELISM) free drives
FREE_DRIVES=$(grep "Data Transfer Element" "${LIB_CONFIG}" | grep -c 'Empty')
if [[ "${FREE_DRIVES}" -lt "${PARALLELISM}" ]]; then
  log "ERROR. Not enough free drives. Exit."
  exit 1
fi

# Clean the WIP barcodes from old executions (create the file on a first run
# so that sed and the later greps have something to work on).
touch -- "${PROCESSED_BARCODES}"
sed -i '/WIP/d' "${PROCESSED_BARCODES}"

# Cleaning orphan files
rm -rf -- "${TMP:?}"/.drive*
rm -rf -- "${PROCESSED_SLOTS}"

# Publish a ".ready" marker for each of the first $PARALLELISM drives that the
# library reports as empty.
for (( i = 0; i < PARALLELISM; i++ )); do
  if grep -q "Data Transfer Element ${i}:Empty" "${LIB_CONFIG}" \
    && [[ ! -f "${TMP}/.drive${i}.ready" ]]; then
    log "Creating file ${TMP}/.drive${i}.ready"
    touch -- "${TMP}/.drive${i}.ready"
  fi
done
# List full slots (import/export mailboxes and cleaning tapes are filtered out)
SLOTS_FULL="${TMP}/slots_full.txt"
grep 'Storage Element' "${LIB_CONFIG}" | grep 'Full' | grep -v IMPORT | grep -v CLN | grep -v "(" > "${SLOTS_FULL}"
TOTAL_SLOTS=$(wc -l < "${SLOTS_FULL}")

# Make sure the bookkeeping file exists so the "already processed" check below
# does not complain about a missing file on the first iterations.
: >> "${PROCESSED_SLOTS}"

# Print the number of drives currently holding a ".lock" marker.
count_locked_drives() {
  local marker locked=0
  for marker in "${TMP}"/.drive*.lock; do
    # With no match the glob stays literal; -e filters that case out.
    if [[ -e "${marker}" ]]; then
      locked=$((locked + 1))
    fi
  done
  echo "${locked}"
}

COUNT=1
OFS=$IFS
IFS=$'\n'
# COUNT is the 1-based line of SLOTS_FULL being handled. It only advances once
# the slot has been dealt with, so a slot is retried while all drives are busy.
while (( COUNT <= TOTAL_SLOTS )); do
  RUNNING=$(count_locked_drives)
  if [[ "${RUNNING}" -ge "${PARALLELISM}" ]]; then
    log "MAIN - Maximum parallelism. Waiting ${WAIT} seconds…"
    sleep "${WAIT}"
    continue
  fi

  SLOT_LINE=$(sed "${COUNT}q;d" "${SLOTS_FULL}")
  SLOT=$(awk '{print $3;}' <<< "${SLOT_LINE}" | cut -d ':' -f 1)
  BARCODE=$(awk '{print $4;}' <<< "${SLOT_LINE}" | cut -d '=' -f 2)

  # Check if slot is already processed (whole-line, literal match)
  if grep -qxF -- "${SLOT}" "${PROCESSED_SLOTS}"; then
    echo "Slot ${SLOT} already processed."
    COUNT=$((COUNT + 1))
    continue
  fi
  # Check if barcode is already processed
  if grep -qF -- "${BARCODE};OK" "${PROCESSED_BARCODES}"; then
    echo "Barcode ${BARCODE} already processed. Skip."
    COUNT=$((COUNT + 1))
    continue
  fi

  log "### SLOT ${SLOT} - BARCODE ${BARCODE}"

  # Look for an available drive: claim the first ".ready" marker by renaming
  # it to ".lock". DRIVE is reset first so a value from a previous iteration
  # can never be reused by mistake.
  DRIVE=""
  for (( i = 0; i < PARALLELISM; i++ )); do
    if [[ -f "${TMP}/.drive${i}.ready" ]]; then
      DRIVE=${i}
      mv -- "${TMP}/.drive${DRIVE}.ready" "${TMP}/.drive${DRIVE}.lock"
      break
    fi
  done
  if [[ -z "${DRIVE}" ]]; then
    log "MAIN - No drive is ready yet. Waiting ${WAIT} seconds…"
    sleep "${WAIT}"
    continue
  fi

  log "Loading slot ${SLOT} into drive ${DRIVE}."
  if ! mtx -f "${LIB_SG}" load "${SLOT}" "${DRIVE}"; then
    # Without a tape in the drive the watchdog could never unload it and the
    # drive would stay locked: release it and move on to the next slot.
    log "ERROR. Loading slot ${SLOT} into drive ${DRIVE} failed. Releasing the drive and skipping the slot."
    mv -- "${TMP}/.drive${DRIVE}.lock" "${TMP}/.drive${DRIVE}.ready"
    COUNT=$((COUNT + 1))
    continue
  fi
  echo "${BARCODE};drive${DRIVE};WIP" >> "${PROCESSED_BARCODES}" # Barcode WIP
  echo "${SLOT}" >> "${PROCESSED_SLOTS}"

  log "Launching ALIVE with parameters: ${LIB_SG} - ${SLOT} - ${DRIVE} - ${BARCODE}"
  alive "${LIB_SG}" "${SLOT}" "${DRIVE}" "${BARCODE}" &

  RUNNING=$(count_locked_drives)
  log "MAIN - RUNNING processes: $RUNNING"
  if [[ "${RUNNING}" -ge "${PARALLELISM}" ]]; then
    # COUNT is deliberately left alone: the next pass finds the slot in
    # PROCESSED_SLOTS and advances from there.
    log "MAIN (if) - Maximum parallelism. Waiting ${WAIT} seconds…"
    sleep "${WAIT}"
  else
    COUNT=$((COUNT + 1))
    log "Waiting 2 minutes to avoid too many commands."
    sleep 120
  fi
done
IFS=$OFS

# Remove the work files of this run. Background ALIVE processes may still be
# running at this point; they only use the drive markers and PROCESSED_BARCODES.
rm -rf -- "${LIB_CONFIG}"
rm -rf -- "${SLOTS_FULL}"
rm -rf -- "${PROCESSED_SLOTS}"
log "***** End execution *****\n\n"