Commit fb1b9670 authored by Yori Fournier's avatar Yori Fournier
Browse files

on the way to run_step.sh (reforged)

parent 936afee5
#!/bin/env bash
# get options
STEP_NAME=$1
COMMIT=$2
shift; shift;
ARTIFACTS_DIR_HASHES=($@)
DEPS_DIR=($@)
# echo ${ARTIFACTS_HASHES[@]}
#echo ${DEPS_DIR[@]}
# load env
source pipe-env.sh
# SETUP TMP DIR
TMP_HASH=$(date | sha1sum)
TMP_HASH=${TMP_HASH%% *}
echo ${TMP_HASH}
mkdir ${PIPE_ROOT}/${ARTIFACTS}/.${STEP_NAME}_${TMP_HASH}
TMP_DIR=${PIPE_ROOT}/${ARTIFACTS}/.${STEP_NAME}_${TMP_HASH}
touch ${TMP_DIR}/trace.toml
echo "[${STEP_NAME}]" >> ${TMP_DIR}/trace.toml
touch ${TMP_DIR}/parameters_checksum.sha1
# BUILD HASH
BUILD_HASH=$(docker inspect ${STEP_NAME}:${COMMIT} --format="{{.Id}}")
if [ -z ${BUILD_HASH} ];
then
echo "ERROR: you need to build the step before running it: build_step.sh <STEP_NAME> <COMMIT>"
exit 1
fi
echo "build_hash = \"${BUILD_HASH}\"" >> ${TMP_DIR}/trace.toml
# PARAM HASH (only necessary if Parameters not in GIT repo --> COMMIT)
cd ${PIPE_ROOT}/${RUNTIME_PARAMS}
filenames=$(find "${STEP_NAME}" -type f)
# load libs
source src/run.sh
for filename in ${filenames};
do
sha1sum ${filename} >> ${TMP_DIR}/parameters_checksum.sha1
done
cd ${PIPE_ROOT}
PARAM_HASH=$(sha1sum ${TMP_DIR}/parameters_checksum.sha1)
PARAM_HASH="sha1:${PARAM_HASH%% *}"
echo "parameters_hash = \"${PARAM_HASH}\"" >> ${TMP_DIR}/trace.toml
# ARCHIVE HASH (only necessary if ArchDescr not in GIT repo --> COMMIT)
# validate options
STEPS=`./stoml ${PIPE_ROOT}/config.toml steps`
echo $STEPS
if [[ ! " ${STEPS[*]} " =~ " ${STEP_NAME} " ]]; then
echo "ERROR: the step: ${STEP_NAME} is not defined."
exit 1
fi
ARCHIVE_DEPS=(`./stoml ${PIPE_ROOT}/config.toml steps.${STEP_NAME}.archives`)
if [[ "${#ARCHIVE_DEPS[@]}" != 0 ]];
then
for archive in "${ARCHIVE_DEPS[@]}";
do
cd ${PIPE_ROOT}/${ARCHIVES}
filenames=$(find "${archive}" -type f)
for filename in ${filenames};
do
sha1sum ${filename} >> ${TMP_DIR}/archives_checksum.sha1
done
cd ${PIPE_ROOT}
done;
# setup tmp folder
TMP_HASH=$(gen_hash_word)
TMP_HASH=$(get_hash_from_word ${TMP_HASH})
ARCHIVES_HASH=$(sha1sum ${TMP_DIR}/archives_checksum.sha1)
ARCHIVES_HASH="sha1:${ARCHIVES_HASH%% *}"
else
ARCHIVES_HASH="00000000"
fi
echo "archives_hash = \"${ARCHIVES_HASH}\"" >> ${TMP_DIR}/trace.toml
echo ${TMP_HASH}
# CREATE STEP HASH
TMP_DIR="${PIPE_ROOT}/${ARTIFACTS}/.${STEP_NAME}_${TMP_HASH}"
STEP_HASH_ID="${BUILD_HASH}-${PARAM_HASH}-${ARCHIVES_HASH}"
STEP_HASH=$(echo "${STEP_HASH_ID}" | sha1sum)
STEP_HASH="sha1:${STEP_HASH%% *}"
mkdir ${TMP_DIR}
touch ${TMP_DIR}/trace.toml
echo "[${STEP_NAME}]" >> ${TMP_DIR}/trace.toml
touch ${TMP_DIR}/parameters_checksum.sha1
echo "step_id = \"${STEP_HASH_ID}\"" >> ${TMP_DIR}/trace.toml
echo "step_hash = \"${STEP_HASH}\"" >> ${TMP_DIR}/trace.toml
# set the BUILD HASH
set_build_hash ${STEP_NAME} ${COMMIT}
# CREATE DEPS HASH
echo "build_hash = \"${BUILD_HASH}\"" # >> ${TMP_DIR}/trace.toml
ARTIFACTS_DEPS=("${ARTIFACTS_DIR_HASHES[@]}")
# set the PARAM HASH
set_param_hash ${STEP_NAME} ${TMP_DIR}
#echo "art deps: ${ARTIFACTS_DEPS[@]}"
#echo "nbr art deps: ${#ARTIFACTS_DEPS[@]}"
echo "parameters_hash = \"${PARAM_HASH}\"" # >> ${TMP_DIR}/trace.toml
#
# Set the ARCHIVES HASH
set_archives_hash ${STEP_NAME} ${TMP_DIR}
touch ${TMP_DIR}/trace_dep.toml
echo "archives_hash = \"${ARCHIVES_HASH}\"" # >> ${TMP_DIR}/trace.toml
if [[ ! ${#ARTIFACTS_DEPS[@]} == 0 ]];
then
# Set the STEP HASH
STEP_HASH_ID="$(get_hash_from_word ${BUILD_HASH})-$(get_hash_from_word ${PARAM_HASH})-$(get_hash_from_word ${ARCHIVES_HASH})"
STEP_HASH=$(gen_hash_word --seed=${STEP_HASH_ID})
DEP_HASHES=()
# get the dep hash
for artifact in "${ARTIFACTS_DEPS[@]}";
do
# extract short hash from directory name
artifact_hash=${artifact: -8}
dep_hash=${artifact: -17: -9}
parent_dep_hash=${artifact: -26: -18}
dep_name=${artifact:0: -27}
# trace them
echo >> ${TMP_DIR}/trace_dep.toml
echo "[[artifacts_dep]]" >> ${TMP_DIR}/trace_dep.toml
echo "step_name = \"${dep_name}\"" >> ${TMP_DIR}/trace_dep.toml
echo "step_hash = \"${dep_hash}\"" >> ${TMP_DIR}/trace_dep.toml
echo "dep_hash = \"${parent_dep_hash}\"" >> ${TMP_DIR}/trace_dep.toml
echo "artifact_hash = \"${artifact_hash}\"" >> ${TMP_DIR}/trace_dep.toml
# read full_dep_hash from folder: artifact_hash.txt
DEP_TRACE_FILENAME=${PIPE_ROOT}/${ARTIFACTS}/${artifact}/trace.toml # should be trace.toml (but I need to reorder the writing)
full_artifact_hash=`./stoml ${DEP_TRACE_FILENAME} ${dep_name}.artifact_hash`
#full_dep_hash=$(date | sha1sum) # TO CORRECT THIS IS JUST FOR TEST [TODO]
#full_dep_hash=${full_dep_hash%% *} # TO CORRECT THIS IS JUST FOR TEST [TODO]
echo "full_artifact_hash = \"${full_artifact_hash}\"" >> ${TMP_DIR}/trace_dep.toml
echo "${full_artifact_hash}"
DEP_HASHES=("${DEP_HASHES[@]}" "${full_artifact_hash}")
done;
echo "dep_hashes: ${DEP_HASHES[@]}"
# sort the hashes
IFS=$'\n'; ordered=($(sort <<< "${DEP_HASHES[*]}")); unset IFS;
printf -v DEP_ID "%s-" "${ordered[@]}" # print the list with - separator
DEP_ID="${DEP_ID%?}" # remove last character (extra -)
DEPS_HASH=$(echo "${DEP_ID}" | sha1sum) # hash the dep_id
DEPS_HASH="sha1:${DEPS_HASH%% *}"
else
DEPS_ID="00000000"
DEPS_HASH="00000000"
fi;
echo "step_id = \"${STEP_HASH_ID}\"" # >> ${TMP_DIR}/trace.toml
echo "step_hash = \"${STEP_HASH}\"" # >> ${TMP_DIR}/trace.toml
# Set the DEPS_HASH
set_deps_hash ${TMP_DIR} ${DEPS_DIR[@]}
echo "dep_id = \"${DEP_ID}\"" >> ${TMP_DIR}/trace.toml
echo "deps_hash = \"${DEPS_HASH}\"" >> ${TMP_DIR}/trace.toml
echo "dep_id = \"${DEPS_ID}\"" # >> ${TMP_DIR}/trace.toml
echo "deps_hash = \"${DEPS_HASH}\"" # >> ${TMP_DIR}/trace.toml
# SET TEMP ARTIFACT HASH
# Set TMP ARTIFACT HASH
ARTIFACT_HASH="00000000"
echo "step_name: ${STEP_NAME}"
echo " dep_hash: ${DEPS_HASH}"
echo "step_hash: ${STEP_HASH}"
echo " art_hash: ${ARTIFACT_HASH}"
# echo " dir name: ${STEP_NAME}-${DEPS_HASH:5:8}-${STEP_HASH:5:8}-${ARTIFACT_HASH:5:8}"
# gen tmp dir name
short_deps_hash=$(get_short_hash_from_word ${DEPS_HASH})
short_step_hash=$(get_short_hash_from_word ${STEP_HASH})
short_art_hash=$(get_short_hash_from_word ${ARTIFACT_HASH})
STEP_DIR_NAME="${STEP_NAME}-${short_deps_hash}-${short_step_hash}"
echo "DIR NAME: ${STEP_DIR_NAME}"
#
# THIS SHOULD GO IN RUN_PIPELINE !!
#
# look in the artifacts directory if the step already exists
ls -d ${PIPE_ROOT}/${ARTIFACTS}/${STEP_DIR_NAME}* &> /dev/null
# run it if it does not exist
if [[ $? == 0 ]];
then
STEP_DIR_NAME=($(ls -d ${PIPE_ROOT}/${ARTIFACTS}/${STEP_DIR_NAME}*))
echo "the Step was run already: ${STEP_DIR_NAME[0]}"
if [[ ${#STEP_DIR_NAME[@]} > 1 ]];
then
echo
echo "WARNING:"
echo "--------"
echo " The step ${STEP_NAME} is not reproducible: "
echo " There are several Artifacts with the same InputSignature but different OutputSignature."
echo " Are you using some sort of random generator to produce the data?"
echo ""
echo " The results of $(basename ${STEP_DIR_NAME[0]}) will be used for the rest of the Pipeline."
echo ""
fi;
exit 0
fi;
#
# END
#
ARTIFACT_HASH=$(date | sha1sum) # TO CORRECT THIS IS JUST FOR TEST [TODO]
ARTIFACT_HASH="sha1:${ARTIFACT_HASH%% *}" # TO CORRECT THIS IS JUST FOR TEST [TODO]
# add artifact hash to the trace
echo "artifact_hash = \"${ARTIFACT_HASH}\"" >> ${TMP_DIR}/trace.toml
# run the step if it does not exist
# ARTIFACT_HASH=$(gen_hash_word --seed ${STEP_DIR_NAME})
ARTIFACT_HASH=$(gen_hash_word)
echo "artifact_hash = \"${ARTIFACT_HASH}\"" # >> ${TMP_DIR}/trace.toml
# add the dependencies to the trace
cat ${TMP_DIR}/trace_dep.toml >> ${TMP_DIR}/trace.toml
cat ${TMP_DIR}/trace_deps.toml >> ${TMP_DIR}/trace.toml
# rename the folder with the artifact short-hash
deps_hash=${DEPS_HASH#*:} # remove algo (sha1, sha256, md5 ...)
short_deps_hash=${deps_hash::8} # short form
step_hash=${STEP_HASH#*:} # remove algo (sha1, sha256, md5 ...)
short_step_hash=${step_hash::8} # short form
art_hash=${ARTIFACT_HASH#*:} # remove algo (sha1, sha256, md5 ...)
short_art_hash=${art_hash::8} # short form
short_deps_hash=$(get_short_hash_from_word ${DEPS_HASH})
short_step_hash=$(get_short_hash_from_word ${STEP_HASH})
short_art_hash=$(get_short_hash_from_word ${ARTIFACT_HASH})
STEP_DIR_NAME=${STEP_NAME}-${short_deps_hash}-${short_step_hash}-${short_art_hash}
mv ${TMP_DIR} ${PIPE_ROOT}/${ARTIFACTS}/${STEP_DIR_NAME}
......@@ -198,6 +125,3 @@ mv ${TMP_DIR} ${PIPE_ROOT}/${ARTIFACTS}/${STEP_DIR_NAME}
echo "${STEP_DIR_NAME}"
exit 0
#PARAM_HASH=
......@@ -12,11 +12,12 @@ function gen_dir_checksum() {
if [ ! -d $(dirname ${OUTPUT_FILENAME}) ];
then
echo "ERROR: the path to $(basename ${OUTPUT_FILENAME}) does not exist. $(dirname ${OUTPUT_FILENAME})"
return 1
fi;
local PWD=`pwd`
local WD=`pwd`
local DIR="${DIR_PATH##*/}"
cd $(realpath ${DIR_PATH}/..)
filenames=$(find "${DIR}" -type f) # find all files (hidden and backup)
......@@ -28,7 +29,7 @@ function gen_dir_checksum() {
sha1sum ${filename} >> ${OUTPUT_FILENAME}
done
cd ${PWD}
cd ${WD}
return 0
}
......@@ -49,6 +50,26 @@ function get_short_hash_from_word() {
return 0
}
function gen_hash_from_checksum() {
  # Print the hash word ("sha1:<hex digest>") of a checksum file on stdout.
  #
  # ARGS:
  #   CHECKSUM_FILENAME: path of the file to hash
  #
  # OUTPUTS:
  #   stdout: the hash word, and nothing else
  #   stderr: diagnostics, so that callers capturing stdout with $(...)
  #           get either a hash word or an empty string
  #
  # RETURNS:
  #   0 on success
  #   1 if the path is empty, does not exist, or cannot be hashed
  local CHECKSUM_FILENAME=$1
  local HASH=""

  # The path must be quoted: an empty argument would otherwise vanish from
  # the test, the check would pass, and sha1sum would block reading stdin.
  if [[ -z "${CHECKSUM_FILENAME}" || ! -e "${CHECKSUM_FILENAME}" ]];
  then
    echo "ERROR: the checksum file to hash does not exists. ${CHECKSUM_FILENAME}" >&2
    return 1
  fi;

  # Feed the file through stdin so that the digest is always the first
  # field of the output, whatever characters the file name contains.
  HASH=$(sha1sum < "${CHECKSUM_FILENAME}") || return 1
  HASH="sha1:${HASH%% *}"
  echo "${HASH}"
  return 0
}
function gen_hash_word() {
local HASH=""
......@@ -104,3 +125,135 @@ function gen_hash_word() {
return 0
}
function set_build_hash() {
  # Set the global variable BUILD_HASH to the Id that docker reports for
  # the image <STEP_NAME>:<COMMIT>.
  #
  # ARGS:
  #   STEP_NAME: the name of the image
  #   COMMIT: the tag of the image
  #
  # GLOBALS:
  #   BUILD_HASH (written): the image Id, or an empty string on failure
  #
  # RETURNS:
  #   0 if an Id was obtained
  #   1 otherwise (a message is printed on stderr)
  local STEP_NAME=$1
  local COMMIT=$2 # long commit

  # global variable
  # The image reference is quoted so that it always reaches docker as one
  # argument. A failing docker call is treated as "no build available".
  BUILD_HASH=$(docker inspect "${STEP_NAME}:${COMMIT}" --format="{{.Id}}") || BUILD_HASH=""

  if [[ -z "${BUILD_HASH}" ]];
  then
    echo "ERROR: you need to build the step before running it: build_step.sh <STEP_NAME> <COMMIT>" >&2
    return 1
  fi

  return 0
}
function set_param_hash() {
  # Set the global variable PARAM_HASH to the hash word of the runtime
  # parameters of a step.
  #
  # ARGS:
  #   STEP_NAME: name of the step; its parameters are looked up in
  #              ${PIPE_ROOT}/${RUNTIME_PARAMS}/<STEP_NAME>
  #   TMP_DIR: directory holding parameters_checksum.sha1
  #
  # GLOBALS:
  #   PIPE_ROOT, RUNTIME_PARAMS (read)
  #   PARAM_HASH (written): the hash word, or an empty string on failure
  #
  # RETURNS:
  #   0 on success
  #   the status of gen_dir_checksum if it fails
  #   1 if the hash word could not be generated
  local STEP_NAME=$1
  local TMP_DIR=$2
  local CHECKSUM_FILE="${TMP_DIR}/parameters_checksum.sha1"

  # Propagate the failure status directly: testing $? first and returning
  # $? afterwards would return the status of the test itself, i.e. 0.
  gen_dir_checksum "${PIPE_ROOT}/${RUNTIME_PARAMS}/${STEP_NAME}" "${CHECKSUM_FILE}" || return $?

  # A failing generator must not leave its output behind as the "hash".
  PARAM_HASH=$(gen_hash_from_checksum "${CHECKSUM_FILE}") || PARAM_HASH=""

  if [[ -z "${PARAM_HASH}" ]];
  then
    echo "ERROR: the param hash could not be generated" >&2
    return 1
  fi;

  return 0
}
function set_archives_hash() {
  # Set the global variable ARCHIVES_HASH for a step.
  #
  # The archive names are read with ./stoml from the key
  # steps.<STEP_NAME>.archives of ${PIPE_ROOT}/config.toml. When the list
  # is empty, ARCHIVES_HASH is set to the placeholder "00000000".
  #
  # ARGS:
  #   STEP_NAME: name of the step
  #   TMP_DIR: directory holding archives_checksum.sha1
  #
  # GLOBALS:
  #   PIPE_ROOT, ARCHIVES (read)
  #   ARCHIVES_HASH (written)
  #
  # RETURNS:
  #   0 on success
  #   the status of gen_dir_checksum if it fails for one of the archives
  #   1 if the hash word could not be generated
  local STEP_NAME=$1
  local TMP_DIR=$2
  local CHECKSUM_FILE="${TMP_DIR}/archives_checksum.sha1"
  local archive
  local -a ARCHIVE_DEPS=()

  # The output of stoml is split on whitespace on purpose (one word per
  # archive name). As before, a failing stoml call is not an error here:
  # it simply yields an empty list.
  # shellcheck disable=SC2207
  ARCHIVE_DEPS=($(./stoml "${PIPE_ROOT}/config.toml" "steps.${STEP_NAME}.archives")) || true

  if (( ${#ARCHIVE_DEPS[@]} == 0 ));
  then
    ARCHIVES_HASH="00000000"
    return 0
  fi

  for archive in "${ARCHIVE_DEPS[@]}";
  do
    # Propagate the failure status directly: testing $? first and
    # returning $? afterwards would return the status of the test, i.e. 0.
    gen_dir_checksum "${PIPE_ROOT}/${ARCHIVES}/${archive}" "${CHECKSUM_FILE}" || return $?
  done;

  # A failing generator must not leave its output behind as the "hash".
  ARCHIVES_HASH=$(gen_hash_from_checksum "${CHECKSUM_FILE}") || ARCHIVES_HASH=""

  if [[ -z "${ARCHIVES_HASH}" ]];
  then
    echo "ERROR: the archives hash could not be generated" >&2
    return 1
  fi;

  return 0
}
function set_deps_hash() {
  # Trace the artifact dependencies of a step into TMP_DIR/trace_deps.toml
  # and set the global variables DEPS_ID and DEPS_HASH.
  #
  # ARGS:
  #   TMP_DIR: directory in which trace_deps.toml is created / appended to
  #   remaining arguments: artifact directory names (relative to
  #     ${PIPE_ROOT}/${ARTIFACTS}). The last 27 characters of each name are
  #     read as three 8-character fields, each preceded by one separator
  #     character; everything before them is the dependency's step name.
  #
  # GLOBALS:
  #   PIPE_ROOT, ARTIFACTS (read)
  #   DEPS_ID (written): sorted dependency hashes joined with "-", or
  #     "00000000" when there is no dependency
  #   DEPS_HASH (written): output of gen_hash_word, or "00000000" when
  #     there is no dependency
  #   DEP_ID (written): same as DEPS_ID but with a trailing "-"; only set
  #     when there is at least one dependency
  #
  # OUTPUTS:
  #   stdout: one "dep_hashes: ..." line when there are dependencies
  #   stderr: diagnostics
  #
  # RETURNS:
  #   0 on success
  #   1 if an artifact name is too short to hold the expected fields
  local TMP_DIR=$1
  shift;
  # Word splitting of the remaining arguments is kept on purpose, so that
  # a whitespace-separated list passed as one argument still works.
  # shellcheck disable=SC2206
  local ARTIFACTS_DEPS=($@)
  local TRACE_FILE="${TMP_DIR}/trace_deps.toml"
  local artifact artifact_hash dep_hash parent_dep_hash dep_name
  local DEP_TRACE_FILENAME full_artifact_hash hash_line
  local -a DEP_HASHES=() ordered=()

  touch "${TRACE_FILE}"

  if (( ${#ARTIFACTS_DEPS[@]} == 0 ));
  then
    DEPS_ID="00000000"
    DEPS_HASH="00000000"
    return 0
  fi;

  # Validate every name before anything is traced: a shorter name would
  # make the negative-length substring expansions below fail.
  for artifact in "${ARTIFACTS_DEPS[@]}";
  do
    if (( ${#artifact} < 28 ));
    then
      echo "ERROR: the artifact name is too short to contain its hashes: ${artifact}" >&2
      return 1
    fi;
  done;

  # get the dep hash
  for artifact in "${ARTIFACTS_DEPS[@]}";
  do
    # extract short hash from directory name
    artifact_hash=${artifact: -8}
    dep_hash=${artifact: -17: -9}
    parent_dep_hash=${artifact: -26: -18}
    dep_name=${artifact:0: -27}

    # trace them
    {
      echo
      echo "[[artifacts_dep]]"
      echo "step_name = \"${dep_name}\""
      echo "step_hash = \"${dep_hash}\""
      echo "dep_hash = \"${parent_dep_hash}\""
      echo "artifact_hash = \"${artifact_hash}\""
    } >> "${TRACE_FILE}"

    # read full_dep_hash from folder: trace.toml
    DEP_TRACE_FILENAME="${PIPE_ROOT}/${ARTIFACTS}/${artifact}/trace.toml"
    full_artifact_hash=$(./stoml "${DEP_TRACE_FILENAME}" "${dep_name}.artifact_hash") || true
    if [[ -z "${full_artifact_hash}" ]];
    then
      # Processing continues as before, but the problem is made visible.
      echo "WARNING: no artifact_hash found for ${dep_name} in ${DEP_TRACE_FILENAME}" >&2
    fi;
    echo "full_artifact_hash = \"${full_artifact_hash}\"" >> "${TRACE_FILE}"

    DEP_HASHES+=("$(get_hash_from_word "${full_artifact_hash}")")
  done;

  echo "dep_hashes: ${DEP_HASHES[@]}"

  # sort the hashes, one per line; empty entries are dropped. IFS is only
  # changed for the read builtin, so the caller's IFS is left untouched.
  while IFS= read -r hash_line;
  do
    if [[ -n "${hash_line}" ]]; then ordered+=("${hash_line}"); fi;
  done < <(printf '%s\n' "${DEP_HASHES[@]}" | sort)

  printf -v DEP_ID "%s-" "${ordered[@]}" # print the list with - separator
  DEPS_ID="${DEP_ID%?}" # remove last character (extra -)

  # NOTE(review): the seed is DEP_ID (with its trailing "-"), not the
  # trimmed DEPS_ID. Kept unchanged because switching would alter every
  # generated DEPS_HASH -- confirm which one is intended.
  DEPS_HASH=$(gen_hash_word --seed="${DEP_ID}")

  return 0
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment