mirror of
https://github.com/checkpoint-restore/criu
synced 2025-08-31 22:35:33 +00:00
docker-test: handle race condition error
There is a race condition in docker/containerd that causes docker to occasionally fail when starting a container from a checkpoint immediately after the checkpoint has been created. This problem is unrelated to criu and has been reported in https://github.com/moby/moby/issues/42900.
Signed-off-by: Radostin Stoyanov <rstoyanov@fedoraproject.org>
This commit is contained in:
committed by
Andrei Vagin
parent
49319cd579
commit
2642b657da
@@ -75,17 +75,37 @@ checkpoint_container () {
|
|||||||
docker wait cr
|
docker wait cr
|
||||||
}
|
}
|
||||||
|
|
||||||
restore_container () {
|
print_logs () {
|
||||||
CHECKPOINT_NAME=$1
|
|
||||||
|
|
||||||
docker start --checkpoint "$CHECKPOINT_NAME" cr 2>&1 | tee log || {
|
|
||||||
cat "$(grep log 'log file:' | sed 's/log file:\s*//')" || true
|
cat "$(grep log 'log file:' | sed 's/log file:\s*//')" || true
|
||||||
docker logs cr || true
|
docker logs cr || true
|
||||||
cat $CRIU_LOG || true
|
cat $CRIU_LOG || true
|
||||||
dmesg
|
dmesg
|
||||||
docker ps
|
docker ps
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
declare -i max_restore_container_tries=3
|
||||||
|
current_iteration=
|
||||||
|
|
||||||
|
restore_container () {
|
||||||
|
CHECKPOINT_NAME=$1
|
||||||
|
|
||||||
|
docker start --checkpoint "$CHECKPOINT_NAME" cr 2>&1 | tee log || {
|
||||||
|
# FIXME: There is a race condition in docker/containerd that causes
|
||||||
|
# docker to occasionally fail when starting a container from a
|
||||||
|
# checkpoint immediately after the checkpoint has been created.
|
||||||
|
# https://github.com/moby/moby/issues/42900
|
||||||
|
if [ "$current_iteration" -gt "$max_restore_container_tries" ]; then
|
||||||
|
print_logs
|
||||||
|
fi
|
||||||
|
grep -Eq '^Error response from daemon: failed to upload checkpoint to containerd: commit failed: content sha256:.*: already exists$' log && {
|
||||||
|
((current_iteration+=1))
|
||||||
|
echo "Retry container restore: $current_iteration"
|
||||||
|
sleep 1;
|
||||||
|
restore_container "$CHECKPOINT_NAME"
|
||||||
|
} ||
|
||||||
|
print_logs
|
||||||
|
} && current_iteration=0
|
||||||
}
|
}
|
||||||
|
|
||||||
# Scenario: Create multiple containers and checkpoint and restore them once
|
# Scenario: Create multiple containers and checkpoint and restore them once
|
||||||
|
Reference in New Issue
Block a user