Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

reduce flakiness in evict-agent CI #5386

Merged
merged 15 commits into from
Aug 24, 2024
20 changes: 10 additions & 10 deletions test/integration/suites/evict-agent/04-ban-agent
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,21 @@

log-debug "banning agent..."

docker compose exec -T spire-server \
/opt/spire/bin/spire-server agent ban \
-spiffeID "spiffe://domain.test/spire/agent/x509pop/$(fingerprint conf/agent/agent.crt.pem)"

# Check at most 30 times (with one second in between) that the agent has
# successfully banned
# Attempt at most 30 times (with one second in between) to ban the agent
MAXCHECKS=30
CHECKINTERVAL=1
spiffe_id="spiffe://domain.test/spire/agent/x509pop/$(fingerprint conf/agent/agent.crt.pem)"
for ((i=1;i<=MAXCHECKS;i++)); do
log-info "checking for agent is shutting down ($i of $MAXCHECKS max)..."
docker compose logs spire-agent
if docker compose logs spire-agent | grep "Agent is banned: removing SVID and shutting down"; then
log-info "attempting to ban agent ${spiffe_id} ($i of $MAXCHECKS max)..."

docker compose exec -T spire-server \
/opt/spire/bin/spire-server agent ban \
-spiffeID "${spiffe_id}"
docker compose logs spire-server
if docker compose logs spire-server | grep "Agent banned"; then
exit 0
fi
sleep "${CHECKINTERVAL}"
done

fail-now "timed out waiting for agent to shutdown"
fail-now "timed out waiting for successful ban"
16 changes: 16 additions & 0 deletions test/integration/suites/evict-agent/05-agent-is-banned
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash

# Poll the spire-agent container logs until the agent reports that it has been
# banned and is shutting down. Gives up after MAXCHECKS polls, pausing
# CHECKINTERVAL seconds between consecutive polls.
MAXCHECKS=30
CHECKINTERVAL=1

attempt=1
while ((attempt <= MAXCHECKS)); do
    log-info "checking for agent is shutting down due to being banned ($attempt of $MAXCHECKS max)..."
    # Print the full agent log, then search it for the shutdown message.
    docker compose logs spire-agent
    # grep echoes the matching line(s); a match ends the step successfully.
    docker compose logs spire-agent \
        | grep "Agent is banned: removing SVID and shutting down" \
        && exit 0
    sleep "${CHECKINTERVAL}"
    attempt=$((attempt + 1))
done

# Every poll came back without the expected message.
fail-now "timed out waiting for agent to shutdown"
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
log-debug "starting agent again..."
docker-up spire-agent

# Check at most 30 times (with one second in between) that the agent is not able to get new
# Check at most 30 times (with one second in between) that the agent is not able to get new
# workload entries.
MAXCHECKS=30
CHECKINTERVAL=1
Expand Down
7 changes: 0 additions & 7 deletions test/integration/suites/evict-agent/06-delete-agent

This file was deleted.

20 changes: 20 additions & 0 deletions test/integration/suites/evict-agent/07-evict-agent
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash

log-debug "evicting (deleting) agent to re-enable attestation..."

# Attempt at most 30 times (with one second in between) to evict the agent.
# An attempt counts as successful once the spire-server log contains
# "Agent deleted".
MAXCHECKS=30
CHECKINTERVAL=1
spiffe_id="spiffe://domain.test/spire/agent/x509pop/$(fingerprint conf/agent/agent.crt.pem)"
for ((i=1;i<=MAXCHECKS;i++)); do
    log-info "attempting to evict agent ${spiffe_id} ($i of $MAXCHECKS max)..."

    # Quote the SPIFFE ID so it is always passed as a single argument.
    docker compose exec -T spire-server \
        /opt/spire/bin/spire-server agent evict \
        -spiffeID "${spiffe_id}"
    # Print the full server log, then search it for the deletion message.
    docker compose logs spire-server
    if docker compose logs spire-server | grep "Agent deleted"; then
        exit 0
    fi
    sleep "${CHECKINTERVAL}"
done

# Without an explicit failure here the script would finish with the status of
# the last sleep, i.e. report success even though the eviction never showed up.
fail-now "timed out waiting for successful evict"
20 changes: 20 additions & 0 deletions test/integration/suites/evict-agent/08-agent-reattest-attempt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash

log-debug "agent re-attesting..."

# Bring the agent up and look for evidence in its log that it is starting node
# attestation; retry up to MAXCHECKS times, CHECKINTERVAL seconds apart.
# This is not true "re-attestation" since when the agent was banned it removed
# its own SVID.
MAXCHECKS=30
CHECKINTERVAL=1
for ((attempt = 1; attempt <= MAXCHECKS; attempt++)); do
    log-info "checking for agent to get notification and try to reattest (${attempt} of ${MAXCHECKS} max)..."
    log-debug "starting agent again..."
    docker-up spire-agent
    # Print the full agent log, then search it for the attestation message.
    docker compose logs spire-agent
    if ! docker compose logs spire-agent | grep "SVID is not found. Starting node attestation"; then
        # Not there yet: wait and go around again.
        sleep "${CHECKINTERVAL}"
        continue
    fi
    exit 0
done

fail-now "timed out waiting for agent to try to re-attest"
44 changes: 0 additions & 44 deletions test/integration/suites/evict-agent/08-evict-agent

This file was deleted.

15 changes: 15 additions & 0 deletions test/integration/suites/evict-agent/09-agent-reattested
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/bash

# Poll the spire-agent container logs until they show that node attestation
# succeeded. At most MAXCHECKS polls are made, CHECKINTERVAL seconds apart.
MAXCHECKS=30
CHECKINTERVAL=1

attempt=0
until ((attempt >= MAXCHECKS)); do
    attempt=$((attempt + 1))
    log-info "checking for agent to get notification that it re-attested (${attempt} of ${MAXCHECKS} max)..."
    # Print the full agent log, then search it for the success message.
    docker compose logs spire-agent
    docker compose logs spire-agent \
        | grep "Node attestation was successful" \
        && exit 0
    sleep "${CHECKINTERVAL}"
done

# The success message never appeared within the allotted polls.
fail-now "timed out waiting for agent to re-attest"
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
#!/bin/bash

log-debug "starting agent again..."

log-debug "bringing agent down..."
docker-down spire-agent
log-debug "starting agent again..."
docker-up spire-agent

# Check at most 30 times (with one second in between) that the agent is back up
Expand Down
Loading