Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Introduce logs in json format using log/slog #102

Merged
merged 3 commits into from
Dec 12, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 30 additions & 19 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import (
"context"
"errors"
"fmt"
"log"
"log/slog"
"os"
"strings"
"time"
Expand Down Expand Up @@ -36,9 +36,24 @@ var (
listTimeoutSeconds = int64(60)
executionFailedCounter = 0

debug = os.Getenv("DEBUG") == "true"
logger *slog.Logger // Global logger
programLevel slog.LevelVar // Info by default
)

func init() {
// Create a new logger, either in JSON or text format
if os.Getenv("JSON_LOG") == "true" {
logger = slog.New(slog.NewJSONHandler(os.Stderr, nil))
} else {
logger = slog.New(slog.NewTextHandler(os.Stderr, nil))
}

// Set the log level based on the DEBUG environment variable
if os.Getenv("DEBUG") == "true" {
programLevel.Set(slog.LevelDebug)
}
}

func main() {
for {
start := time.Now()
Expand All @@ -47,17 +62,17 @@ func main() {
panic("failed to create Kubernetes clients: " + err.Error())
}
eventManager := kevent.NewEventManager(kubernetesClient, "k8s-ttl-controller")
if err := Reconcile(kubernetesClient, dynamicClient, eventManager); err != nil {
log.Printf("Error during execution: %s", err.Error())
if err = Reconcile(kubernetesClient, dynamicClient, eventManager); err != nil {
logger.Info(fmt.Sprintf("Error during execution: %s", err.Error()))
executionFailedCounter++
if executionFailedCounter > MaximumFailedExecutionBeforePanic {
panic(fmt.Errorf("execution failed %d times: %w", executionFailedCounter, err))
}
} else if executionFailedCounter > 0 {
log.Printf("Execution was successful after %d failed attempts, resetting counter to 0", executionFailedCounter)
logger.Info(fmt.Sprintf("Execution was successful after %d failed attempts, resetting counter to 0", executionFailedCounter))
executionFailedCounter = 0
}
log.Printf("Execution took %dms, sleeping for %s", time.Since(start).Milliseconds(), ExecutionInterval)
logger.Info(fmt.Sprintf("Execution took %dms, sleeping for %s", time.Since(start).Milliseconds(), ExecutionInterval))
time.Sleep(ExecutionInterval)
}
}
Expand All @@ -71,9 +86,7 @@ func Reconcile(kubernetesClient kubernetes.Interface, dynamicClient dynamic.Inte
if err != nil {
return err
}
if debug {
log.Println("[Reconcile] Found", len(resources), "API resources")
}
logger.Debug(fmt.Sprintf("[Reconcile] Found %d API resources", len(resources)))
timeout := make(chan bool, 1)
result := make(chan bool, 1)
go func() {
Expand All @@ -98,7 +111,7 @@ func getStartTime(item unstructured.Unstructured) metav1.Time {
if err == nil {
return metav1.NewTime(t)
}
log.Printf("Failed to parse refreshed-at timestamp '%s' for %s/%s: %s", refreshedAt, item.GetKind(), item.GetName(), err)
logger.Info("Failed to parse refreshed-at timestamp '%s' for %s/%s: %s", refreshedAt, item.GetKind(), item.GetName(), err)
}
return item.GetCreationTimestamp()
}
Expand Down Expand Up @@ -134,42 +147,40 @@ func DoReconcile(dynamicClient dynamic.Interface, eventManager *kevent.EventMana
for list == nil || continueToken != "" {
list, err = dynamicClient.Resource(gvr).List(context.TODO(), metav1.ListOptions{TimeoutSeconds: &listTimeoutSeconds, Continue: continueToken, Limit: ListLimit})
if err != nil {
log.Printf("Error checking %s from %s: %s", gvr.Resource, gvr.GroupVersion(), err)
logger.Info(fmt.Sprintf("Error checking %s from %s: %s", gvr.Resource, gvr.GroupVersion(), err))
continue
}
if list != nil {
continueToken = list.GetContinue()
}
if debug {
log.Println("Checking", len(list.Items), gvr.Resource, "from", gvr.GroupVersion())
}
logger.Debug(fmt.Sprintf("Checking %d %s from %s", len(list.Items), gvr.Resource, gvr.GroupVersion()))
for _, item := range list.Items {
ttl, exists := item.GetAnnotations()[AnnotationTTL]
if !exists {
continue
}
ttlInDuration, err = str2duration.ParseDuration(ttl)
if err != nil {
log.Printf("[%s/%s] has an invalid TTL '%s': %s\n", apiResource.Name, item.GetName(), ttl, err)
logger.Info(fmt.Sprintf("[%s/%s] has an invalid TTL '%s': %s", apiResource.Name, item.GetName(), ttl, err))
continue
}
ttlExpired := time.Now().After(getStartTime(item).Add(ttlInDuration))
if ttlExpired {
durationSinceExpired := time.Since(getStartTime(item).Add(ttlInDuration)).Round(time.Second)
log.Printf("[%s/%s] is configured with a TTL of %s, which means it has expired %s ago", apiResource.Name, item.GetName(), ttl, durationSinceExpired)
logger.Info(fmt.Sprintf("[%s/%s] is configured with a TTL of %s, which means it has expired %s ago", apiResource.Name, item.GetName(), ttl, durationSinceExpired))
err = dynamicClient.Resource(gvr).Namespace(item.GetNamespace()).Delete(context.TODO(), item.GetName(), metav1.DeleteOptions{})
if err != nil {
log.Printf("[%s/%s] failed to delete: %s\n", apiResource.Name, item.GetName(), err)
logger.Info(fmt.Sprintf("[%s/%s] failed to delete: %s", apiResource.Name, item.GetName(), err))
eventManager.Create(item.GetNamespace(), item.GetKind(), item.GetName(), "FailedToDeleteExpiredTTL", "Unable to delete expired resource:"+err.Error(), true)
// XXX: Should we retry with GracePeriodSeconds set to &0 to force immediate deletion after the first attempt failed?
} else {
log.Printf("[%s/%s] deleted", apiResource.Name, item.GetName())
logger.Info(fmt.Sprintf("[%s/%s] deleted", apiResource.Name, item.GetName()))
eventManager.Create(item.GetNamespace(), item.GetKind(), item.GetName(), "DeletedExpiredTTL", "Deleted resource because "+ttl+" or more has elapsed", false)
}
// Cool off a tiny bit to avoid hitting the API too often
time.Sleep(ThrottleDuration)
} else {
log.Printf("[%s/%s] is configured with a TTL of %s, which means it will expire in %s", apiResource.Name, item.GetName(), ttl, time.Until(getStartTime(item).Add(ttlInDuration)).Round(time.Second))
logger.Info(fmt.Sprintf("[%s/%s] is configured with a TTL of %s, which means it will expire in %s", apiResource.Name, item.GetName(), ttl, time.Until(getStartTime(item).Add(ttlInDuration)).Round(time.Second)))
}
}
// Cool off a tiny bit to avoid hitting the API too often
Expand Down
Loading