From 173859b80060ee2e3647d83bb6897a64c2e1615d Mon Sep 17 00:00:00 2001 From: ElectroNafta Date: Mon, 30 Sep 2024 16:23:14 +0200 Subject: [PATCH] feat(BRIDGE-218): observability support & metrics; reviewed remaining sentry events --- builder.go | 4 ++ internal/backend/backend.go | 8 ++-- internal/backend/user.go | 4 ++ internal/db_impl/sqlite3/client.go | 6 +++ internal/session/command.go | 3 ++ internal/session/handle_append.go | 1 + internal/session/handle_copy.go | 3 ++ internal/session/handle_create.go | 3 ++ internal/session/handle_delete.go | 3 ++ internal/session/handle_fetch.go | 1 + internal/session/handle_move.go | 3 ++ internal/session/handle_store.go | 2 + internal/session/session.go | 1 + internal/state/actions.go | 17 ++----- internal/state/responders.go | 1 + internal/state/state.go | 1 + observability/context.go | 22 +++++++++ observability/metrics/metrics.go | 73 ++++++++++++++++++++++++++++++ observability/observability.go | 16 +++++++ observability/utils.go | 30 ++++++++++++ option.go | 14 ++++++ server.go | 5 ++ 22 files changed, 204 insertions(+), 17 deletions(-) create mode 100644 observability/context.go create mode 100644 observability/metrics/metrics.go create mode 100644 observability/observability.go create mode 100644 observability/utils.go diff --git a/builder.go b/builder.go index fdff8c81..2cf9cf90 100644 --- a/builder.go +++ b/builder.go @@ -14,6 +14,7 @@ import ( "github.com/ProtonMail/gluon/internal/db_impl/sqlite3" "github.com/ProtonMail/gluon/internal/session" "github.com/ProtonMail/gluon/limits" + "github.com/ProtonMail/gluon/observability" "github.com/ProtonMail/gluon/profiling" "github.com/ProtonMail/gluon/reporter" "github.com/ProtonMail/gluon/store" @@ -39,6 +40,7 @@ type serverBuilder struct { uidValidityGenerator imap.UIDValidityGenerator panicHandler async.PanicHandler dbCI db.ClientInterface + observabilitySender observability.Sender } func newBuilder() (*serverBuilder, error) { @@ -52,6 +54,7 @@ func newBuilder() (*serverBuilder, error) { uidValidityGenerator: imap.DefaultEpochUIDValidityGenerator(), panicHandler: async.NoopPanicHandler{}, dbCI: sqlite3.NewBuilder(), + observabilitySender: nil, }, nil } @@ -121,6 +124,7 @@ func (builder *serverBuilder) build() (*Server, error) { disableParallelism: builder.disableParallelism, uidValidityGenerator: builder.uidValidityGenerator, panicHandler: builder.panicHandler, + observabilitySender: builder.observabilitySender, } return s, nil diff --git a/internal/backend/backend.go b/internal/backend/backend.go index 2e3bf37f..0c05f456 100644 --- a/internal/backend/backend.go +++ b/internal/backend/backend.go @@ -14,6 +14,8 @@ import ( "github.com/ProtonMail/gluon/imap" "github.com/ProtonMail/gluon/internal/state" "github.com/ProtonMail/gluon/limits" + "github.com/ProtonMail/gluon/observability" + "github.com/ProtonMail/gluon/observability/metrics" "github.com/ProtonMail/gluon/reporter" "github.com/ProtonMail/gluon/store" "github.com/google/uuid" @@ -121,9 +123,7 @@ func (b *Backend) AddUser(ctx context.Context, userID string, conn connector.Con } b.log.WithError(err).Errorf("First database migration failed") - reporter.ExceptionWithContext(ctx, "database migration failed", reporter.Context{ - "error": err, - }) + observability.AddOtherMetric(ctx, metrics.GenerateDatabaseMigrationFailed()) b.log.Debugf("First migration failed, recreating database") @@ -180,6 +180,7 @@ func (b *Backend) RemoveUser(ctx context.Context, userID string, removeFiles boo } if err := user.close(ctx); err != nil { + // no events in Sentry so far. reporter.MessageWithContext(ctx, "Failed to close user from Backend.RemoveUser()", reporter.Context{"error": err}, @@ -277,6 +278,7 @@ func (b *Backend) Close(ctx context.Context) error { for userID, user := range b.users { if err := user.close(ctx); err != nil { + // there's no events like this in sentry so far. reporter.MessageWithContext(ctx, "Failed to close user from Backend.Close()", reporter.Context{"error": err}, diff --git a/internal/backend/user.go b/internal/backend/user.go index 3412f675..76e5f3a3 100644 --- a/internal/backend/user.go +++ b/internal/backend/user.go @@ -14,6 +14,8 @@ import ( "github.com/ProtonMail/gluon/internal/utils" "github.com/ProtonMail/gluon/limits" "github.com/ProtonMail/gluon/logging" + "github.com/ProtonMail/gluon/observability" + "github.com/ProtonMail/gluon/observability/metrics" "github.com/ProtonMail/gluon/reporter" "github.com/ProtonMail/gluon/store" "github.com/bradenaw/juniper/xslices" @@ -149,6 +151,7 @@ func newUser( if err := user.deleteAllMessagesMarkedDeleted(ctx); err != nil { log.WithError(err).Error("Failed to remove deleted messages") + observability.AddMessageRelatedMetric(ctx, metrics.GenerateFailedToRemoveDeletedMessagesMetric()) } if err := user.cleanupStaleStoreData(ctx); err != nil { @@ -171,6 +174,7 @@ func newUser( } if err := user.apply(ctx, update); err != nil { + // there's no events like this in sentry so far. reporter.MessageWithContext(ctx, "Failed to apply connector update", reporter.Context{"error": err, "update": update.String()}, diff --git a/internal/db_impl/sqlite3/client.go b/internal/db_impl/sqlite3/client.go index 7e82f36d..6873b578 100644 --- a/internal/db_impl/sqlite3/client.go +++ b/internal/db_impl/sqlite3/client.go @@ -14,6 +14,8 @@ import ( "github.com/ProtonMail/gluon/db" "github.com/ProtonMail/gluon/imap" "github.com/ProtonMail/gluon/internal/db_impl/sqlite3/utils" + "github.com/ProtonMail/gluon/observability" + "github.com/ProtonMail/gluon/observability/metrics" "github.com/google/uuid" _ "github.com/mattn/go-sqlite3" "github.com/sirupsen/logrus" @@ -191,6 +193,10 @@ func (c *Client) wrapTx(ctx context.Context, op func(context.Context, *sql.Tx, * } if err := tx.Commit(); err != nil { + if !errors.Is(err, context.Canceled) { + observability.AddOtherMetric(ctx, metrics.GenerateFailedToCommitDatabaseTransactionMetric()) + } + if c.debug { entry.Debugf("Failed to commit Transaction") } diff --git a/internal/session/command.go b/internal/session/command.go index 18025dd4..c95e35d6 100644 --- a/internal/session/command.go +++ b/internal/session/command.go @@ -9,6 +9,8 @@ import ( "github.com/ProtonMail/gluon/imap/command" "github.com/ProtonMail/gluon/internal/response" "github.com/ProtonMail/gluon/logging" + "github.com/ProtonMail/gluon/observability" + "github.com/ProtonMail/gluon/observability/metrics" "github.com/ProtonMail/gluon/rfcparser" ) @@ -62,6 +64,7 @@ func (s *Session) startCommandReader(ctx context.Context) <-chan commandResult { } s.log.WithError(err).WithField("type", parser.LastParsedCommand()).Error("Failed to parse IMAP command") + observability.AddImapMetric(ctx, metrics.GenerateFailedParseIMAPCommandMetric()) } else { s.log.Debug(cmd.SanitizedString()) } diff --git a/internal/session/handle_append.go b/internal/session/handle_append.go index 93aa3413..ba9e36d2 100644 --- a/internal/session/handle_append.go +++ b/internal/session/handle_append.go @@ -40,6 +40,7 @@ func (s *Session) handleAppend(ctx context.Context, tag string, cmd *command.App messageUID, err := mailbox.Append(ctx, cmd.Literal, flags, cmd.DateTime) if err != nil { + // no events in sentry so far. if shouldReportIMAPCommandError(err) { reporter.MessageWithContext(ctx, "Failed to append message to mailbox from state", diff --git a/internal/session/handle_copy.go b/internal/session/handle_copy.go index 4169e1f4..30f8e89b 100644 --- a/internal/session/handle_copy.go +++ b/internal/session/handle_copy.go @@ -8,6 +8,8 @@ import ( "github.com/ProtonMail/gluon/internal/contexts" "github.com/ProtonMail/gluon/internal/response" "github.com/ProtonMail/gluon/internal/state" + "github.com/ProtonMail/gluon/observability" + "github.com/ProtonMail/gluon/observability/metrics" "github.com/ProtonMail/gluon/profiling" ) @@ -37,6 +39,7 @@ func (s *Session) handleCopy(ctx context.Context, tag string, cmd *command.Copy, } else if errors.Is(err, state.ErrNoSuchMailbox) { return response.No(tag).WithError(err).WithItems(response.ItemTryCreate()), nil } else if err != nil { + observability.AddMessageRelatedMetric(ctx, metrics.GenerateFailedToCopyMessagesMetric()) return nil, err } diff --git a/internal/session/handle_create.go b/internal/session/handle_create.go index b2a51013..6a2ca074 100644 --- a/internal/session/handle_create.go +++ b/internal/session/handle_create.go @@ -7,6 +7,8 @@ import ( "github.com/ProtonMail/gluon/imap" "github.com/ProtonMail/gluon/imap/command" "github.com/ProtonMail/gluon/internal/response" + "github.com/ProtonMail/gluon/observability" + "github.com/ProtonMail/gluon/observability/metrics" "github.com/ProtonMail/gluon/profiling" ) @@ -24,6 +26,7 @@ func (s *Session) handleCreate(ctx context.Context, tag string, cmd *command.Cre } if err := s.state.Create(ctx, nameUTF8); err != nil { + observability.AddMessageRelatedMetric(ctx, metrics.GenerateFailedToCreateMailbox()) return err } diff --git a/internal/session/handle_delete.go b/internal/session/handle_delete.go index 09f04ec5..9b41c8c0 100644 --- a/internal/session/handle_delete.go +++ b/internal/session/handle_delete.go @@ -7,6 +7,8 @@ import ( "github.com/ProtonMail/gluon/imap" "github.com/ProtonMail/gluon/imap/command" "github.com/ProtonMail/gluon/internal/response" + "github.com/ProtonMail/gluon/observability" + "github.com/ProtonMail/gluon/observability/metrics" "github.com/ProtonMail/gluon/profiling" ) @@ -25,6 +27,7 @@ func (s *Session) handleDelete(ctx context.Context, tag string, cmd *command.Del selectedDeleted, err := s.state.Delete(ctx, nameUTF8) if err != nil { + observability.AddOtherMetric(ctx, metrics.GenerateFailedToDeleteMailboxMetric()) return err } diff --git a/internal/session/handle_fetch.go b/internal/session/handle_fetch.go index c87aa95a..4977fb60 100644 --- a/internal/session/handle_fetch.go +++ b/internal/session/handle_fetch.go @@ -25,6 +25,7 @@ func (s *Session) handleFetch(ctx context.Context, tag string, cmd *command.Fetc return response.Bad(tag).WithError(err), nil } else if err != nil { if shouldReportIMAPCommandError(err) { + // there's no events like this in sentry so far. reporter.MessageWithContext(ctx, "Failed to fetch messages", reporter.Context{"error": err}, diff --git a/internal/session/handle_move.go b/internal/session/handle_move.go index ec047e82..ebf6c628 100644 --- a/internal/session/handle_move.go +++ b/internal/session/handle_move.go @@ -8,6 +8,8 @@ import ( "github.com/ProtonMail/gluon/internal/contexts" "github.com/ProtonMail/gluon/internal/response" "github.com/ProtonMail/gluon/internal/state" + "github.com/ProtonMail/gluon/observability" + "github.com/ProtonMail/gluon/observability/metrics" "github.com/ProtonMail/gluon/profiling" ) @@ -35,6 +37,7 @@ func (s *Session) handleMove(ctx context.Context, tag string, cmd *command.Move, } else if errors.Is(err, state.ErrNoSuchMailbox) { return response.No(tag).WithError(err).WithItems(response.ItemTryCreate()), nil } else if err != nil { + observability.AddMessageRelatedMetric(ctx, metrics.GenerateFailedToMoveMessagesFromMailboxMetric()) return nil, err } diff --git a/internal/session/handle_store.go b/internal/session/handle_store.go index 7a9c45fa..a6d9ea4f 100644 --- a/internal/session/handle_store.go +++ b/internal/session/handle_store.go @@ -38,6 +38,8 @@ func (s *Session) handleStore(ctx context.Context, tag string, cmd *command.Stor return response.Bad(tag).WithError(err), nil } else if err != nil { if shouldReportIMAPCommandError(err) { + // A result of either a failed request, or the message does not exist on the remote. + // We've agreed to keep this in sentry. reporter.MessageWithContext(ctx, "Failed to store flags on messages", reporter.Context{"error": err, "mailbox": mailbox.Name(), "action": cmd.Action.String()}, diff --git a/internal/session/session.go b/internal/session/session.go index 65f59fc6..b2344848 100644 --- a/internal/session/session.go +++ b/internal/session/session.go @@ -190,6 +190,7 @@ func (s *Session) serve(ctx context.Context) error { } if s.errorCount += 1; s.errorCount >= maxSessionError { + // there's no events like this in sentry so far. reporter.MessageWithContext(ctx, "Too many errors in session, closing connection", reporter.Context{"ID": s.imapID.String()}, diff --git a/internal/state/actions.go b/internal/state/actions.go index c5a40ad7..03b68706 100644 --- a/internal/state/actions.go +++ b/internal/state/actions.go @@ -10,7 +10,8 @@ import ( "github.com/ProtonMail/gluon/db" "github.com/ProtonMail/gluon/imap" "github.com/ProtonMail/gluon/internal/ids" - "github.com/ProtonMail/gluon/reporter" + "github.com/ProtonMail/gluon/observability" + "github.com/ProtonMail/gluon/observability/metrics" "github.com/ProtonMail/gluon/rfc822" "github.com/bradenaw/juniper/xslices" "golang.org/x/exp/slices" @@ -111,20 +112,8 @@ func (state *State) actionCreateMessage( return nil, 0, knownErr } if knownErr == nil { - // Try to collect the original message date. - var existingMessageDate time.Time - if existingMessage, msgErr := tx.GetMessageNoEdges(ctx, internalID); msgErr == nil { - existingMessageDate = existingMessage.Date - } - if cameFromDrafts { - reporter.ExceptionWithContext(ctx, "Append to drafts must not return an existing RemoteID", reporter.Context{ - "remote-id": res.ID, - "new-date": res.Date, - "original-date": existingMessageDate, - "mailbox": mboxID.RemoteID, - }) - + observability.AddOtherMetric(ctx, metrics.GenerateAppendToDraftsMustNotReturnExistingRemoteID()) state.log.Errorf("Append to drafts must not return an existing RemoteID (Remote=%v, Internal=%v)", res.ID, knownInternalID) return nil, 0, fmt.Errorf("append to drafts returned an existing remote ID") diff --git a/internal/state/responders.go b/internal/state/responders.go index 1bba1e2f..6d5704fd 100644 --- a/internal/state/responders.go +++ b/internal/state/responders.go @@ -106,6 +106,7 @@ func (u *targetedExists) handle(ctx context.Context, snap *snapshot, stateID Sta if u.originStateSet && u.originStateID == stateID { if err := snap.appendMessage(u.resp.messageID, u.resp.messageUID, flags); err != nil { + // No events reported so far. reporter.ExceptionWithContext(ctx, "Failed to append message to snap via targetedExists", reporter.Context{"error": err}) return nil, nil, err } diff --git a/internal/state/state.go b/internal/state/state.go index bfd3c123..62e9ab14 100644 --- a/internal/state/state.go +++ b/internal/state/state.go @@ -624,6 +624,7 @@ func (state *State) ApplyUpdate(ctx context.Context, update Update) error { if err := state.user.GetDB().Write(ctx, func(ctx context.Context, tx db.Transaction) error { return update.Apply(ctx, tx, state) }); err != nil { + // Only one has been reported in Sentry so far... reporter.MessageWithContext(ctx, "Failed to apply state update", reporter.Context{"error": err, "update": update.String()}, diff --git a/observability/context.go b/observability/context.go new file mode 100644 index 00000000..84d94532 --- /dev/null +++ b/observability/context.go @@ -0,0 +1,22 @@ +package observability + +import "context" + +type observabilitySenderKeyType struct{} + +var observabilitySenderKeyVal observabilitySenderKeyType + +func NewContextWithObservabilitySender(ctx context.Context, sender Sender) context.Context { + return context.WithValue(ctx, observabilitySenderKeyVal, sender) +} + +func getObservabilitySenderFromContext(ctx context.Context) (Sender, bool) { + v := ctx.Value(observabilitySenderKeyVal) + if v == nil { + return nil, false + } + + sender, ok := v.(Sender) + + return sender, ok +} diff --git a/observability/metrics/metrics.go b/observability/metrics/metrics.go new file mode 100644 index 00000000..c15107d1 --- /dev/null +++ b/observability/metrics/metrics.go @@ -0,0 +1,73 @@ +package metrics + +import "time" + +const schemaName = "bridge_gluon_errors_total" +const schemaVersion = 1 + +func generateGluonErrorMetric(errorType string) map[string]interface{} { + return map[string]interface{}{ + "Name": schemaName, + "Version": schemaVersion, + "Timestamp": time.Now().Unix(), + "Data": map[string]interface{}{ + "Value": 1, + "Labels": map[string]string{ + "errorType": errorType, + }, + }, + } +} + +func GenerateFailedParseIMAPCommandMetric() map[string]interface{} { + return generateGluonErrorMetric("failedParseIMAPCommand") +} + +func GenerateFailedToCreateMailbox() map[string]interface{} { + return generateGluonErrorMetric("failedCreateMailbox") +} + +func GenerateFailedToDeleteMailboxMetric() map[string]interface{} { + return generateGluonErrorMetric("failedDeleteMailbox") +} + +func GenerateFailedToCopyMessagesMetric() map[string]interface{} { + return generateGluonErrorMetric("failedCopyMessages") +} + +func GenerateFailedToMoveMessagesFromMailboxMetric() map[string]interface{} { + return generateGluonErrorMetric("failedMoveMessagesFromMailbox") +} + +func GenerateFailedToRemoveDeletedMessagesMetric() map[string]interface{} { + return generateGluonErrorMetric("failedRemoveDeletedMessages") +} + +func GenerateFailedToCommitDatabaseTransactionMetric() map[string]interface{} { + return generateGluonErrorMetric("failedCommitDatabaseTransaction") +} + +func GenerateAppendToDraftsMustNotReturnExistingRemoteID() map[string]interface{} { + return generateGluonErrorMetric("appendToDraftsReturnedExistingRemoteID") +} + +func GenerateDatabaseMigrationFailed() map[string]interface{} { + return generateGluonErrorMetric("databaseMigrationFailed") +} + +func GenerateAllMetrics() []map[string]interface{} { + var metrics []map[string]interface{} + metrics = append(metrics, + GenerateFailedParseIMAPCommandMetric(), + GenerateFailedToCreateMailbox(), + GenerateFailedToDeleteMailboxMetric(), + GenerateFailedToCopyMessagesMetric(), + GenerateFailedToMoveMessagesFromMailboxMetric(), + GenerateFailedToRemoveDeletedMessagesMetric(), + GenerateFailedToCommitDatabaseTransactionMetric(), + GenerateAppendToDraftsMustNotReturnExistingRemoteID(), + GenerateDatabaseMigrationFailed(), + ) + + return metrics +} diff --git a/observability/observability.go b/observability/observability.go new file mode 100644 index 00000000..0603de88 --- /dev/null +++ b/observability/observability.go @@ -0,0 +1,16 @@ +package observability + +var imapErrorMetricType int +var messageErrorMetricType int +var otherErrorMetricType int + +type Sender interface { + AddMetrics(metrics ...map[string]interface{}) + AddDistinctMetrics(errType interface{}, metrics ...map[string]interface{}) +} + +func SetupMetricTypes(imapErrorType, messageErrorType, otherErrorType int) { + imapErrorMetricType = imapErrorType + messageErrorMetricType = messageErrorType + otherErrorMetricType = otherErrorType +} diff --git a/observability/utils.go b/observability/utils.go new file mode 100644 index 00000000..b27fbf90 --- /dev/null +++ b/observability/utils.go @@ -0,0 +1,30 @@ +package observability + +import "context" + +func AddImapMetric(ctx context.Context, metric ...map[string]interface{}) { + sender, ok := getObservabilitySenderFromContext(ctx) + if !ok { + return + } + + sender.AddDistinctMetrics(imapErrorMetricType, metric...) +} + +func AddMessageRelatedMetric(ctx context.Context, metric ...map[string]interface{}) { + sender, ok := getObservabilitySenderFromContext(ctx) + if !ok { + return + } + + sender.AddDistinctMetrics(messageErrorMetricType, metric...) +} + +func AddOtherMetric(ctx context.Context, metric ...map[string]interface{}) { + sender, ok := getObservabilitySenderFromContext(ctx) + if !ok { + return + } + + sender.AddDistinctMetrics(otherErrorMetricType, metric...) +} diff --git a/option.go b/option.go index 49b6a211..683785e9 100644 --- a/option.go +++ b/option.go @@ -9,6 +9,7 @@ import ( "github.com/ProtonMail/gluon/db" "github.com/ProtonMail/gluon/imap" limits2 "github.com/ProtonMail/gluon/limits" + "github.com/ProtonMail/gluon/observability" "github.com/ProtonMail/gluon/profiling" "github.com/ProtonMail/gluon/reporter" "github.com/ProtonMail/gluon/store" @@ -243,3 +244,16 @@ func (w withDBClient) config(builder *serverBuilder) { func WithDBClient(ci db.ClientInterface) Option { return &withDBClient{ci: ci} } + +type withObservabilitySender struct { + sender observability.Sender +} + +func (w withObservabilitySender) config(builder *serverBuilder) { + builder.observabilitySender = w.sender +} + +func WithObsSender(sender observability.Sender, imapErrorType, messageErrorType, otherErrorType int) Option { + observability.SetupMetricTypes(imapErrorType, messageErrorType, otherErrorType) + return &withObservabilitySender{sender: sender} +} diff --git a/server.go b/server.go index 5fcbffc5..8abf3782 100644 --- a/server.go +++ b/server.go @@ -19,6 +19,7 @@ import ( "github.com/ProtonMail/gluon/internal/contexts" "github.com/ProtonMail/gluon/internal/session" "github.com/ProtonMail/gluon/logging" + "github.com/ProtonMail/gluon/observability" "github.com/ProtonMail/gluon/profiling" "github.com/ProtonMail/gluon/reporter" "github.com/ProtonMail/gluon/store" @@ -88,6 +89,8 @@ type Server struct { uidValidityGenerator imap.UIDValidityGenerator panicHandler async.PanicHandler + + observabilitySender observability.Sender } // New creates a new server with the given options. @@ -121,6 +124,7 @@ func (s *Server) AddUser(ctx context.Context, conn connector.Connector, passphra // LoadUser adds an existing user using a previously crated unique user ID. // It returns true if the user was newly created, false if it already existed. func (s *Server) LoadUser(ctx context.Context, conn connector.Connector, userID string, passphrase []byte) (bool, error) { + ctx = observability.NewContextWithObservabilitySender(ctx, s.observabilitySender) ctx = reporter.NewContextWithReporter(ctx, s.reporter) isNew, err := s.backend.AddUser(ctx, userID, conn, passphrase, s.uidValidityGenerator) @@ -172,6 +176,7 @@ func (s *Server) AddWatcher(ofType ...events.Event) <-chan events.Event { // Serve serves connections accepted from the given listener. // It stops serving when the context is canceled, the listener is closed, or the server is closed. func (s *Server) Serve(ctx context.Context, l net.Listener) error { + ctx = observability.NewContextWithObservabilitySender(ctx, s.observabilitySender) ctx = reporter.NewContextWithReporter(ctx, s.reporter) ctx = contexts.NewDisableParallelismCtx(ctx, s.disableParallelism)