From 58d20a13ecf50bb5352d54f10b152cabd50bdb91 Mon Sep 17 00:00:00 2001
From: Travis Bischel
Date: Sat, 20 Jul 2024 18:52:36 -0600
Subject: [PATCH 1/3] producer: fully account that we are unblocked

The select has three cases; two use drainBuffered, which properly adds -1
to the blocked count. The first case did not; now it does.

Practically, this meant that if you were ever blocked while lingering, you
would never linger again.

For #726.
---
 pkg/kgo/producer.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pkg/kgo/producer.go b/pkg/kgo/producer.go
index cfc6aa14..f09b4712 100644
--- a/pkg/kgo/producer.go
+++ b/pkg/kgo/producer.go
@@ -448,6 +448,7 @@ func (cl *Client) produce(
 		select {
 		case <-p.waitBuffer:
 			cl.cfg.logger.Log(LogLevelDebug, "Produce block signaled, continuing to produce")
+			cl.producer.blocked.Add(-1)
 		case <-cl.ctx.Done():
 			drainBuffered(ErrClientClosed)
 			cl.cfg.logger.Log(LogLevelDebug, "client ctx canceled while blocked in Produce, returning")

From 24fbb0f976964c64ac7e545a09f4b0a05c9922f6 Mon Sep 17 00:00:00 2001
From: Travis Bischel
Date: Sun, 21 Jul 2024 20:17:29 -0600
Subject: [PATCH 2/3] kgo: fix deadlock in Produce when using MaxBufferedBytes

Copying from the issue:

"""
1) Produce() record A (100 bytes)
2) Produce() record B (50 bytes), waiting for buffer to free
3) Produce() record C (50 bytes), waiting for buffer to free
4) Record A is produced, finishRecordPromise() gets called, detects it was
   over the limit so publish 1 message to waitBuffer
5) Record B is unlocked, finishRecordPromise() gets called, does not detect
   it was over the limit (only 50 bytes), so record C is never unblocked and
   will wait indefinitely on waitBuffer
"""

The fix requires adding a lock while producing. This reuses the existing
lock on the `producer` type. This can lead to a few more spurious wakeups
in other functions that use the same mutex, but that's fine.

The prior algorithm counted anything to produce immediately into the
buffered records and bytes fields; the fix for #777 is not possible unless
we avoid counting the "buffered" aspect right away. Specifically, we need a
goroutine looping with a sync.Cond that checks: *IF* we add the record,
will we still be blocked? This allows us to always wake up all blocked
goroutines (rather than one at a time, the problem this issue points out),
and each goroutine can check under the lock whether its record still does
not fit.

This also fixes an unreported bug where, if a record WOULD be blocked but
fails early (no topic, or not in a transaction while using a transactional
client), the serial promise-finishing goroutine would deadlock.

Closes #777.
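For illustration only: below is a standalone sketch of the wake-all-and-recheck
pattern described above, not code from this patch. The buffer type and its
add/done methods are invented for the example. Every completion broadcasts, and
every blocked producer re-checks under the lock whether its own record now fits.

    // Standalone sketch (hypothetical names, not kgo code): a byte-bounded
    // buffer where add blocks until the caller's record fits, and done
    // broadcasts so that every waiter re-checks its own fit.
    package main

    import (
        "fmt"
        "sync"
    )

    type buffer struct {
        mu    sync.Mutex
        c     *sync.Cond
        bytes int64
        max   int64
    }

    func newBuffer(max int64) *buffer {
        b := &buffer{max: max}
        b.c = sync.NewCond(&b.mu)
        return b
    }

    // add blocks until size fits under max, then accounts for it.
    func (b *buffer) add(size int64) {
        b.mu.Lock()
        defer b.mu.Unlock()
        for b.bytes+size > b.max { // re-check on every wakeup
            b.c.Wait()
        }
        b.bytes += size
    }

    // done releases size and wakes ALL waiters, so each one re-checks
    // whether its own record now fits; signaling only one waiter at a
    // time is the kind of lost wakeup described in the issue above.
    func (b *buffer) done(size int64) {
        b.mu.Lock()
        b.bytes -= size
        b.mu.Unlock()
        b.c.Broadcast()
    }

    func main() {
        b := newBuffer(100)
        var wg sync.WaitGroup
        for _, size := range []int64{100, 50, 50} { // records A, B, C from the issue
            wg.Add(1)
            go func(size int64) {
                defer wg.Done()
                b.add(size)
                defer b.done(size)
                fmt.Println("produced", size, "bytes")
            }(size)
        }
        wg.Wait()
    }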
---
 pkg/kgo/consumer_direct_test.go |   6 +-
 pkg/kgo/produce_request_test.go |  62 +++++++++++
 pkg/kgo/producer.go             | 188 ++++++++++++++++++++------------
 pkg/kgo/sink.go                 |   2 +-
 4 files changed, 187 insertions(+), 71 deletions(-)

diff --git a/pkg/kgo/consumer_direct_test.go b/pkg/kgo/consumer_direct_test.go
index 884c427b..dc12083d 100644
--- a/pkg/kgo/consumer_direct_test.go
+++ b/pkg/kgo/consumer_direct_test.go
@@ -338,7 +338,8 @@ func TestPauseIssue489(t *testing.T) {
 					exit.Store(true)
 				}
 			})
-			time.Sleep(100 * time.Microsecond)
+			cl.Flush(ctx)
+			time.Sleep(50 * time.Microsecond)
 		}
 	}()
 	defer cancel()
@@ -416,7 +417,8 @@ func TestPauseIssueOct2023(t *testing.T) {
 					exit.Store(true)
 				}
 			})
-			time.Sleep(100 * time.Microsecond)
+			cl.Flush(ctx)
+			time.Sleep(50 * time.Microsecond)
 		}
 	}()
 	defer cancel()
diff --git a/pkg/kgo/produce_request_test.go b/pkg/kgo/produce_request_test.go
index 8a652a2f..2c7cde56 100644
--- a/pkg/kgo/produce_request_test.go
+++ b/pkg/kgo/produce_request_test.go
@@ -2,13 +2,75 @@ package kgo
 
 import (
 	"bytes"
+	"context"
+	"errors"
 	"hash/crc32"
+	"math/rand"
+	"strings"
+	"sync"
+	"sync/atomic"
 	"testing"
 
 	"github.com/twmb/franz-go/pkg/kbin"
 	"github.com/twmb/franz-go/pkg/kmsg"
 )
 
+func TestClient_Produce(t *testing.T) {
+	var (
+		topic, cleanup = tmpTopicPartitions(t, 1)
+		numWorkers     = 50
+		recsToWrite    = int64(20_000)
+
+		workers      sync.WaitGroup
+		writeSuccess atomic.Int64
+		writeFailure atomic.Int64
+
+		randRec = func() *Record {
+			return &Record{
+				Key:   []byte("test"),
+				Value: []byte(strings.Repeat("x", rand.Intn(1000))),
+				Topic: topic,
+			}
+		}
+	)
+	defer cleanup()
+
+	cl, _ := newTestClient(MaxBufferedBytes(5000))
+	defer cl.Close()
+
+	// Start N workers that will concurrently write to the same partition.
+	var recsWritten atomic.Int64
+	var fatal atomic.Bool
+	for i := 0; i < numWorkers; i++ {
+		workers.Add(1)
+
+		go func() {
+			defer workers.Done()
+
+			for recsWritten.Add(1) <= recsToWrite {
+				res := cl.ProduceSync(context.Background(), randRec())
+				if err := res.FirstErr(); err == nil {
+					writeSuccess.Add(1)
+				} else {
+					if !errors.Is(err, ErrMaxBuffered) {
+						t.Errorf("unexpected error: %v", err)
+						fatal.Store(true)
+					}
+
+					writeFailure.Add(1)
+				}
+			}
+		}()
+	}
+	workers.Wait()
+
+	t.Logf("writes succeeded: %d", writeSuccess.Load())
+	t.Logf("writes failed: %d", writeFailure.Load())
+	if fatal.Load() {
+		t.Fatal("failed")
+	}
+}
+
 // This file contains golden tests against kmsg AppendTo's to ensure our custom
 // encoding is correct.
diff --git a/pkg/kgo/producer.go b/pkg/kgo/producer.go
index f09b4712..ce8fdb61 100644
--- a/pkg/kgo/producer.go
+++ b/pkg/kgo/producer.go
@@ -14,9 +14,15 @@ import (
 )
 
 type producer struct {
-	bufferedRecords atomicI64
-	bufferedBytes   atomicI64
-	inflight        atomicI64 // high 16: # waiters, low 48: # inflight
+	inflight atomicI64 // high 16: # waiters, low 48: # inflight
+
+	// mu and c are used for flush and drain notifications; mu is used for
+	// a few other tight locks.
+	mu sync.Mutex
+	c  *sync.Cond
+
+	bufferedRecords int64
+	bufferedBytes   int64
 
 	cl *Client
 
@@ -45,19 +51,14 @@ type producer struct {
 	// We must have a producer field for flushing; we cannot just have a
 	// field on recBufs that is toggled on flush. If we did, then a new
 	// recBuf could be created and records sent to while we are flushing.
-	flushing atomicI32 // >0 if flushing, can Flush many times concurrently
-	blocked  atomicI32 // >0 if over max recs or bytes
+	flushing     atomicI32 // >0 if flushing, can Flush many times concurrently
+	blocked      atomicI32 // >0 if over max recs or bytes
+	blockedBytes int64
 	aborting atomicI32 // >0 if aborting, can abort many times concurrently
 
-	idMu       sync.Mutex
-	idVersion  int16
-	waitBuffer chan struct{}
-
-	// mu and c are used for flush and drain notifications; mu is used for
-	// a few other tight locks.
-	mu sync.Mutex
-	c  *sync.Cond
+	idMu      sync.Mutex
+	idVersion int16
 
 	batchPromises ringBatchPromise
 	promisesMu    sync.Mutex
@@ -86,14 +87,18 @@
 // flushing records produced by your client (which can help determine network /
 // cluster health).
 func (cl *Client) BufferedProduceRecords() int64 {
-	return cl.producer.bufferedRecords.Load()
+	cl.producer.mu.Lock()
+	defer cl.producer.mu.Unlock()
+	return cl.producer.bufferedRecords + int64(cl.producer.blocked.Load())
 }
 
 // BufferedProduceBytes returns the number of bytes currently buffered for
 // producing within the client. This is the sum of all keys, values, and header
 // keys/values. See the related [BufferedProduceRecords] for more information.
 func (cl *Client) BufferedProduceBytes() int64 {
-	return cl.producer.bufferedBytes.Load()
+	cl.producer.mu.Lock()
+	defer cl.producer.mu.Unlock()
+	return cl.producer.bufferedBytes + cl.producer.blockedBytes
 }
 
 type unknownTopicProduces struct {
@@ -106,7 +111,6 @@ func (p *producer) init(cl *Client) {
 	p.cl = cl
 	p.topics = newTopicsPartitions()
 	p.unknownTopics = make(map[string]*unknownTopicProduces)
-	p.waitBuffer = make(chan struct{}, math.MaxInt32)
 	p.idVersion = -1
 	p.id.Store(&producerID{
 		id:    -1,
@@ -397,58 +401,93 @@ func (cl *Client) produce(
 		}
 	}
 
-	var (
-		userSize     = r.userSize()
-		bufRecs      = p.bufferedRecords.Add(1)
-		bufBytes     = p.bufferedBytes.Add(userSize)
-		overMaxRecs  = bufRecs > cl.cfg.maxBufferedRecords
-		overMaxBytes bool
-	)
-	if cl.cfg.maxBufferedBytes > 0 {
-		if userSize > cl.cfg.maxBufferedBytes {
-			p.promiseRecord(promisedRec{ctx, promise, r}, kerr.MessageTooLarge)
-			return
-		}
-		overMaxBytes = bufBytes > cl.cfg.maxBufferedBytes
-	}
-
+	// We can now fail the rec after the buffered hook.
 	if r.Topic == "" {
-		p.promiseRecord(promisedRec{ctx, promise, r}, errNoTopic)
+		p.promiseRecordBeforeBuf(promisedRec{ctx, promise, r}, errNoTopic)
 		return
 	}
 	if cl.cfg.txnID != nil && !p.producingTxn.Load() {
-		p.promiseRecord(promisedRec{ctx, promise, r}, errNotInTransaction)
+		p.promiseRecordBeforeBuf(promisedRec{ctx, promise, r}, errNotInTransaction)
 		return
 	}
 
+	userSize := r.userSize()
+	if cl.cfg.maxBufferedBytes > 0 && userSize > cl.cfg.maxBufferedBytes {
+		p.promiseRecordBeforeBuf(promisedRec{ctx, promise, r}, kerr.MessageTooLarge)
+		return
+	}
+
+	// We have to grab the produce lock to check if this record will exceed
+	// configured limits. We try to keep the logic tight since this is
+	// effectively a global lock around producing.
+	var (
+		nextBufRecs, nextBufBytes int64
+		overMaxRecs, overMaxBytes bool
+
+		calcNums = func() {
+			nextBufRecs = p.bufferedRecords + 1
+			nextBufBytes = p.bufferedBytes + userSize
+			overMaxRecs = nextBufRecs > cl.cfg.maxBufferedRecords
+			overMaxBytes = cl.cfg.maxBufferedBytes > 0 && nextBufBytes > cl.cfg.maxBufferedBytes
+		}
+	)
+	p.mu.Lock()
+	calcNums()
 	if overMaxRecs || overMaxBytes {
+		if !block || cl.cfg.manualFlushing {
+			p.mu.Unlock()
+			p.promiseRecordBeforeBuf(promisedRec{ctx, promise, r}, ErrMaxBuffered)
+			return
+		}
+
+		// Before we potentially unlinger, add that we are blocked to
+		// ensure we do NOT start a linger anymore. We THEN wakeup
+		// anything that is actively lingering. Note that blocked is
+		// also used when finishing promises to see if we need to be
+		// notified.
+		p.blocked.Add(1)
+		p.blockedBytes += userSize
+		p.mu.Unlock()
+
 		cl.cfg.logger.Log(LogLevelDebug, "blocking Produce because we are either over max buffered records or max buffered bytes",
 			"over_max_records", overMaxRecs,
 			"over_max_bytes", overMaxBytes,
 		)
-		// Before we potentially unlinger, add that we are blocked.
-		// Lingering always checks blocked, so we will not start a
-		// linger while we are blocked. We THEN wakeup anything that
-		// is actively lingering.
-		cl.producer.blocked.Add(1)
+
 		cl.unlingerDueToMaxRecsBuffered()
-		// If the client ctx cancels or the produce ctx cancels, we
-		// need to un-count our buffering of this record. We also need
-		// to drain a slot from the waitBuffer chan, which could be
-		// sent to right when we are erroring.
+
+		// We keep the lock when we exit. If we are flushing, we want
+		// this blocked record to be produced before we return from
+		// flushing. This blocked record will be accounted for in the
+		// bufferedRecords addition below, after being removed from
+		// blocked in the goroutine.
+		wait := make(chan struct{})
+		var quit bool
+		go func() {
+			defer close(wait)
+			p.mu.Lock()
+			calcNums()
+			for !quit && (overMaxRecs || overMaxBytes) {
+				p.c.Wait()
+				calcNums()
+			}
+			p.blocked.Add(-1)
+			p.blockedBytes -= userSize
+		}()
+
 		drainBuffered := func(err error) {
-			p.promiseRecord(promisedRec{ctx, promise, r}, err)
-			<-p.waitBuffer
-			cl.producer.blocked.Add(-1)
-		}
-		if !block || cl.cfg.manualFlushing {
-			drainBuffered(ErrMaxBuffered)
-			return
+			p.mu.Lock()
+			quit = true
+			p.mu.Unlock()
+			p.c.Broadcast() // wake the goroutine above
+			<-wait
+			p.mu.Unlock() // we wait for the goroutine to exit, then unlock again (since the goroutine leaves the mutex locked)
+			p.promiseRecordBeforeBuf(promisedRec{ctx, promise, r}, err)
 		}
+
 		select {
-		case <-p.waitBuffer:
-			cl.cfg.logger.Log(LogLevelDebug, "Produce block signaled, continuing to produce")
-			cl.producer.blocked.Add(-1)
+		case <-wait:
+			cl.cfg.logger.Log(LogLevelDebug, "Produce block awoken, we now have space to produce, continuing to partition and produce")
 		case <-cl.ctx.Done():
 			drainBuffered(ErrClientClosed)
 			cl.cfg.logger.Log(LogLevelDebug, "client ctx canceled while blocked in Produce, returning")
@@ -459,6 +498,9 @@
 			return
 		}
 	}
+	p.bufferedRecords = nextBufRecs
+	p.bufferedBytes = nextBufBytes
+	p.mu.Unlock()
 
 	cl.partitionRecord(promisedRec{ctx, promise, r})
 }
@@ -468,6 +510,7 @@ type batchPromise struct {
 	pid       int64
 	epoch     int16
 	attrs     RecordAttrs
+	beforeBuf bool
 	partition int32
 	recs      []promisedRec
 	err       error
@@ -483,6 +526,10 @@ func (p *producer) promiseRecord(pr promisedRec, err error) {
 	p.promiseBatch(batchPromise{recs: []promisedRec{pr}, err: err})
 }
 
+func (p *producer) promiseRecordBeforeBuf(pr promisedRec, err error) {
+	p.promiseBatch(batchPromise{recs: []promisedRec{pr}, beforeBuf: true, err: err})
+}
+
 func (p *producer) finishPromises(b batchPromise) {
 	cl := p.cl
 	var more bool
@@ -495,7 +542,7 @@ start:
 		pr.ProducerID = b.pid
 		pr.ProducerEpoch = b.epoch
 		pr.Attrs = b.attrs
-		cl.finishRecordPromise(pr, b.err)
+		cl.finishRecordPromise(pr, b.err, b.beforeBuf)
 		b.recs[i] = promisedRec{}
 	}
 	p.promisesMu.Unlock()
@@ -509,7 +556,7 @@ start:
 	}
 }
 
-func (cl *Client) finishRecordPromise(pr promisedRec, err error) {
+func (cl *Client) finishRecordPromise(pr promisedRec, err error, beforeBuffering bool) {
 	p := &cl.producer
 
 	if p.hooks != nil && len(p.hooks.unbuffered) > 0 {
@@ -519,22 +566,27 @@ func (cl *Client) finishRecordPromise(pr promisedRec, err error) {
 	}
 
 	// Capture user size before potential modification by the promise.
+	//
+	// We call the promise before finishing the flush notification,
+	// allowing users of Flush to know all buf recs are done by the
+	// time we notify flush below.
 	userSize := pr.userSize()
-	nowBufBytes := p.bufferedBytes.Add(-userSize)
-	nowBufRecs := p.bufferedRecords.Add(-1)
-	wasOverMaxRecs := nowBufRecs >= cl.cfg.maxBufferedRecords
-	wasOverMaxBytes := cl.cfg.maxBufferedBytes > 0 && nowBufBytes+userSize > cl.cfg.maxBufferedBytes
-
-	// We call the promise before finishing the record; this allows users
-	// of Flush to know that all buffered records are completely done
-	// before Flush returns.
 	pr.promise(pr.Record, err)
 
-	if wasOverMaxRecs || wasOverMaxBytes {
-		p.waitBuffer <- struct{}{}
-	} else if nowBufRecs == 0 && p.flushing.Load() > 0 {
-		p.mu.Lock()
-		p.mu.Unlock() //nolint:gocritic,staticcheck // We use the lock as a barrier, unlocking immediately is safe.
+	// If this record was never buffered, its size was never accounted
+	// for on any p field: return early.
+	if beforeBuffering {
+		return
+	}
+
+	// Keep the lock as tight as possible: the broadcast can come after.
+	p.mu.Lock()
+	p.bufferedBytes -= userSize
+	p.bufferedRecords--
+	broadcast := p.blocked.Load() > 0 || p.bufferedRecords == 0 && p.flushing.Load() > 0
+	p.mu.Unlock()
+
+	if broadcast {
 		p.c.Broadcast()
 	}
 }
@@ -1021,7 +1073,7 @@ func (cl *Client) Flush(ctx context.Context) error {
 			defer p.mu.Unlock()
 			defer close(done)
 
-			for !quit && p.bufferedRecords.Load() > 0 {
+			for !quit && p.bufferedRecords+int64(p.blocked.Load()) > 0 {
 				p.c.Wait()
 			}
 		}()
diff --git a/pkg/kgo/sink.go b/pkg/kgo/sink.go
index 49bff3b1..f5283ddf 100644
--- a/pkg/kgo/sink.go
+++ b/pkg/kgo/sink.go
@@ -258,7 +258,7 @@ func (s *sink) produce(sem <-chan struct{}) bool {
 	// We could have been triggered from a metadata update even though the
 	// user is not producing at all. If we have no buffered records, let's
 	// avoid potentially creating a producer ID.
-	if s.cl.producer.bufferedRecords.Load() == 0 {
+	if s.cl.BufferedProduceRecords() == 0 {
 		return false
 	}

From 9e32897dc6b95fc6e683c40d14aafc0996f14eb7 Mon Sep 17 00:00:00 2001
From: Travis Bischel
Date: Sun, 28 Jul 2024 21:47:42 -0600
Subject: [PATCH 3/3] .github: allow broker tests to run concurrent with linting, add 5m timeout

---
 .github/workflows/lint-and-test.yml | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/lint-and-test.yml b/.github/workflows/lint-and-test.yml
index 34ac7d36..e111928f 100644
--- a/.github/workflows/lint-and-test.yml
+++ b/.github/workflows/lint-and-test.yml
@@ -31,7 +31,6 @@ jobs:
 
   integration-test-kafka:
     if: github.repository == 'twmb/franz-go'
-    needs: golangci
     runs-on: ubuntu-latest
     name: "integration test kafka"
     container: golang:latest
@@ -55,7 +54,7 @@
           KAFKA_KRAFT_CLUSTER_ID: XkpGZQ27R3eTl3OdTm2LYA # 16 byte base64-encoded UUID
     steps:
       - uses: actions/checkout@v4
-      - run: go test ./...
+      - run: go test -timeout 5m ./...
         env:
           KGO_TEST_RF: 1
           KGO_SEEDS: kafka:9092
@@ -63,7 +62,6 @@
 
   integration-test-redpanda:
     if: github.repository == 'twmb/franz-go'
-    needs: golangci
     runs-on: ubuntu-latest
    name: "integration test redpanda"
    container: golang:latest
@@ -76,7 +74,7 @@
          REDPANDA_ADVERTISE_KAFKA_ADDRESS: redpanda:9092
     steps:
       - uses: actions/checkout@v4
-      - run: go test ./...
+      - run: go test -timeout 5m ./...
        env:
          KGO_TEST_RF: 1
          KGO_SEEDS: redpanda:9092