From 59ea183995f66ea06ccc218796e1301f310d2fdc Mon Sep 17 00:00:00 2001 From: mlycore Date: Fri, 24 Nov 2023 19:00:49 +0800 Subject: [PATCH 1/9] fix: detect disk space error Signed-off-by: mlycore --- pitr/agent/internal/pkg/opengauss.go | 35 ++++++++++++++----- pitr/agent/pkg/cmds/cmd.go | 50 +++++++++++++++++++++++----- pitr/cli/internal/cmd/backup.go | 34 ++++++++++--------- 3 files changed, 85 insertions(+), 34 deletions(-) diff --git a/pitr/agent/internal/pkg/opengauss.go b/pitr/agent/internal/pkg/opengauss.go index 21872c69..176dc6b8 100644 --- a/pitr/agent/internal/pkg/opengauss.go +++ b/pitr/agent/internal/pkg/opengauss.go @@ -98,6 +98,10 @@ const ( ) func (og *openGauss) AsyncBackup(backupPath, instanceName, backupMode string, threadsNum uint8, dbPort uint16) (string, error) { + var ( + bid string + err error + ) cmd := fmt.Sprintf(_backupFmt, backupPath, instanceName, backupMode, og.pgData, threadsNum, dbPort) outputs, err := cmds.AsyncExec(og.shell, cmd) if err != nil { @@ -112,22 +116,35 @@ func (og *openGauss) AsyncBackup(backupPath, instanceName, backupMode string, th Field("pgdata", og.pgData). Debug(fmt.Sprintf("AsyncBackup output[lineNo=%d,msg=%s,err=%v]", output.LineNo, output.Message, output.Error)) + fmt.Printf("[lineNo=%d,msg=%s,err=%v]\n", output.LineNo, output.Message, output.Error) + if output.Error != nil { og.log.Error(fmt.Sprintf("output.Error[%s] is not nil", output.Error)) return "", output.Error } - // get the backup id from the first line - bid, err := og.getBackupID(output.Message) - if err != nil { - og.log.Error(fmt.Sprintf("og.getBackupID[source=%s] return err wrap: %s", output.Message, err)) - return "", err + // fmt.Printf("lalala: %s\n", output.Message) + + if strings.Contains(output.Message, "INFO: Backup start") { + bid, err = og.getBackupID(output.Message) + if err != nil { + og.log.Error(fmt.Sprintf("og.getBackupID[source=%s] return err wrap: %s", output.Message, err)) + return "", err + } } + + // get the backup id from the first line + // bid, err := og.getBackupID(output.Message) + // if err != nil { + // og.log.Error(fmt.Sprintf("og.getBackupID[source=%s] return err wrap: %s", output.Message, err)) + // return "", err + // } // ignore other output - go og.ignore(outputs) - return bid, nil //nolint + // go og.ignore(outputs) + // return bid, nil //nolint } - return "", fmt.Errorf("unknow err") + return bid, nil //nolint + // return "", fmt.Errorf("unknow err") } //nolint:dupl @@ -192,7 +209,7 @@ func (og *openGauss) AddInstance(backupPath, instance string) error { if errors.Is(err, cons.CmdOperateFailed) { og.log.Error(fmt.Sprintf("add instance failure[output=%s], err: %s, wrap: %s", output, err, cons.InstanceAlreadyExist)) - return err + return fmt.Errorf("add instance failure[output=%s], err: %s, wrap: %w", output, err, cons.InstanceAlreadyExist) } if err != nil { og.log.Error(fmt.Sprintf(_CmdErrorFmt, og.shell, cmd, cons.CmdAddInstanceFailed)) diff --git a/pitr/agent/pkg/cmds/cmd.go b/pitr/agent/pkg/cmds/cmd.go index 486148b1..61f92923 100644 --- a/pitr/agent/pkg/cmds/cmd.go +++ b/pitr/agent/pkg/cmds/cmd.go @@ -49,6 +49,7 @@ func AsyncExec(name string, args ...string) (chan *Output, error) { if err != nil { return nil, fmt.Errorf("can not obtain stdout pipe for command[args=%+v]:%s", args, err) } + if err = cmd.Start(); err != nil { return nil, fmt.Errorf("the command is err[args=%+v]:%s", args, err) } @@ -61,11 +62,30 @@ func AsyncExec(name string, args ...string) (chan *Output, error) { go func() { if err = syncutils.NewRecoverFuncWithErrRet("", func() error { for scanner.Scan() { - output <- &Output{ + op := &Output{ LineNo: index, Message: scanner.Text(), Error: err, } + if strings.Contains(scanner.Text(), "No space left on device") { + fmt.Printf("[%d] %s, [%s]\n", index, scanner.Text(), err) + op.Error = fmt.Errorf("%s", "No space left on device") + } + /* + if strings.Contains(scanner.Text(), "No space left on device") { + fmt.Printf("[%d] %s, [%s]\n", index, scanner.Text(), err) + return fmt.Errorf("%s", "No space left on device") + } + */ + /* + output <- &Output{ + LineNo: index, + Message: scanner.Text(), + Error: err, + } + */ + output <- op + index++ } @@ -78,13 +98,14 @@ func AsyncExec(name string, args ...string) (chan *Output, error) { if err = cmd.Wait(); err != nil { if ee, ok := err.(*exec.ExitError); ok { - logging.Error(fmt.Sprintf("exec failure[ee=%s], wrap=%s", ee, cons.CmdOperateFailed)) + output <- &Output{ + Error: fmt.Errorf("exec failure[ee=%s], wrap=%w", ee, cons.CmdOperateFailed), + } + } else { + output <- &Output{ + Error: fmt.Errorf("%s err: %s", cmd.String(), err), + } } - - output <- &Output{ - Error: fmt.Errorf("%s err: %s", cmd.String(), err), - } - } return nil })(); err != nil { @@ -113,6 +134,12 @@ func Exec(name string, args ...string) (string, error) { if err != nil { return "", fmt.Errorf("can not obtain stdout pipe for command[args=%+v]:%s", args, err) } + + stderr, err := cmd.StderrPipe() + if err != nil { + return "", fmt.Errorf("can not obtain stderr pipe for cmand[args=%+v]:%s", args, err) + } + if err = cmd.Start(); err != nil { return "", fmt.Errorf("the command is err[args=%+v]:%s", args, err) } @@ -122,11 +149,16 @@ func Exec(name string, args ...string) (string, error) { return "", fmt.Errorf("io.ReadAll return err=%w", err) } + ereader, err := io.ReadAll(stderr) + if err != nil { + return "", fmt.Errorf("io.ReadAll return err=%w", err) + } + if err = cmd.Wait(); err != nil { if ee, ok := err.(*exec.ExitError); ok { - logging.Error(fmt.Sprintf("exec failure[ee=%s,stdout=%s]", ee, string(reader))) + return "", fmt.Errorf("exec failure[ee=%s,stdout=%s], wrap:%w", ee, string(reader), cons.CmdOperateFailed) } - return "", fmt.Errorf("%s err: %s", cmd.String(), err) + return "", fmt.Errorf("%s err: %s", cmd.String(), string(ereader)) } return string(reader), nil } diff --git a/pitr/cli/internal/cmd/backup.go b/pitr/cli/internal/cmd/backup.go index 1b59e3c6..cfaab968 100644 --- a/pitr/cli/internal/cmd/backup.go +++ b/pitr/cli/internal/cmd/backup.go @@ -310,21 +310,23 @@ func _execBackup(as pkg.IAgentServer, node *model.StorageNode, dnCh chan *model. Instance: defaultInstance, } backupID, err := as.Backup(in) + status := model.SsBackupStatusRunning if err != nil { - return xerr.NewCliErr(err.Error()) + // return xerr.NewCliErr(fmt.Sprintf("data node backup error: %s", err)) + status = model.BackupStatus(err.Error()) } // update DnList of lsBackup dn := &model.DataNode{ IP: node.IP, Port: node.Port, - Status: model.SsBackupStatusRunning, + Status: status, BackupID: backupID, StartTime: timeutil.Now().String(), EndTime: timeutil.Init(), } dnCh <- dn - return nil + return err } func checkBackupStatus(lsBackup *model.LsBackup) model.BackupStatus { @@ -498,20 +500,20 @@ func deleteBackupFiles(ls pkg.ILocalStorage, lsBackup *model.LsBackup, m deleteM close(resultCh) - t := table.NewWriter() - t.SetOutputMirror(os.Stdout) - t.SetTitle("Delete Backup Files Result") - t.AppendHeader(table.Row{"#", "Node IP", "Node Port", "Result", "Message"}) - t.SetColumnConfigs([]table.ColumnConfig{{Number: 5, WidthMax: 50}}) - - idx := 0 - for result := range resultCh { - idx++ - t.AppendRow([]interface{}{idx, result.IP, result.Port, result.Status, result.Msg}) - t.AppendSeparator() - } + // t := table.NewWriter() + // t.SetOutputMirror(os.Stdout) + // t.SetTitle("Delete Backup Files Result") + // t.AppendHeader(table.Row{"#", "Node IP", "Node Port", "Result", "Message"}) + // t.SetColumnConfigs([]table.ColumnConfig{{Number: 5, WidthMax: 50}}) + + // idx := 0 + // for result := range resultCh { + // idx++ + // t.AppendRow([]interface{}{idx, result.IP, result.Port, result.Status, result.Msg}) + // t.AppendSeparator() + // } - t.Render() + // t.Render() } if err := ls.DeleteByName(filename); err != nil { From a8a5d1dc09cab8930d78f4a9d2cb2f1da57f5163 Mon Sep 17 00:00:00 2001 From: mlycore Date: Mon, 27 Nov 2023 09:46:56 +0800 Subject: [PATCH 2/9] feat: add data node ip and address to error message Signed-off-by: mlycore --- pitr/cli/internal/cmd/backup.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pitr/cli/internal/cmd/backup.go b/pitr/cli/internal/cmd/backup.go index cfaab968..73f2167e 100644 --- a/pitr/cli/internal/cmd/backup.go +++ b/pitr/cli/internal/cmd/backup.go @@ -326,7 +326,7 @@ func _execBackup(as pkg.IAgentServer, node *model.StorageNode, dnCh chan *model. EndTime: timeutil.Init(), } dnCh <- dn - return err + return fmt.Errorf("data node %s:%d backup error: %s", node.IP, node.Port, err) } func checkBackupStatus(lsBackup *model.LsBackup) model.BackupStatus { From cc2e039e4239f6deda2510643967ffc6e044e627 Mon Sep 17 00:00:00 2001 From: mlycore Date: Mon, 27 Nov 2023 10:32:38 +0800 Subject: [PATCH 3/9] fix: remove useless log Signed-off-by: mlycore --- pitr/agent/internal/pkg/opengauss.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pitr/agent/internal/pkg/opengauss.go b/pitr/agent/internal/pkg/opengauss.go index 176dc6b8..8f6c5c78 100644 --- a/pitr/agent/internal/pkg/opengauss.go +++ b/pitr/agent/internal/pkg/opengauss.go @@ -116,7 +116,7 @@ func (og *openGauss) AsyncBackup(backupPath, instanceName, backupMode string, th Field("pgdata", og.pgData). Debug(fmt.Sprintf("AsyncBackup output[lineNo=%d,msg=%s,err=%v]", output.LineNo, output.Message, output.Error)) - fmt.Printf("[lineNo=%d,msg=%s,err=%v]\n", output.LineNo, output.Message, output.Error) + // fmt.Printf("[lineNo=%d,msg=%s,err=%v]\n", output.LineNo, output.Message, output.Error) if output.Error != nil { og.log.Error(fmt.Sprintf("output.Error[%s] is not nil", output.Error)) From 20181fb82b4ce5ef94d230b841016f54a91b4240 Mon Sep 17 00:00:00 2001 From: mlycore Date: Mon, 27 Nov 2023 10:33:55 +0800 Subject: [PATCH 4/9] feat: add disk space error check to cmd output Signed-off-by: mlycore --- pitr/agent/pkg/cmds/cmd.go | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/pitr/agent/pkg/cmds/cmd.go b/pitr/agent/pkg/cmds/cmd.go index 61f92923..f4bf7151 100644 --- a/pitr/agent/pkg/cmds/cmd.go +++ b/pitr/agent/pkg/cmds/cmd.go @@ -68,24 +68,11 @@ func AsyncExec(name string, args ...string) (chan *Output, error) { Error: err, } if strings.Contains(scanner.Text(), "No space left on device") { - fmt.Printf("[%d] %s, [%s]\n", index, scanner.Text(), err) + // fmt.Printf("[%d] %s, [%s]\n", index, scanner.Text(), err) op.Error = fmt.Errorf("%s", "No space left on device") } - /* - if strings.Contains(scanner.Text(), "No space left on device") { - fmt.Printf("[%d] %s, [%s]\n", index, scanner.Text(), err) - return fmt.Errorf("%s", "No space left on device") - } - */ - /* - output <- &Output{ - LineNo: index, - Message: scanner.Text(), - Error: err, - } - */ - output <- op + output <- op index++ } From 56c4107f8cae6fb187a378b517000a4a2ec39866 Mon Sep 17 00:00:00 2001 From: mlycore Date: Mon, 27 Nov 2023 10:34:25 +0800 Subject: [PATCH 5/9] fix: fix return error Signed-off-by: mlycore --- pitr/cli/internal/cmd/backup.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pitr/cli/internal/cmd/backup.go b/pitr/cli/internal/cmd/backup.go index 73f2167e..a282f816 100644 --- a/pitr/cli/internal/cmd/backup.go +++ b/pitr/cli/internal/cmd/backup.go @@ -326,7 +326,10 @@ func _execBackup(as pkg.IAgentServer, node *model.StorageNode, dnCh chan *model. EndTime: timeutil.Init(), } dnCh <- dn - return fmt.Errorf("data node %s:%d backup error: %s", node.IP, node.Port, err) + if err != nil { + return fmt.Errorf("data node %s:%d backup error: %s", node.IP, node.Port, err) + } + return nil } func checkBackupStatus(lsBackup *model.LsBackup) model.BackupStatus { From 6b882b7efaceddc1fd47d1d53b28b2a8d76daefb Mon Sep 17 00:00:00 2001 From: mlycore Date: Mon, 27 Nov 2023 10:37:21 +0800 Subject: [PATCH 6/9] feat: hide delete backupfiles output Signed-off-by: mlycore --- pitr/cli/internal/cmd/backup.go | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/pitr/cli/internal/cmd/backup.go b/pitr/cli/internal/cmd/backup.go index a282f816..c54c9cab 100644 --- a/pitr/cli/internal/cmd/backup.go +++ b/pitr/cli/internal/cmd/backup.go @@ -134,12 +134,7 @@ func backup() error { } if lsBackup != nil { - if cancel { - deleteBackupFiles(ls, lsBackup, deleteModeQuiet) - } else { - logging.Warn("Try to delete backup data ...") - deleteBackupFiles(ls, lsBackup, deleteModeNormal) - } + deleteBackupFiles(ls, lsBackup, deleteModeQuiet) } } }() @@ -503,20 +498,20 @@ func deleteBackupFiles(ls pkg.ILocalStorage, lsBackup *model.LsBackup, m deleteM close(resultCh) - // t := table.NewWriter() - // t.SetOutputMirror(os.Stdout) - // t.SetTitle("Delete Backup Files Result") - // t.AppendHeader(table.Row{"#", "Node IP", "Node Port", "Result", "Message"}) - // t.SetColumnConfigs([]table.ColumnConfig{{Number: 5, WidthMax: 50}}) - - // idx := 0 - // for result := range resultCh { - // idx++ - // t.AppendRow([]interface{}{idx, result.IP, result.Port, result.Status, result.Msg}) - // t.AppendSeparator() - // } + t := table.NewWriter() + t.SetOutputMirror(os.Stdout) + t.SetTitle("Delete Backup Files Result") + t.AppendHeader(table.Row{"#", "Node IP", "Node Port", "Result", "Message"}) + t.SetColumnConfigs([]table.ColumnConfig{{Number: 5, WidthMax: 50}}) + + idx := 0 + for result := range resultCh { + idx++ + t.AppendRow([]interface{}{idx, result.IP, result.Port, result.Status, result.Msg}) + t.AppendSeparator() + } - // t.Render() + t.Render() } if err := ls.DeleteByName(filename); err != nil { From 3e5604fb0c07fc701c69d9a66a327c2c61e80064 Mon Sep 17 00:00:00 2001 From: mlycore Date: Mon, 27 Nov 2023 10:43:30 +0800 Subject: [PATCH 7/9] fix: remove comments Signed-off-by: mlycore --- pitr/agent/internal/pkg/opengauss.go | 15 --------------- pitr/agent/pkg/cmds/cmd.go | 1 - pitr/cli/internal/cmd/backup.go | 1 - 3 files changed, 17 deletions(-) diff --git a/pitr/agent/internal/pkg/opengauss.go b/pitr/agent/internal/pkg/opengauss.go index 8f6c5c78..fcba1e92 100644 --- a/pitr/agent/internal/pkg/opengauss.go +++ b/pitr/agent/internal/pkg/opengauss.go @@ -116,15 +116,11 @@ func (og *openGauss) AsyncBackup(backupPath, instanceName, backupMode string, th Field("pgdata", og.pgData). Debug(fmt.Sprintf("AsyncBackup output[lineNo=%d,msg=%s,err=%v]", output.LineNo, output.Message, output.Error)) - // fmt.Printf("[lineNo=%d,msg=%s,err=%v]\n", output.LineNo, output.Message, output.Error) - if output.Error != nil { og.log.Error(fmt.Sprintf("output.Error[%s] is not nil", output.Error)) return "", output.Error } - // fmt.Printf("lalala: %s\n", output.Message) - if strings.Contains(output.Message, "INFO: Backup start") { bid, err = og.getBackupID(output.Message) if err != nil { @@ -132,19 +128,8 @@ func (og *openGauss) AsyncBackup(backupPath, instanceName, backupMode string, th return "", err } } - - // get the backup id from the first line - // bid, err := og.getBackupID(output.Message) - // if err != nil { - // og.log.Error(fmt.Sprintf("og.getBackupID[source=%s] return err wrap: %s", output.Message, err)) - // return "", err - // } - // ignore other output - // go og.ignore(outputs) - // return bid, nil //nolint } return bid, nil //nolint - // return "", fmt.Errorf("unknow err") } //nolint:dupl diff --git a/pitr/agent/pkg/cmds/cmd.go b/pitr/agent/pkg/cmds/cmd.go index f4bf7151..77a98f53 100644 --- a/pitr/agent/pkg/cmds/cmd.go +++ b/pitr/agent/pkg/cmds/cmd.go @@ -68,7 +68,6 @@ func AsyncExec(name string, args ...string) (chan *Output, error) { Error: err, } if strings.Contains(scanner.Text(), "No space left on device") { - // fmt.Printf("[%d] %s, [%s]\n", index, scanner.Text(), err) op.Error = fmt.Errorf("%s", "No space left on device") } diff --git a/pitr/cli/internal/cmd/backup.go b/pitr/cli/internal/cmd/backup.go index c54c9cab..77f19a52 100644 --- a/pitr/cli/internal/cmd/backup.go +++ b/pitr/cli/internal/cmd/backup.go @@ -307,7 +307,6 @@ func _execBackup(as pkg.IAgentServer, node *model.StorageNode, dnCh chan *model. backupID, err := as.Backup(in) status := model.SsBackupStatusRunning if err != nil { - // return xerr.NewCliErr(fmt.Sprintf("data node backup error: %s", err)) status = model.BackupStatus(err.Error()) } From 26786045c4a2b2d6e66d619b41f5675657a7915d Mon Sep 17 00:00:00 2001 From: mlycore Date: Mon, 27 Nov 2023 10:45:29 +0800 Subject: [PATCH 8/9] chore: add golint comment Signed-off-by: mlycore --- pitr/agent/internal/pkg/opengauss.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pitr/agent/internal/pkg/opengauss.go b/pitr/agent/internal/pkg/opengauss.go index fcba1e92..84866d3c 100644 --- a/pitr/agent/internal/pkg/opengauss.go +++ b/pitr/agent/internal/pkg/opengauss.go @@ -349,6 +349,7 @@ func (og *openGauss) ShowBackupList(backupPath, instanceName string) ([]*model.B return og.showbackup(cmd, instanceName) } +//nolint:unused func (og *openGauss) ignore(outputs chan *cmds.Output) { defer func() { _ = recover() From edfade46c908d19150ac6ff3c289d44133db6113 Mon Sep 17 00:00:00 2001 From: mlycore Date: Mon, 27 Nov 2023 12:01:45 +0800 Subject: [PATCH 9/9] fix: fix test Signed-off-by: mlycore --- pitr/cli/internal/cmd/backup_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pitr/cli/internal/cmd/backup_test.go b/pitr/cli/internal/cmd/backup_test.go index ee72697a..8c7387ca 100644 --- a/pitr/cli/internal/cmd/backup_test.go +++ b/pitr/cli/internal/cmd/backup_test.go @@ -180,9 +180,9 @@ var _ = Describe("Backup", func() { as.EXPECT().Backup(gomock.Any()).Return("", xerr.NewCliErr("backup failed")) - Expect(_execBackup(as, bak.SsBackup.StorageNodes[0], dnCh)).ToNot(BeNil()) + Expect(_execBackup(as, bak.SsBackup.StorageNodes[1], dnCh)).ToNot(BeNil()) close(dnCh) - Expect(len(dnCh)).To(Equal(1)) + Expect(len(dnCh)).To(Equal(2)) }) })