From df667f915df7c859c65557047d957b50d075bd7e Mon Sep 17 00:00:00 2001 From: Viktor Kurilko Date: Thu, 18 Sep 2025 16:34:48 +0700 Subject: [PATCH 1/3] Revert "Fix inefficient sql for restore statistics (#130)" This patch fixed inefficient sql when restoring statistics by replacing the IN operator with =. To fix existing backups, the nested loop was enabled when restoring statistics. This was done only for a certain range of patchsets. But it turned out that the binaries that are supplied to customers do not have a patchset number, which is why this optimization is not activated. Therefore, this patch has been reworked in the next commits. This reverts commit bbbd8013a1de8cf1a0efe7e13dd4aa1aced1411d. --- arenadata/arenadata_helper.go | 56 +++----- backup/statistics.go | 2 +- backup/statistics_test.go | 8 +- restore/restore.go | 4 - restore/restore_internal_test.go | 216 +++---------------------------- 5 files changed, 39 insertions(+), 247 deletions(-) diff --git a/arenadata/arenadata_helper.go b/arenadata/arenadata_helper.go index e330f9e86..a30cf7862 100644 --- a/arenadata/arenadata_helper.go +++ b/arenadata/arenadata_helper.go @@ -3,12 +3,8 @@ package arenadata import ( "regexp" "strconv" - "strings" - "github.com/GreengageDB/gp-common-go-libs/dbconn" "github.com/GreengageDB/gp-common-go-libs/gplog" - "github.com/greenplum-db/gpbackup/history" - "github.com/greenplum-db/gpbackup/toc" "github.com/pkg/errors" ) @@ -17,9 +13,22 @@ var ( ) func EnsureAdVersionCompatibility(backupVersion string, restoreVersion string) { - adBackup := getArenadataVersion(backupVersion) - adRestore := getArenadataVersion(restoreVersion) - + var ( + adBackup, adRestore int + err error + ) + if strVersion := adPattern.FindAllStringSubmatch(backupVersion, -1); len(strVersion) > 0 { + adBackup, err = strconv.Atoi(strVersion[0][1]) + gplog.FatalOnError(err) + } else { + gplog.Fatal(errors.Errorf("Invalid arenadata version format for gpbackup: %s", backupVersion), "") + } + if strVersion := adPattern.FindAllStringSubmatch(restoreVersion, -1); len(strVersion) > 0 { + adRestore, err = strconv.Atoi(strVersion[0][1]) + gplog.FatalOnError(err) + } else { + gplog.Fatal(errors.Errorf("Invalid arenadata version format for gprestore: %s", restoreVersion), "") + } if adRestore < adBackup { gplog.Fatal(errors.Errorf("gprestore arenadata%d cannot restore a backup taken with gpbackup arenadata%d; please use gprestore arenadata%d or later.", adRestore, adBackup, adBackup), "") @@ -31,36 +40,3 @@ func EnsureAdVersionCompatibility(backupVersion string, restoreVersion string) { func GetOriginalVersion(fullVersion string) string { return adPattern.ReplaceAllString(fullVersion, "") } - -func PatchStatisticsStatements(backupConfig *history.BackupConfig, connectionPool *dbconn.DBConn, statements []toc.StatementWithType) []toc.StatementWithType { - // Backups created in versions 1.30.5_arenadata16 to 1.30.5_arenadata19 have ineffective sql for - // deleting statistics, which can affect restore performance. as a workaround for these - // versions, we enable nested loop. - if connectionPool.Version.Is("6") && - strings.Contains(backupConfig.BackupVersion, "1.30.5_arenadata") && - len(statements) > 0 { - arenadataVersion := getArenadataVersion(backupConfig.BackupVersion) - - if arenadataVersion >= 16 && arenadataVersion <= 19 { - statements = append(statements, toc.StatementWithType{}) - copy(statements[1:], statements[:]) - statements[0] = toc.StatementWithType{ - Statement: "SET enable_nestloop = ON;", - } - statements = append(statements, toc.StatementWithType{ - Statement: "RESET enable_nestloop;", - }) - } - } - return statements -} - -func getArenadataVersion(fullVersion string) uint { - match := adPattern.FindStringSubmatch(fullVersion) - if len(match) != 2 { - gplog.Fatal(errors.Errorf("Invalid arenadata version format for gpbackup: %s", fullVersion), "") - } - result, err := strconv.ParseUint(match[1], 10, 32) - gplog.FatalOnError(err) - return uint(result) -} diff --git a/backup/statistics.go b/backup/statistics.go index d16eafeae..a40f35163 100644 --- a/backup/statistics.go +++ b/backup/statistics.go @@ -69,7 +69,7 @@ func GenerateAttributeStatisticsQueries(table Table, attStat AttributeStatistic) attributeSlotsQueryStr = generateAttributeSlotsQuery4(attStat) } - attributeQueries = append(attributeQueries, fmt.Sprintf(`DELETE FROM pg_statistic WHERE (starelid, staattnum) = + attributeQueries = append(attributeQueries, fmt.Sprintf(`DELETE FROM pg_statistic WHERE (starelid, staattnum) IN (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = %s AND attname = '%s');`, starelidStr, utils.EscapeSingleQuotes(attStat.AttName))) attributeQueries = append(attributeQueries, fmt.Sprintf(`INSERT INTO pg_statistic SELECT attrelid, diff --git a/backup/statistics_test.go b/backup/statistics_test.go index 9b0b4e73e..485cb42da 100644 --- a/backup/statistics_test.go +++ b/backup/statistics_test.go @@ -100,7 +100,7 @@ SET reltuples = 0.000000::real WHERE oid = 'testschema.testtable2'::regclass::oid;`, - `DELETE FROM pg_statistic WHERE (starelid, staattnum) = + `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'testschema.testtable2'::regclass::oid AND attname = 'testattWithArray');`, fmt.Sprintf(`INSERT INTO pg_statistic SELECT attrelid, @@ -126,7 +126,7 @@ WHERE oid = 'testschema.testtable2'::regclass::oid;`, NULL FROM pg_attribute WHERE attrelid = 'testschema.testtable2'::regclass::oid AND attname = 'testattWithArray';`, insertReplace1, insertReplace2, insertReplace3, insertReplace4, insertReplace5), - `DELETE FROM pg_statistic WHERE (starelid, staattnum) = + `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'testschema.testtable2'::regclass::oid AND attname = 'testatt');`, fmt.Sprintf(`INSERT INTO pg_statistic SELECT attrelid, @@ -181,7 +181,7 @@ WHERE oid = '"""test''schema"""."""test''table"""'::regclass::oid;`)) } attStatsQueries := backup.GenerateAttributeStatisticsQueries(tableTestTable, attStats) - Expect(attStatsQueries[0]).To(Equal(fmt.Sprintf(`DELETE FROM pg_statistic WHERE (starelid, staattnum) = + Expect(attStatsQueries[0]).To(Equal(fmt.Sprintf(`DELETE FROM pg_statistic WHERE (starelid, staattnum) IN (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'testschema."test''table"'::regclass::oid AND attname = 'testatt');`))) insertReplace1, insertReplace2, insertReplace3, insertReplace4, insertReplace5 := getStatInsertReplace(0, 0) @@ -220,7 +220,7 @@ FROM pg_attribute WHERE attrelid = 'testschema."test''table"'::regclass::oid AND attStatsQueries := backup.GenerateAttributeStatisticsQueries(tableTestTable, attStats) - Expect(attStatsQueries[0]).To(Equal(fmt.Sprintf(`DELETE FROM pg_statistic WHERE (starelid, staattnum) = + Expect(attStatsQueries[0]).To(Equal(fmt.Sprintf(`DELETE FROM pg_statistic WHERE (starelid, staattnum) IN (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'testschema."test''table"'::regclass::oid AND attname = 'testatt');`))) insertReplace1, insertReplace2, insertReplace3, insertReplace4, insertReplace5 := getStatInsertReplace(10, 12) diff --git a/restore/restore.go b/restore/restore.go index 623dde6bf..4aa4b3323 100644 --- a/restore/restore.go +++ b/restore/restore.go @@ -3,7 +3,6 @@ package restore import ( "bufio" "fmt" - "github.com/greenplum-db/gpbackup/arenadata" "os" "regexp" "runtime/debug" @@ -553,9 +552,6 @@ func restoreStatistics() { statements := GetRestoreMetadataStatementsFiltered("statistics", statisticsFilename, []string{}, []string{}, filters) editStatementsRedirectSchema(statements, opts.RedirectSchema) - - statements = arenadata.PatchStatisticsStatements(backupConfig, connectionPool, statements) - numErrors := ExecuteRestoreMetadataStatements("statistics", statements, "Table statistics", nil, utils.PB_VERBOSE, false) if numErrors > 0 { diff --git a/restore/restore_internal_test.go b/restore/restore_internal_test.go index d99dbfda5..77f2f4294 100644 --- a/restore/restore_internal_test.go +++ b/restore/restore_internal_test.go @@ -1,17 +1,8 @@ package restore import ( - "github.com/DATA-DOG/go-sqlmock" - "github.com/GreengageDB/gp-common-go-libs/cluster" "github.com/GreengageDB/gp-common-go-libs/dbconn" - "github.com/GreengageDB/gp-common-go-libs/gplog" - "github.com/GreengageDB/gp-common-go-libs/testhelper" - "github.com/greenplum-db/gpbackup/filepath" - "github.com/greenplum-db/gpbackup/history" - "github.com/greenplum-db/gpbackup/options" "github.com/greenplum-db/gpbackup/toc" - "os" - path "path/filepath" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -69,47 +60,47 @@ var _ = Describe("restore internal tests", func() { }, { Schema: "foo", Name: "bar", ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'foo.bar'::regclass::oid AND attname = 'i');`, }, { Schema: "foo", Name: `"b'ar"`, ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'foo."b''ar"'::regclass::oid AND attname = 'i');`, }, { Schema: "foo", Name: `"b.ar"`, ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'foo."b.ar"'::regclass::oid AND attname = 'i');`, }, { Schema: `"fo.o"`, Name: "bar", ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = '"fo.o".bar'::regclass::oid AND attname = 'i');`, }, { Schema: `"fo.o"`, Name: `"b'ar"`, ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = '"fo.o"."b''ar"'::regclass::oid AND attname = 'i');`, }, { Schema: `"fo.o"`, Name: `"b.ar"`, ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = '"fo.o"."b.ar"'::regclass::oid AND attname = 'i');`, }, { Schema: `"fo'o"`, Name: "bar", ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = '"fo''o".bar'::regclass::oid AND attname = 'i');`, }, { Schema: `"fo'o"`, Name: `"b'ar"`, ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = '"fo''o"."b''ar"'::regclass::oid AND attname = 'i');`, }, { Schema: `"fo'o"`, Name: `"b.ar"`, ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = '"fo''o"."b.ar"'::regclass::oid AND attname = 'i');`, }, } @@ -185,47 +176,47 @@ var _ = Describe("restore internal tests", func() { }, { Schema: "foo2", Name: "bar", ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'foo2.bar'::regclass::oid AND attname = 'i');`, }, { Schema: "foo2", Name: `"b'ar"`, ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'foo2."b''ar"'::regclass::oid AND attname = 'i');`, }, { Schema: "foo2", Name: `"b.ar"`, ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'foo2."b.ar"'::regclass::oid AND attname = 'i');`, }, { Schema: "foo2", Name: "bar", ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'foo2.bar'::regclass::oid AND attname = 'i');`, }, { Schema: "foo2", Name: `"b'ar"`, ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'foo2."b''ar"'::regclass::oid AND attname = 'i');`, }, { Schema: "foo2", Name: `"b.ar"`, ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'foo2."b.ar"'::regclass::oid AND attname = 'i');`, }, { Schema: "foo2", Name: "bar", ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'foo2.bar'::regclass::oid AND attname = 'i');`, }, { Schema: "foo2", Name: `"b'ar"`, ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'foo2."b''ar"'::regclass::oid AND attname = 'i');`, }, { Schema: "foo2", Name: `"b.ar"`, ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'foo2."b.ar"'::regclass::oid AND attname = 'i');`, }, } @@ -236,175 +227,4 @@ var _ = Describe("restore internal tests", func() { } }) }) - Describe("Restore statistic", Ordered, func() { - var mock sqlmock.Sqlmock - BeforeAll(func() { - err := os.MkdirAll(path.Join(os.TempDir(), "backup/gpseg-1/backups/20250815/20250815120713/"), 0777) - Expect(err).ToNot(HaveOccurred()) - - err = os.WriteFile(path.Join(os.TempDir(), "backup/gpseg-1/backups/20250815/20250815120713/gpbackup_20250815120713_statistics.sql"), []byte( - `UPDATE pg_class -SET - relpages = 194::int, - reltuples = 10000.000000::real -WHERE oid = 'public.t1'::regclass::oid; - -DELETE FROM pg_statistic WHERE (starelid, staattnum) = - (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'public.t1'::regclass::oid AND attname = 'a'); -INSERT INTO pg_statistic SELECT - attrelid, - attnum, - false::boolean, - 0.000000::real, - 8::integer, - -1.000000::real, - 2::smallint, - 3::smallint, - 0::smallint, - 0::smallint, - 0::smallint, - 412::oid, - 412::oid, - 0::oid, - 0::oid, - 0::oid, - NULL::real[], - '{"0.313703984"}'::real[], - NULL::real[], - NULL::real[], - NULL::real[], - array_in('{"1","100",}', 'int8'::regtype::oid, -1), - NULL, - NULL, - NULL, - NULL -FROM pg_attribute WHERE attrelid = 'public.t1'::regclass::oid AND attname = 'a'; -`), 0777) - Expect(err).ToNot(HaveOccurred()) - - err = os.WriteFile(path.Join(os.TempDir(), "backup/gpseg-1/backups/20250815/20250815120713/gpbackup_20250815120713_toc.yaml"), []byte(` -statisticsentries: -- schema: public - name: t1 - objecttype: STATISTICS - referenceobject: "" - startbyte: 0 - endbyte: 129 - tier: - - 0 - - 0 -- schema: public - name: t1 - objecttype: STATISTICS - referenceobject: "" - startbyte: 129 - endbyte: 299 - tier: - - 0 - - 0 -- schema: public - name: t1 - objecttype: STATISTICS - referenceobject: "" - startbyte: 299 - endbyte: 966 - tier: - - 0 - - 0 -`), 0777) - - gplog.InitializeLogging("gprestore", path.Join(os.TempDir(), "gprestore_test")) - gplog.SetLogFileVerbosity(gplog.LOGERROR) - - opts = &options.Options{} - - globalTOC = toc.NewTOC(path.Join(os.TempDir(), "backup/gpseg-1/backups/20250815/20250815120713/gpbackup_20250815120713_toc.yaml")) - globalTOC.InitializeMetadataEntryMap() - - DeferCleanup(func() { - err := os.RemoveAll(path.Join(os.TempDir(), "backup")) - Expect(err).NotTo(HaveOccurred()) - }) - }) - - BeforeEach(func() { - configCoordinator := cluster.SegConfig{ContentID: -1, Hostname: "localhost", DataDir: "gpseg-1"} - testCluster := cluster.NewCluster([]cluster.SegConfig{configCoordinator}) - testFPInfo := filepath.NewFilePathInfo(testCluster, "", "20250815120713", - "gpseg", false) - testFPInfo.SegDirMap[-1] = path.Join(os.TempDir(), "backup/gpseg-1") - SetFPInfo(testFPInfo) - - connectionPool, mock, _, _, _ = testhelper.SetupTestEnvironment() - connectionPool.Version = dbconn.NewVersion("6.0.0") - }) - - DescribeTable("Restore statistic with different backup versions", func(backupVersion string, needNestedLoop bool) { - testConfig := history.BackupConfig{ - BackupVersion: backupVersion, - } - SetBackupConfig(&testConfig) - - if needNestedLoop { - mock.ExpectExec("SET enable_nestloop = ON;").WillReturnResult(sqlmock.NewResult(0, 0)) - } - mock.ExpectExec("UPDATE pg_class SET relpages = 194::int, reltuples = 10000\\.000000::real WHERE oid = 'public\\.t1'::regclass::oid;"). - WillReturnResult(sqlmock.NewResult(0, 1)) - mock.ExpectExec("DELETE FROM pg_statistic WHERE \\(starelid, staattnum\\) =" + - " \\(SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'public\\.t1'::regclass::oid AND attname = 'a'\\);"). - WillReturnResult(sqlmock.NewResult(0, 1)) - mock.ExpectExec("INSERT INTO pg_statistic SELECT" + - " attrelid, attnum," + - " false::boolean," + - " 0\\.000000::real," + - " 8::integer," + - " -1.000000::real," + - " 2::smallint, 3::smallint, 0::smallint, 0::smallint, 0::smallint," + - " 412::oid, 412::oid, 0::oid, 0::oid, 0::oid," + - " NULL::real\\[\\], '{\"0.313703984\"}'::real\\[\\], NULL::real\\[\\], NULL::real\\[\\], NULL::real\\[\\]," + - " array_in\\('{\"1\",\"100\",}', 'int8'::regtype::oid, -1\\)," + - " NULL, NULL, NULL, NULL FROM pg_attribute WHERE attrelid = 'public.t1'::regclass::oid AND attname = 'a';"). - WillReturnResult(sqlmock.NewResult(0, 1)) - if needNestedLoop { - mock.ExpectExec("RESET enable_nestloop;").WillReturnResult(sqlmock.NewResult(0, 0)) - } - restoreStatistics() - err := mock.ExpectationsWereMet() - Expect(err).NotTo(HaveOccurred()) - }, - Entry("before problematic version", "1.30.5_arenadata15", false), - Entry("problematic version", "1.30.5_arenadata18", true), - Entry("after problematic version", "1.30.5_arenadata20", false), - ) - - // This test must be before the "statistic is empty" test since the statement list must not be empty. - It("Wrong version", func() { - defer testhelper.ShouldPanicWithMessage("Invalid arenadata version format for gpbackup: 1.30.5_arenadataABC") - testConfig := history.BackupConfig{ - BackupVersion: "1.30.5_arenadataABC", - } - SetBackupConfig(&testConfig) - restoreStatistics() - }) - - It("statistic is empty", func() { - testConfig := history.BackupConfig{ - BackupVersion: "1.30.5_arenadata18", - } - SetBackupConfig(&testConfig) - - err := os.WriteFile(path.Join(os.TempDir(), "backup/gpseg-1/backups/20250815/20250815120713/gpbackup_20250815120713_statistics.sql"), - []byte("SET client_encoding = 'UTF8';"), 0777) - Expect(err).NotTo(HaveOccurred()) - err = os.WriteFile(path.Join(os.TempDir(), "backup/gpseg-1/backups/20250815/20250815120713/gpbackup_20250815120713_toc.yaml"), []byte(""), 0777) - Expect(err).NotTo(HaveOccurred()) - - globalTOC = toc.NewTOC(path.Join(os.TempDir(), "backup/gpseg-1/backups/20250815/20250815120713/gpbackup_20250815120713_toc.yaml")) - globalTOC.InitializeMetadataEntryMap() - - restoreStatistics() - err = mock.ExpectationsWereMet() - Expect(err).NotTo(HaveOccurred()) - }) - }) }) From eecb1f88fa2183ca4a5600dfc75e21024f2cc55c Mon Sep 17 00:00:00 2001 From: Viktor Kurilko Date: Thu, 18 Sep 2025 17:38:31 +0700 Subject: [PATCH 2/3] Fix inefficient sql for restore statistics Statistics are restored in 3 stages. In the first step, we update reltuples in pg_class. In the second step, we delete statistics for a specific attribute in pg_statistic. And at the last stage, we insert new statistics for this attribute. At the second stage, to determine the attribute number, it is searched in pg_attribute. For this, a subquery and the IN operator were used. This led to an inefficient plan that contains a seq scan on pg_statistic. This, in turn, can significantly affect the speed of statistical restore. Fix this by replacing the IN operator with = in the attribute statistics deletion query. We can be sure that the subquery will return no more than one row, since pg_attribute has a unique index on the attrelid and attname attributes. --- backup/statistics.go | 2 +- backup/statistics_test.go | 8 +++---- restore/restore_internal_test.go | 36 ++++++++++++++++---------------- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/backup/statistics.go b/backup/statistics.go index a40f35163..d16eafeae 100644 --- a/backup/statistics.go +++ b/backup/statistics.go @@ -69,7 +69,7 @@ func GenerateAttributeStatisticsQueries(table Table, attStat AttributeStatistic) attributeSlotsQueryStr = generateAttributeSlotsQuery4(attStat) } - attributeQueries = append(attributeQueries, fmt.Sprintf(`DELETE FROM pg_statistic WHERE (starelid, staattnum) IN + attributeQueries = append(attributeQueries, fmt.Sprintf(`DELETE FROM pg_statistic WHERE (starelid, staattnum) = (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = %s AND attname = '%s');`, starelidStr, utils.EscapeSingleQuotes(attStat.AttName))) attributeQueries = append(attributeQueries, fmt.Sprintf(`INSERT INTO pg_statistic SELECT attrelid, diff --git a/backup/statistics_test.go b/backup/statistics_test.go index 485cb42da..9b0b4e73e 100644 --- a/backup/statistics_test.go +++ b/backup/statistics_test.go @@ -100,7 +100,7 @@ SET reltuples = 0.000000::real WHERE oid = 'testschema.testtable2'::regclass::oid;`, - `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN + `DELETE FROM pg_statistic WHERE (starelid, staattnum) = (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'testschema.testtable2'::regclass::oid AND attname = 'testattWithArray');`, fmt.Sprintf(`INSERT INTO pg_statistic SELECT attrelid, @@ -126,7 +126,7 @@ WHERE oid = 'testschema.testtable2'::regclass::oid;`, NULL FROM pg_attribute WHERE attrelid = 'testschema.testtable2'::regclass::oid AND attname = 'testattWithArray';`, insertReplace1, insertReplace2, insertReplace3, insertReplace4, insertReplace5), - `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN + `DELETE FROM pg_statistic WHERE (starelid, staattnum) = (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'testschema.testtable2'::regclass::oid AND attname = 'testatt');`, fmt.Sprintf(`INSERT INTO pg_statistic SELECT attrelid, @@ -181,7 +181,7 @@ WHERE oid = '"""test''schema"""."""test''table"""'::regclass::oid;`)) } attStatsQueries := backup.GenerateAttributeStatisticsQueries(tableTestTable, attStats) - Expect(attStatsQueries[0]).To(Equal(fmt.Sprintf(`DELETE FROM pg_statistic WHERE (starelid, staattnum) IN + Expect(attStatsQueries[0]).To(Equal(fmt.Sprintf(`DELETE FROM pg_statistic WHERE (starelid, staattnum) = (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'testschema."test''table"'::regclass::oid AND attname = 'testatt');`))) insertReplace1, insertReplace2, insertReplace3, insertReplace4, insertReplace5 := getStatInsertReplace(0, 0) @@ -220,7 +220,7 @@ FROM pg_attribute WHERE attrelid = 'testschema."test''table"'::regclass::oid AND attStatsQueries := backup.GenerateAttributeStatisticsQueries(tableTestTable, attStats) - Expect(attStatsQueries[0]).To(Equal(fmt.Sprintf(`DELETE FROM pg_statistic WHERE (starelid, staattnum) IN + Expect(attStatsQueries[0]).To(Equal(fmt.Sprintf(`DELETE FROM pg_statistic WHERE (starelid, staattnum) = (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'testschema."test''table"'::regclass::oid AND attname = 'testatt');`))) insertReplace1, insertReplace2, insertReplace3, insertReplace4, insertReplace5 := getStatInsertReplace(10, 12) diff --git a/restore/restore_internal_test.go b/restore/restore_internal_test.go index 77f2f4294..9e4d5eba1 100644 --- a/restore/restore_internal_test.go +++ b/restore/restore_internal_test.go @@ -60,47 +60,47 @@ var _ = Describe("restore internal tests", func() { }, { Schema: "foo", Name: "bar", ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'foo.bar'::regclass::oid AND attname = 'i');`, }, { Schema: "foo", Name: `"b'ar"`, ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'foo."b''ar"'::regclass::oid AND attname = 'i');`, }, { Schema: "foo", Name: `"b.ar"`, ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'foo."b.ar"'::regclass::oid AND attname = 'i');`, }, { Schema: `"fo.o"`, Name: "bar", ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = '"fo.o".bar'::regclass::oid AND attname = 'i');`, }, { Schema: `"fo.o"`, Name: `"b'ar"`, ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = '"fo.o"."b''ar"'::regclass::oid AND attname = 'i');`, }, { Schema: `"fo.o"`, Name: `"b.ar"`, ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = '"fo.o"."b.ar"'::regclass::oid AND attname = 'i');`, }, { Schema: `"fo'o"`, Name: "bar", ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = '"fo''o".bar'::regclass::oid AND attname = 'i');`, }, { Schema: `"fo'o"`, Name: `"b'ar"`, ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = '"fo''o"."b''ar"'::regclass::oid AND attname = 'i');`, }, { Schema: `"fo'o"`, Name: `"b.ar"`, ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = '"fo''o"."b.ar"'::regclass::oid AND attname = 'i');`, }, } @@ -176,47 +176,47 @@ var _ = Describe("restore internal tests", func() { }, { Schema: "foo2", Name: "bar", ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'foo2.bar'::regclass::oid AND attname = 'i');`, }, { Schema: "foo2", Name: `"b'ar"`, ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'foo2."b''ar"'::regclass::oid AND attname = 'i');`, }, { Schema: "foo2", Name: `"b.ar"`, ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'foo2."b.ar"'::regclass::oid AND attname = 'i');`, }, { Schema: "foo2", Name: "bar", ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'foo2.bar'::regclass::oid AND attname = 'i');`, }, { Schema: "foo2", Name: `"b'ar"`, ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'foo2."b''ar"'::regclass::oid AND attname = 'i');`, }, { Schema: "foo2", Name: `"b.ar"`, ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'foo2."b.ar"'::regclass::oid AND attname = 'i');`, }, { Schema: "foo2", Name: "bar", ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'foo2.bar'::regclass::oid AND attname = 'i');`, }, { Schema: "foo2", Name: `"b'ar"`, ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'foo2."b''ar"'::regclass::oid AND attname = 'i');`, }, { Schema: "foo2", Name: `"b.ar"`, ObjectType: toc.OBJ_STATISTICS, - Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) IN + Statement: `DELETE FROM pg_statistic WHERE (starelid, staattnum) = (SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'foo2."b.ar"'::regclass::oid AND attname = 'i');`, }, } From e2ed65e2f5015e9f42582c0c9853bc08b3f194c2 Mon Sep 17 00:00:00 2001 From: Viktor Kurilko Date: Thu, 18 Sep 2025 18:21:14 +0700 Subject: [PATCH 3/3] Enable nested loop for gprestore connections Statistics backups created in gpbackup versions starting from 1.30.5_arenadata16 have inefficient SQL for deleting existing statistics statements. For new backups, this issue has been fixed in the previous commit. This patch fixes the issue for existing backups by enabling nested loop join. This should lead to a more efficient plan with an index scan instead of a seq scan. --- restore/wrappers.go | 1 + 1 file changed, 1 insertion(+) diff --git a/restore/wrappers.go b/restore/wrappers.go index a9f05d00a..8f6bf1c25 100644 --- a/restore/wrappers.go +++ b/restore/wrappers.go @@ -89,6 +89,7 @@ SET client_min_messages = error; SET standard_conforming_strings = on; SET default_with_oids = off; SET optimizer = off; +SET enable_nestloop = on; ` setupQuery += "SET gp_ignore_error_table = on;\n"