Skip to content

Commit 9d40033

Browse files
craig[bot], dhartunian, michae2, arulajmani
committed
151076: server: skip TestCheckRestartSafe under duress r=dhartunian a=dhartunian These tests do not need to be run under race or deadlock since they involve a lot of moving parts and a full server. Resolves: #150811 Resolves: #150810 Resolves: #150365 Release note: None 151185: cli: explicitly set disallow_full_table_scans for debug.zips r=ZhouXing19 a=michae2 Fixes: #151183 Release note (bug fix): Fix a bug where debug.zips collected from clusters with disallow_full_table_scans set would have missing system table data. 151201: clusterversion: move WriteInitialTruncStateBeforeSplitApplication to 25.4 r=arulajmani a=arulajmani This cluster version was intended for v25.3, but [we raced with branch cut](#149494 (comment)) and this didn't make it to the release branch. The least risky thing to do is to move this to 25.4 instead of a late stage backport, so let's do that. Epic: none Release note: None Co-authored-by: David Hartunian <davidh@cockroachlabs.com> Co-authored-by: Michael Erickson <michae2@cockroachlabs.com> Co-authored-by: Arul Ajmani <arulajmani@gmail.com>
4 parents 53ce6cb + 1430411 + 34923c6 + c9e4b25 commit 9d40033

File tree

8 files changed

+107
-16
lines changed

8 files changed

+107
-16
lines changed

docs/generated/settings/settings-for-tenants.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -418,4 +418,4 @@ trace.zipkin.collector string the address of a Zipkin instance to receive trace
418418
ui.database_locality_metadata.enabled boolean true if enabled shows extended locality data about databases and tables in DB Console which can be expensive to compute application
419419
ui.default_timezone string the default timezone used to format timestamps in the ui application
420420
ui.display_timezone enumeration etc/utc the timezone used to format timestamps in the ui. This setting is deprecated and will be removed in a future version. Use the 'ui.default_timezone' setting instead. 'ui.default_timezone' takes precedence over this setting. [etc/utc = 0, america/new_york = 1] application
421-
version version 1000025.3-upgrading-to-1000025.4-step-002 set the active cluster version in the format '<major>.<minor>' application
421+
version version 1000025.3-upgrading-to-1000025.4-step-004 set the active cluster version in the format '<major>.<minor>' application

docs/generated/settings/settings.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,6 @@
376376
<tr><td><div id="setting-ui-database-locality-metadata-enabled" class="anchored"><code>ui.database_locality_metadata.enabled</code></div></td><td>boolean</td><td><code>true</code></td><td>if enabled shows extended locality data about databases and tables in DB Console which can be expensive to compute</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
377377
<tr><td><div id="setting-ui-default-timezone" class="anchored"><code>ui.default_timezone</code></div></td><td>string</td><td><code></code></td><td>the default timezone used to format timestamps in the ui</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
378378
<tr><td><div id="setting-ui-display-timezone" class="anchored"><code>ui.display_timezone</code></div></td><td>enumeration</td><td><code>etc/utc</code></td><td>the timezone used to format timestamps in the ui. This setting is deprecated and will be removed in a future version. Use the &#39;ui.default_timezone&#39; setting instead. &#39;ui.default_timezone&#39; takes precedence over this setting. [etc/utc = 0, america/new_york = 1]</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
379-
<tr><td><div id="setting-version" class="anchored"><code>version</code></div></td><td>version</td><td><code>1000025.3-upgrading-to-1000025.4-step-002</code></td><td>set the active cluster version in the format &#39;&lt;major&gt;.&lt;minor&gt;&#39;</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
379+
<tr><td><div id="setting-version" class="anchored"><code>version</code></div></td><td>version</td><td><code>1000025.3-upgrading-to-1000025.4-step-004</code></td><td>set the active cluster version in the format &#39;&lt;major&gt;.&lt;minor&gt;&#39;</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
380380
</tbody>
381381
</table>
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
zip
2+
----
3+
debug zip --concurrency=1 --cpu-profile-duration=0 /dev/null
4+
[cluster] discovering virtual clusters... done
5+
[cluster] creating output file /dev/null... done
6+
[cluster] establishing RPC connection to ...
7+
[cluster] using SQL address: ...
8+
[cluster] requesting data for debug/events... received response... writing JSON output: debug/events.json... done
9+
[cluster] requesting data for debug/rangelog... received response... writing JSON output: debug/rangelog.json... done
10+
[cluster] requesting data for debug/settings... received response... writing JSON output: debug/settings.json... done
11+
[cluster] requesting data for debug/reports/problemranges... received response... writing JSON output: debug/reports/problemranges.json... done
12+
<dumping SQL tables>
13+
[cluster] requesting nodes... received response... writing JSON output: debug/nodes.json... done
14+
[cluster] requesting liveness... received response... writing JSON output: debug/liveness.json... done
15+
[cluster] requesting tenant ranges... received response...
16+
[cluster] requesting tenant ranges: last request failed: rpc error: ...
17+
[cluster] requesting tenant ranges: creating error output: debug/tenant_ranges.err.txt... done
18+
[cluster] collecting the inflight traces for jobs... received response... done
19+
[node 1] node status... writing JSON output: debug/nodes/1/status.json... done
20+
[node 1] using SQL connection URL: postgresql://...
21+
<dumping SQL tables>
22+
[node 1] requesting data for debug/nodes/1/details... received response... writing JSON output: debug/nodes/1/details.json... done
23+
[node 1] requesting data for debug/nodes/1/gossip... received response... writing JSON output: debug/nodes/1/gossip.json... done
24+
[node 1] requesting stacks... received response... writing binary output: debug/nodes/1/stacks.txt... done
25+
[node 1] requesting stacks with labels... received response... writing binary output: debug/nodes/1/stacks_with_labels.txt... done
26+
[node 1] requesting heap profile... received response... writing binary output: debug/nodes/1/heap.pprof... done
27+
[node 1] requesting engine stats... received response... writing binary output: debug/nodes/1/lsm.txt... done
28+
[node 1] requesting heap profile list... received response... done
29+
[node ?] ? heap profiles found
30+
[node 1] requesting goroutine dump list... received response... done
31+
[node ?] ? goroutine dumps found
32+
[node 1] requesting cpu profile list... received response... done
33+
[node ?] ? cpu profiles found
34+
[node 1] requesting execution trace list... received response... done
35+
[node ?] ? execution traces found
36+
[node 1] requesting log files list... received response... done
37+
[node ?] ? log files found
38+
[node 1] requesting ranges... received response... done
39+
[node 1] writing ranges... writing JSON output: debug/nodes/1/ranges.json... done
40+
[cluster] pprof summary script... writing binary output: debug/pprof-summary.sh... done
41+
[cluster] hot range summary script... writing binary output: debug/hot-ranges.sh... done
42+
[cluster] tenant hot range summary script... writing binary output: debug/hot-ranges-tenant.sh... done
43+
[cluster] capture debug zip flags... writing binary output: debug/debug_zip_command_flags.txt... done
44+
[cluster] The generated file /dev/null is corrupt. Please retry debug zip generation. error: zip: not a valid zip file
45+
ERROR: zip: not a valid zip file

pkg/cli/zip.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -594,6 +594,12 @@ func (zc *debugZipContext) dumpTableDataForZip(
594594
return err
595595
}
596596

597+
// Many of the table data queries use full scans, so allow them.
598+
err = conn.Exec(ctx, `SET disallow_full_table_scans = off`)
599+
if err != nil {
600+
return err
601+
}
602+
597603
w, err := zc.z.createLocked(name, time.Time{})
598604
if err != nil {
599605
return err

pkg/cli/zip_test.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -807,6 +807,45 @@ func TestPartialZip(t *testing.T) {
807807
})
808808
}
809809

810+
// TestZipDisallowFullScans tests that we still can dump full SQL tables for
811+
// debug.zips when the cluster has disallow_full_table_scans enabled.
812+
func TestZipDisallowFullScans(t *testing.T) {
813+
defer leaktest.AfterTest(t)()
814+
defer log.Scope(t).Close(t)
815+
816+
skip.UnderShort(t)
817+
skip.UnderRace(t)
818+
819+
dir, cleanupFn := testutils.TempDir(t)
820+
defer cleanupFn()
821+
822+
c := NewCLITest(TestCLIParams{
823+
StoreSpecs: []base.StoreSpec{{
824+
Path: dir,
825+
}},
826+
})
827+
defer c.Cleanup()
828+
829+
c.RunWithArgs([]string{"sql", "-e", `
830+
SET CLUSTER SETTING sql.defaults.disallow_full_table_scans.enabled = on;
831+
SET CLUSTER SETTING sql.defaults.large_full_scan_rows = 1;
832+
`})
833+
834+
out, err := c.RunWithCapture("debug zip --concurrency=1 --cpu-profile-duration=0 " + os.DevNull)
835+
if err != nil {
836+
t.Fatal(err)
837+
}
838+
839+
// Strip any non-deterministic messages.
840+
out = eraseNonDeterministicZipOutput(out)
841+
842+
// We use datadriven simply to read the golden output file; we don't actually
843+
// run any commands. Using datadriven allows TESTFLAGS=-rewrite.
844+
datadriven.RunTest(t, datapathutils.TestDataPath(t, "zip", "testzip_disallow_full_scans"), func(t *testing.T, td *datadriven.TestData) string {
845+
return out
846+
})
847+
}
848+
810849
// This checks that SQL retry errors are properly handled.
811850
func TestZipRetries(t *testing.T) {
812851
defer leaktest.AfterTest(t)()

pkg/clusterversion/cockroach_versions.go

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -199,17 +199,17 @@ const (
199199

200200
V25_3_AddHotRangeLoggerJob
201201

202-
// V25_3_WriteInitialTruncStateBeforeSplitApplication is the version above
203-
// which we write the initial truncated state before applying a split. By
204-
// extension, we no longer need to replicate the truncated state when
205-
// constructing the split write batch.
206-
V25_3_WriteInitialTruncStateBeforeSplitApplication
207-
208202
// V25_3 is CockroachDB v25.3. It's used for all v25.3.x patch releases.
209203
V25_3
210204

211205
V25_4_Start
212206

207+
// V25_4_WriteInitialTruncStateBeforeSplitApplication is the version above
208+
// which we write the initial truncated state before applying a split. By
209+
// extension, we no longer need to replicate the truncated state when
210+
// constructing the split write batch.
211+
V25_4_WriteInitialTruncStateBeforeSplitApplication
212+
213213
// *************************************************
214214
// Step (1) Add new versions above this comment.
215215
// Do not add new versions to a patch release.
@@ -257,12 +257,13 @@ var versionTable = [numKeys]roachpb.Version{
257257

258258
V25_3_AddHotRangeLoggerJob: {Major: 25, Minor: 2, Internal: 8},
259259

260-
V25_3_WriteInitialTruncStateBeforeSplitApplication: {Major: 25, Minor: 2, Internal: 10},
261-
262260
V25_3: {Major: 25, Minor: 3, Internal: 0},
263261

264262
// v25.4 versions. Internal versions must be even.
265263
V25_4_Start: {Major: 25, Minor: 3, Internal: 2},
264+
265+
V25_4_WriteInitialTruncStateBeforeSplitApplication: {Major: 25, Minor: 3, Internal: 4},
266+
266267
// *************************************************
267268
// Step (2): Add new versions above this comment.
268269
// Do not add new versions to a patch release.

pkg/kv/kvserver/batcheval/cmd_end_transaction.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1482,11 +1482,11 @@ func splitTriggerHelper(
14821482
return enginepb.MVCCStats{}, result.Result{}, errors.Wrap(err, "unable to write initial Replica state")
14831483
}
14841484
// TODO(arulajmani): This can be removed once all nodes are past the
1485-
// V25_3_WriteInitialTruncStateBeforeSplitApplication cluster version.
1485+
// V25_4_WriteInitialTruncStateBeforeSplitApplication cluster version.
14861486
// At that point, we'll no longer need to replicate the truncated state
14871487
// as all replicas will be responsible for writing it locally before
14881488
// applying the split.
1489-
if !rec.ClusterSettings().Version.IsActive(ctx, clusterversion.V25_3_WriteInitialTruncStateBeforeSplitApplication) {
1489+
if !rec.ClusterSettings().Version.IsActive(ctx, clusterversion.V25_4_WriteInitialTruncStateBeforeSplitApplication) {
14901490
if err := stateloader.WriteInitialTruncState(ctx, batch, split.RightDesc.RangeID); err != nil {
14911491
return enginepb.MVCCStats{}, result.Result{}, errors.Wrap(err, "unable to write initial Replica state")
14921492
}

pkg/server/api_v2_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,7 @@ func TestCheckRestartSafe_Criticality(t *testing.T) {
338338
defer leaktest.AfterTest(t)()
339339
defer log.Scope(t).Close(t)
340340

341-
skip.WithIssue(t, 150810)
341+
skip.UnderDuress(t)
342342

343343
ctx := context.Background()
344344

@@ -436,7 +436,7 @@ func TestCheckRestartSafe_AllowMinimumQuorum_Pass(t *testing.T) {
436436
defer leaktest.AfterTest(t)()
437437
defer log.Scope(t).Close(t)
438438

439-
skip.WithIssue(t, 150365)
439+
skip.UnderDuress(t)
440440

441441
ctx := context.Background()
442442
var err error
@@ -477,7 +477,7 @@ func TestCheckRestartSafe_AllowMinimumQuorum_Fail(t *testing.T) {
477477
defer leaktest.AfterTest(t)()
478478
defer log.Scope(t).Close(t)
479479

480-
skip.WithIssue(t, 149534)
480+
skip.UnderDuress(t)
481481

482482
ctx := context.Background()
483483
var err error
@@ -512,7 +512,7 @@ func TestCheckRestartSafe_Integration(t *testing.T) {
512512
defer leaktest.AfterTest(t)()
513513
defer log.Scope(t).Close(t)
514514

515-
skip.WithIssue(t, 150811)
515+
skip.UnderDuress(t)
516516

517517
ctx := context.Background()
518518
var err error

0 commit comments

Comments (0)