Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ydb/core/cms/cms_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ class TCms : public TActor<TCms>, public TTabletExecutedFlat {
class TTxStoreWalleTask;
class TTxUpdateConfig;
class TTxUpdateDowntimes;
class TTxStoreFirstBootTimestamp;

struct TActionOptions {
TDuration PermissionDuration;
Expand Down Expand Up @@ -150,6 +151,7 @@ class TCms : public TActor<TCms>, public TTabletExecutedFlat {
ITransaction *CreateTxUpdateConfig(TEvCms::TEvSetConfigRequest::TPtr &ev);
ITransaction *CreateTxUpdateConfig(TEvConsole::TEvConfigNotificationRequest::TPtr &ev);
ITransaction *CreateTxUpdateDowntimes();
ITransaction *CreateTxStoreFirstBootTimestamp();

static void AuditLog(const TActorContext &ctx, const TString &message) {
NCms::AuditLog("CMS tablet", message, ctx);
Expand Down
1 change: 1 addition & 0 deletions ydb/core/cms/cms_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ struct TCmsState : public TAtomicRefCount<TCmsState> {
ui64 NextRequestId = 0;
ui64 NextNotificationId = 0;
ui64 LastLogRecordTimestamp = 0;
TInstant FirstBootTimestamp;

// State of Wall-E tasks.
THashMap<TString, TTaskInfo> WalleTasks;
Expand Down
14 changes: 13 additions & 1 deletion ydb/core/cms/cms_tx_load_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class TCms::TTxLoadState : public TTransactionBase<TCms> {
if (!db.Precharge<Schema>())
return false;

auto paramRow = db.Table<Schema::Param>().Key(1).Select<Schema::Param::TColumns>();
auto paramRow = db.Table<Schema::Param>().Key(Schema::Param::Key).Select<Schema::Param::TColumns>();
auto permissionRowset = db.Table<Schema::Permission>().Range().Select<Schema::Permission::TColumns>();
auto requestRowset = db.Table<Schema::Request>().Range().Select<Schema::Request::TColumns>();
auto walleTaskRowset = db.Table<Schema::WalleTask>().Range().Select<Schema::WalleTask::TColumns>();
Expand All @@ -60,17 +60,23 @@ class TCms::TTxLoadState : public TTransactionBase<TCms> {

NKikimrCms::TCmsConfig config;
if (paramRow.IsValid()) {
FirstBoot = false;

state->NextPermissionId = paramRow.GetValueOrDefault<Schema::Param::NextPermissionID>(1);
state->NextRequestId = paramRow.GetValueOrDefault<Schema::Param::NextRequestID>(1);
state->NextNotificationId = paramRow.GetValueOrDefault<Schema::Param::NextNotificationID>(1);
state->FirstBootTimestamp = TInstant::MicroSeconds(paramRow.GetValueOrDefault<Schema::Param::FirstBootTimestamp>(0));
config = paramRow.GetValueOrDefault<Schema::Param::Config>(NKikimrCms::TCmsConfig());

LOG_DEBUG_S(ctx, NKikimrServices::CMS,
"Loaded config: " << config.ShortDebugString());
} else {
FirstBoot = true;

state->NextPermissionId = 1;
state->NextRequestId = 1;
state->NextNotificationId = 1;
state->FirstBootTimestamp = ctx.Now();

LOG_DEBUG_S(ctx, NKikimrServices::CMS,
"Using default config");
Expand Down Expand Up @@ -261,7 +267,13 @@ class TCms::TTxLoadState : public TTransactionBase<TCms> {
Self->ScheduleLogCleanup(ctx);
Self->ScheduleUpdateClusterInfo(ctx, true);
Self->ProcessInitQueue(ctx);

if (FirstBoot) {
Self->Execute(Self->CreateTxStoreFirstBootTimestamp(), ctx);
}
}
private:
bool FirstBoot = false;
};

ITransaction *TCms::CreateTxLoadState() {
Expand Down
2 changes: 1 addition & 1 deletion ydb/core/cms/cms_tx_process_notification.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class TCms::TTxProcessNotification : public TTransactionBase<TCms> {
Response->Record.SetNotificationId(id);

NIceDb::TNiceDb db(txc.DB);
db.Table<Schema::Param>().Key(1)
db.Table<Schema::Param>().Key(Schema::Param::Key)
.Update(NIceDb::TUpdate<Schema::Param::NextNotificationID>(Self->State->NextNotificationId));

TString notificationStr;
Expand Down
33 changes: 33 additions & 0 deletions ydb/core/cms/cms_tx_store_first_boot_timestamp.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#include "cms_impl.h"
#include "scheme.h"

namespace NKikimr::NCms {

// Transaction that writes the first boot timestamp currently held in
// Self->State->FirstBootTimestamp into the Schema::Param row identified by
// Schema::Param::Key.
class TCms::TTxStoreFirstBootTimestamp : public TTransactionBase<TCms> {
public:
    TTxStoreFirstBootTimestamp(TCms *self)
        : TBase(self)
    {
    }

    TTxType GetTxType() const override {
        return TXTYPE_STORE_FIRST_BOOT_TIMESTAMP;
    }

    // Stores the timestamp, expressed in microseconds, in the
    // FirstBootTimestamp column. Returns true unconditionally.
    bool Execute(TTransactionContext &txc, const TActorContext &ctx) override {
        LOG_DEBUG_S(ctx, NKikimrServices::CMS, "TTxStoreFirstBootTimestamp Execute");

        // The column holds a plain integer, so convert the TInstant up front.
        const auto firstBootUs = Self->State->FirstBootTimestamp.MicroSeconds();

        NIceDb::TNiceDb db(txc.DB);
        db.Table<Schema::Param>()
            .Key(Schema::Param::Key)
            .Update<Schema::Param::FirstBootTimestamp>(firstBootUs);

        return true;
    }

    // Only logs; no follow-up work is performed here.
    void Complete(const TActorContext &ctx) override {
        LOG_DEBUG(ctx, NKikimrServices::CMS, "TTxStoreFirstBootTimestamp Complete");
    }
};

// Allocates a TTxStoreFirstBootTimestamp transaction bound to this tablet and
// returns it as a raw ITransaction pointer.
ITransaction *TCms::CreateTxStoreFirstBootTimestamp() {
    ITransaction *tx = new TTxStoreFirstBootTimestamp(this);
    return tx;
}

} // namespace NKikimr::NCms
6 changes: 4 additions & 2 deletions ydb/core/cms/cms_tx_store_permissions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@ class TCms::TTxStorePermissions : public TTransactionBase<TCms> {
LOG_DEBUG(ctx, NKikimrServices::CMS, "TTxStorePermissions Execute");

NIceDb::TNiceDb db(txc.DB);
db.Table<Schema::Param>().Key(1).Update(NIceDb::TUpdate<Schema::Param::NextPermissionID>(NextPermissionId),
NIceDb::TUpdate<Schema::Param::NextRequestID>(NextRequestId));
db.Table<Schema::Param>().Key(Schema::Param::Key).Update(
NIceDb::TUpdate<Schema::Param::NextPermissionID>(NextPermissionId),
NIceDb::TUpdate<Schema::Param::NextRequestID>(NextRequestId)
);

const auto &rec = Response->Get<TEvCms::TEvPermissionResponse>()->Record;

Expand Down
2 changes: 1 addition & 1 deletion ydb/core/cms/cms_tx_update_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class TCms::TTxUpdateConfig : public TTransactionBase<TCms> {

if (!google::protobuf::util::MessageDifferencer::Equals(Config, Self->State->ConfigProto)) {
NIceDb::TNiceDb db(txc.DB);
db.Table<Schema::Param>().Key(1)
db.Table<Schema::Param>().Key(Schema::Param::Key)
.Update<Schema::Param::Config>(Config);

Modify = true;
Expand Down
6 changes: 6 additions & 0 deletions ydb/core/cms/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ struct TCmsSentinelConfig {

TStateStorageSelfHealConfig StateStorageSelfHealConfig;

TDuration InitialDeploymentGracePeriod;

void Serialize(NKikimrCms::TCmsConfig::TSentinelConfig &config) const {
config.SetEnable(Enable);
config.SetDryRun(DryRun);
Expand All @@ -100,6 +102,8 @@ struct TCmsSentinelConfig {

SaveStateLimits(config);
SaveEvictVDisksStatus(config);

config.SetInitialDeploymentGracePeriod(InitialDeploymentGracePeriod.GetValue());
}

void Deserialize(const NKikimrCms::TCmsConfig::TSentinelConfig &config) {
Expand All @@ -125,6 +129,8 @@ struct TCmsSentinelConfig {
StateLimits.swap(newStateLimits);

EvictVDisksStatus = LoadEvictVDisksStatus(config);

InitialDeploymentGracePeriod = TDuration::MicroSeconds(config.GetInitialDeploymentGracePeriod());
}

void SaveStateLimits(NKikimrCms::TCmsConfig::TSentinelConfig &config) const {
Expand Down
5 changes: 4 additions & 1 deletion ydb/core/cms/scheme.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,19 @@ namespace NKikimr::NCms {

struct Schema : NIceDb::Schema {
struct Param : Table<1> {
static constexpr ui32 Key = 1;

struct ID : Column<1, NScheme::NTypeIds::Uint32> {};
struct NextPermissionID : Column<2, NScheme::NTypeIds::Uint64> {};
struct NextRequestID : Column<3, NScheme::NTypeIds::Uint64> {};
struct NextNotificationID : Column<4, NScheme::NTypeIds::Uint64> {};
struct Config : Column<5, NScheme::NTypeIds::String> { using Type = NKikimrCms::TCmsConfig; };
struct LastLogRecordTimestamp : Column<6, NScheme::NTypeIds::Uint64> {};
struct FirstBootTimestamp : Column<7, NScheme::NTypeIds::Uint64> {};

using TKey = TableKey<ID>;
using TColumns = TableColumns<ID, NextPermissionID, NextRequestID, NextNotificationID,
Config, LastLogRecordTimestamp>;
Config, LastLogRecordTimestamp, FirstBootTimestamp>;
};

struct Permission : Table<2> {
Expand Down
39 changes: 33 additions & 6 deletions ydb/core/cms/sentinel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,14 @@ void TNodeStatusComputer::AddState(ENodeState newState) {

/// TPDiskStatusComputer

TPDiskStatusComputer::TPDiskStatusComputer(const ui32& defaultStateLimit, const ui32& goodStateLimit, const TLimitsMap& stateLimits)
TPDiskStatusComputer::TPDiskStatusComputer(const ui32& defaultStateLimit, const ui32& goodStateLimit, const TLimitsMap& stateLimits,
TInstant cmsFirstBootTimestamp, const TDuration& initialDeploymentGracePeriod)
: DefaultStateLimit(defaultStateLimit)
, GoodStateLimit(goodStateLimit)
, StateLimits(stateLimits)
, StateCounter(0)
, CMSFirstBootTimestamp(cmsFirstBootTimestamp)
, InitialDeploymentGracePeriod(initialDeploymentGracePeriod)
{
}

Expand Down Expand Up @@ -171,7 +174,11 @@ EPDiskStatus TPDiskStatusComputer::Compute(EPDiskStatus current, TString& reason
}
}

return EPDiskStatus::INACTIVE;
if (IsInitialDeploymentGracePeriod() && State == NKikimrBlobStorage::TPDiskState::Normal) {
return EPDiskStatus::ACTIVE;
} else {
return EPDiskStatus::INACTIVE;
}
}

reason = TStringBuilder()
Expand Down Expand Up @@ -219,10 +226,25 @@ void TPDiskStatusComputer::ResetForcedStatus() {
ForcedStatus.Clear();
}

bool TPDiskStatusComputer::IsInitialDeploymentGracePeriod() const {
if (TlsActivationContext) {
return CMSFirstBootTimestamp + InitialDeploymentGracePeriod > TActivationContext::Now();
} else {
return false; // unsupported outside of actorsystem
}
}

/// TPDiskStatus


TPDiskStatus::TPDiskStatus(EPDiskStatus initialStatus, const ui32& defaultStateLimit, const ui32& goodStateLimit, const TLimitsMap& stateLimits)
: TPDiskStatusComputer(defaultStateLimit, goodStateLimit, stateLimits)
: TPDiskStatus(initialStatus, defaultStateLimit, goodStateLimit, stateLimits, TInstant::Zero(), TDuration::Zero())
{}

TPDiskStatus::TPDiskStatus(EPDiskStatus initialStatus, const ui32& defaultStateLimit,
const ui32& goodStateLimit, const TLimitsMap& stateLimits,
TInstant cmsFirstBootTimestamp, const TDuration& initialDeploymentGracePeriod)
: TPDiskStatusComputer(defaultStateLimit, goodStateLimit, stateLimits, cmsFirstBootTimestamp, initialDeploymentGracePeriod)
, Current(initialStatus)
, ChangingAllowed(true)
{
Expand Down Expand Up @@ -284,8 +306,11 @@ void TPDiskStatus::DisallowChanging() {

/// TPDiskInfo

TPDiskInfo::TPDiskInfo(EPDiskStatus initialStatus, const ui32& defaultStateLimit, const ui32& goodStateLimit, const TLimitsMap& stateLimits)
: TPDiskStatus(initialStatus, defaultStateLimit, goodStateLimit, stateLimits)
TPDiskInfo::TPDiskInfo(EPDiskStatus initialStatus, const ui32& defaultStateLimit,
const ui32& goodStateLimit, const TLimitsMap& stateLimits,
TInstant cmsFirstBootTimestamp, const TDuration& initialDeploymentGracePeriod)
: TPDiskStatus(initialStatus, defaultStateLimit, goodStateLimit, stateLimits,
cmsFirstBootTimestamp, initialDeploymentGracePeriod)
, ActualStatus(initialStatus)
{
Touch();
Expand Down Expand Up @@ -619,7 +644,9 @@ class TConfigUpdater: public TUpdaterBase<TEvSentinel::TEvConfigUpdated, TConfig
continue;
}

pdisks.emplace(id, new TPDiskInfo(pdisk.GetDriveStatus(), Config.DefaultStateLimit, Config.GoodStateLimit, Config.StateLimits));
pdisks.emplace(id, new TPDiskInfo(pdisk.GetDriveStatus(), Config.DefaultStateLimit,
Config.GoodStateLimit, Config.StateLimits,
CmsState->FirstBootTimestamp, Config.InitialDeploymentGracePeriod));
}

SentinelState->ConfigUpdaterState.GotBSCResponse = true;
Expand Down
15 changes: 13 additions & 2 deletions ydb/core/cms/sentinel_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ using TLimitsMap = TMap<EPDiskState, ui32>;

class TPDiskStatusComputer {
public:
explicit TPDiskStatusComputer(const ui32& defaultStateLimit, const ui32& goodStateLimit, const TLimitsMap& stateLimits);
explicit TPDiskStatusComputer(const ui32& defaultStateLimit, const ui32& goodStateLimit, const TLimitsMap& stateLimits,
TInstant cmsFirstBootTimestamp, const TDuration& initialDeploymentGracePeriod);

void AddState(EPDiskState state, bool isNodeLocked);
EPDiskStatus Compute(EPDiskStatus current, TString& reason) const;
Expand All @@ -31,6 +32,8 @@ class TPDiskStatusComputer {
bool HasForcedStatus() const;
void ResetForcedStatus();

bool IsInitialDeploymentGracePeriod() const;

private:
const ui32& DefaultStateLimit;
const ui32& GoodStateLimit;
Expand All @@ -43,11 +46,17 @@ class TPDiskStatusComputer {

mutable bool HadBadStateRecently = false;

TInstant CMSFirstBootTimestamp;
const TDuration& InitialDeploymentGracePeriod;

}; // TPDiskStatusComputer

class TPDiskStatus: public TPDiskStatusComputer {
public:
explicit TPDiskStatus(EPDiskStatus initialStatus, const ui32& defaultStateLimit, const ui32& goodStateLimit, const TLimitsMap& stateLimits);
explicit TPDiskStatus(EPDiskStatus initialStatus, const ui32& defaultStateLimit,
const ui32& goodStateLimit, const TLimitsMap& stateLimits,
TInstant cmsFirstBootTimestamp, const TDuration& initialDeploymentGracePeriod);

void AddState(EPDiskState state, bool isNodeLocked);
bool IsChanged() const;
Expand Down Expand Up @@ -96,7 +105,9 @@ struct TPDiskInfo
ui32 PrevStatusChangeAttempt = 0;
EIgnoreReason IgnoreReason = NKikimrCms::TPDiskInfo::NOT_IGNORED;

explicit TPDiskInfo(EPDiskStatus initialStatus, const ui32& defaultStateLimit, const ui32& goodStateLimit, const TLimitsMap& stateLimits);
explicit TPDiskInfo(EPDiskStatus initialStatus, const ui32& defaultStateLimit,
const ui32& goodStateLimit, const TLimitsMap& stateLimits,
TInstant cmsFirstBootTimestamp, const TDuration& initialDeploymentGracePeriod);

bool IsTouched() const { return Touched; }
void Touch() { Touched = true; }
Expand Down
31 changes: 31 additions & 0 deletions ydb/core/cms/sentinel_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -866,6 +866,37 @@ Y_UNIT_TEST_SUITE(TSentinelTests) {
UNIT_ASSERT(computer.GetCurrentNodeState() == TNodeStatusComputer::ENodeState::GOOD);
}

// Exercises the Sentinel initial-deployment grace period: a 10-minute period
// is configured, a pdisk is driven through several states inside that period,
// then the clock is advanced by 15 minutes and the pdisk is driven again.
// NOTE(review): the third argument of SetPDiskState looks like the status the
// Sentinel is expected to request for the pdisk — confirm against TTestEnv.
Y_UNIT_TEST(InitialDeploymentGracePeriod) {
NKikimrCms::TCmsConfig config;
config.MutableSentinelConfig()->SetInitialDeploymentGracePeriod(TDuration::Minutes(10).GetValue());
TTestEnv env(8, 4, config);

const TPDiskID id = env.RandomPDiskID();

Cerr << "...Initializing" << Endl;
env.SetPDiskState({id}, NKikimrBlobStorage::TPDiskState::Initial, NKikimrBlobStorage::INACTIVE);

// Inside the grace period a single Normal report is paired with ACTIVE.
Cerr << "...Working normally" << Endl;
env.SetPDiskState({id}, NKikimrBlobStorage::TPDiskState::Normal, NKikimrBlobStorage::ACTIVE);

Cerr << "...Disconnected" << Endl;
env.SetPDiskState({id}, NKikimrBlobStorage::TPDiskState::NodeDisconnected, NKikimrBlobStorage::INACTIVE);

// Still inside the grace period: again one Normal report is paired with ACTIVE.
Cerr << "...Working normally again" << Endl;
env.SetPDiskState({id}, NKikimrBlobStorage::TPDiskState::Normal, NKikimrBlobStorage::ACTIVE);

// 15 minutes exceeds the 10-minute grace period configured above.
Cerr << "...Initial deployment grace period is over" << Endl;
env.AdvanceCurrentTime(TDuration::Minutes(15));

Cerr << "...Disconnected" << Endl;
env.SetPDiskState({id}, NKikimrBlobStorage::TPDiskState::NodeDisconnected, NKikimrBlobStorage::INACTIVE);

// Normal is reported DefaultStateLimit - 1 times without an expected status;
// ACTIVE is paired only with the final, DefaultStateLimit-th report.
Cerr << "...Working normally again, but no fast path to ACTIVE" << Endl;
for (ui32 i = 1; i < DefaultStateLimit; ++i) {
env.SetPDiskState({id}, NKikimrBlobStorage::TPDiskState::Normal);
}
env.SetPDiskState({id}, NKikimrBlobStorage::TPDiskState::Normal, EPDiskStatus::ACTIVE);
}
} // TSentinelTests

}
7 changes: 6 additions & 1 deletion ydb/core/cms/sentinel_ut_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ class TTestEnv: public TCmsTestEnv {
}

public:
explicit TTestEnv(ui32 nodeCount, ui32 pdisks, const NKikimrCms::TCmsConfig &config = {})
explicit TTestEnv(ui32 nodeCount, ui32 pdisks, NKikimrCms::TCmsConfig config = {})
: TCmsTestEnv(nodeCount, pdisks)
{
SetLogPriority(NKikimrServices::CMS, NLog::PRI_DEBUG);
Expand Down Expand Up @@ -123,6 +123,11 @@ class TTestEnv: public TCmsTestEnv {
});

State = new TCmsState;

auto* sentinelConfig = config.MutableSentinelConfig();
if (!sentinelConfig->HasInitialDeploymentGracePeriod()) {
sentinelConfig->SetInitialDeploymentGracePeriod(0);
}
State->Config.Deserialize(config);
MockClusterInfo(State->ClusterInfo);
State->CmsActorId = GetSender();
Expand Down
2 changes: 2 additions & 0 deletions ydb/core/cms/ut_sentinel/ya.make
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
UNITTEST_FOR(ydb/core/cms)

FORK_SUBTESTS()

SIZE(MEDIUM)

PEERDIR(
Expand Down
1 change: 1 addition & 0 deletions ydb/core/cms/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ SRCS(
cms_tx_remove_permissions.cpp
cms_tx_remove_request.cpp
cms_tx_remove_task.cpp
cms_tx_store_first_boot_timestamp.cpp
cms_tx_store_permissions.cpp
cms_tx_store_walle_task.cpp
cms_tx_update_config.cpp
Expand Down
1 change: 1 addition & 0 deletions ydb/core/protos/cms.proto
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,7 @@ message TCmsConfig {
optional uint32 GoodStateLimit = 16 [default = 5];

optional TStateStorageSelfHealConfig StateStorageSelfHealConfig = 19;
optional uint64 InitialDeploymentGracePeriod = 20 [default = 600000000];
}

message TLogConfig {
Expand Down
Loading
Loading