Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 17 additions & 7 deletions ydb/core/fq/libs/row_dispatcher/row_dispatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
#include "leader_election.h"
#include "probes.h"

#include <ydb/core/base/appdata_fwd.h>
#include <ydb/core/base/feature_flags.h>

#include <ydb/core/fq/libs/actors/logging/log.h>
#include <ydb/core/fq/libs/events/events.h>
#include <ydb/core/fq/libs/metrics/sanitize_label.h>
Expand Down Expand Up @@ -117,14 +120,17 @@ struct TQueryStatKeyHash {

struct TAggQueryStat {
TAggQueryStat() = default;
TAggQueryStat(const TString& queryId, const ::NMonitoring::TDynamicCounterPtr& counters, const NYql::NPq::NProto::TDqPqTopicSource& sourceParams)
TAggQueryStat(const TString& queryId, const ::NMonitoring::TDynamicCounterPtr& counters, const NYql::NPq::NProto::TDqPqTopicSource& sourceParams, bool enableStreamingQueriesCounters)
: QueryId(queryId)
, SubGroup(counters) {
for (const auto& sensor : sourceParams.GetTaskSensorLabel()) {
SubGroup = SubGroup->GetSubgroup(sensor.GetLabel(), sensor.GetValue());
}
auto queryGroup = SubGroup->GetSubgroup("query_id", queryId);
auto topicGroup = queryGroup->GetSubgroup("read_group", SanitizeLabel(sourceParams.GetReadGroup()));
auto topicGroup = SubGroup;
if (enableStreamingQueriesCounters) {
topicGroup = topicGroup->GetSubgroup("query_id", queryId);
topicGroup = topicGroup->GetSubgroup("read_group", SanitizeLabel(sourceParams.GetReadGroup()));
}
MaxQueuedBytesCounter = topicGroup->GetCounter("MaxQueuedBytes");
AvgQueuedBytesCounter = topicGroup->GetCounter("AvgQueuedBytes");
MaxReadLagCounter = topicGroup->GetCounter("MaxReadLag");
Expand Down Expand Up @@ -411,6 +417,7 @@ class TRowDispatcher : public TActorBootstrapped<TRowDispatcher> {
TMap<ui64, TAtomicSharedPtr<TConsumerInfo>> ConsumersByEventQueueId;
THashMap<TTopicSessionKey, TTopicSessionInfo, TTopicSessionKeyHash> TopicSessions;
TMap<TActorId, TReadActorInfo> ReadActorsInternalState;
bool EnableStreamingQueriesCounters = false;

public:
explicit TRowDispatcher(
Expand Down Expand Up @@ -552,6 +559,7 @@ void TRowDispatcher::Bootstrap() {
TlsActivationContext->ActorSystem(), SelfId());
}
NodesTracker.Init(SelfId());
EnableStreamingQueriesCounters = NKikimr::AppData()->FeatureFlags.GetEnableStreamingQueriesCounters();
}

void TRowDispatcher::Handle(NFq::TEvRowDispatcher::TEvCoordinatorChanged::TPtr& ev) {
Expand Down Expand Up @@ -665,7 +673,7 @@ void TRowDispatcher::UpdateMetrics() {
TQueryStatKey statKey{consumer->QueryId, key.ReadGroup};
auto& stats = AggrStats.LastQueryStats.emplace(
statKey,
TAggQueryStat(consumer->QueryId, Metrics.Counters, consumer->SourceParams)).first->second;
TAggQueryStat(consumer->QueryId, Metrics.Counters, consumer->SourceParams, EnableStreamingQueriesCounters)).first->second;
stats.Add(partition.Stat, partition.FilteredBytes);
partition.FilteredBytes = 0;
}
Expand Down Expand Up @@ -841,9 +849,11 @@ void TRowDispatcher::UpdateReadActorsInternalState() {
void TRowDispatcher::Handle(NFq::TEvRowDispatcher::TEvStartSession::TPtr& ev) {
LOG_ROW_DISPATCHER_DEBUG("Received TEvStartSession from " << ev->Sender << ", read group " << ev->Get()->Record.GetSource().GetReadGroup() << ", topicPath " << ev->Get()->Record.GetSource().GetTopicPath() <<
" part id " << JoinSeq(',', ev->Get()->Record.GetPartitionIds()) << " query id " << ev->Get()->Record.GetQueryId() << " cookie " << ev->Cookie);
auto queryGroup = Metrics.Counters->GetSubgroup("query_id", ev->Get()->Record.GetQueryId());
auto topicGroup = queryGroup->GetSubgroup("read_group", SanitizeLabel(ev->Get()->Record.GetSource().GetReadGroup()));
topicGroup->GetCounter("StartSession", true)->Inc();
if (EnableStreamingQueriesCounters) {
auto queryGroup = Metrics.Counters->GetSubgroup("query_id", ev->Get()->Record.GetQueryId());
auto topicGroup = queryGroup->GetSubgroup("read_group", SanitizeLabel(ev->Get()->Record.GetSource().GetReadGroup()));
topicGroup->GetCounter("StartSession", true)->Inc();
}

LWPROBE(StartSession, ev->Sender.ToString(), ev->Get()->Record.GetQueryId(), ev->Get()->Record.ByteSizeLong());

Expand Down
37 changes: 23 additions & 14 deletions ydb/core/fq/libs/row_dispatcher/topic_session.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
#include "topic_session.h"

#include <ydb/core/base/appdata_fwd.h>
#include <ydb/core/base/feature_flags.h>

#include <ydb/core/fq/libs/actors/logging/log.h>
#include <ydb/core/fq/libs/metrics/sanitize_label.h>
#include <ydb/core/fq/libs/row_dispatcher/events/data_plane.h>
Expand All @@ -23,12 +26,16 @@ namespace {
////////////////////////////////////////////////////////////////////////////////

struct TTopicSessionMetrics {
void Init(const ::NMonitoring::TDynamicCounterPtr& counters, const TString& topicPath, const TString& readGroup, ui32 partitionId) {
TopicGroup = counters->GetSubgroup("topic", SanitizeLabel(topicPath));
ReadGroup = TopicGroup->GetSubgroup("read_group", SanitizeLabel(readGroup));
PartitionGroup = ReadGroup->GetSubgroup("partition", ToString(partitionId));

AllSessionsDataRate = ReadGroup->GetCounter("AllSessionsDataRate", true);
void Init(const ::NMonitoring::TDynamicCounterPtr& counters, const TString& topicPath, const TString& readGroupName, ui32 partitionId, bool enableStreamingQueriesCounters) {
const auto topicGroup = counters->GetSubgroup("topic", SanitizeLabel(topicPath));

auto readGroup = topicGroup;
PartitionGroup = topicGroup;
if (enableStreamingQueriesCounters) {
readGroup = topicGroup->GetSubgroup("read_group", SanitizeLabel(readGroupName));
PartitionGroup = readGroup->GetSubgroup("partition", ToString(partitionId));
}
AllSessionsDataRate = readGroup->GetCounter("AllSessionsDataRate", true);
InFlyAsyncInputData = PartitionGroup->GetCounter("InFlyAsyncInputData");
InFlySubscribe = PartitionGroup->GetCounter("InFlySubscribe");
ReconnectRate = PartitionGroup->GetCounter("ReconnectRate", true);
Expand All @@ -37,9 +44,6 @@ struct TTopicSessionMetrics {
WaitEventTimeMs = PartitionGroup->GetHistogram("WaitEventTimeMs", NMonitoring::ExplicitHistogram({5, 20, 100, 500, 2000}));
QueuedBytes = PartitionGroup->GetCounter("QueuedBytes");
}

::NMonitoring::TDynamicCounterPtr TopicGroup;
::NMonitoring::TDynamicCounterPtr ReadGroup;
::NMonitoring::TDynamicCounterPtr PartitionGroup;
::NMonitoring::TDynamicCounters::TCounterPtr InFlyAsyncInputData;
::NMonitoring::TDynamicCounters::TCounterPtr InFlySubscribe;
Expand Down Expand Up @@ -98,7 +102,7 @@ class TTopicSession : public TActorBootstrapped<TTopicSession>, NYql::TTopicEven
struct TClientsInfo : public IClientDataConsumer {
using TPtr = TIntrusivePtr<TClientsInfo>;

TClientsInfo(TTopicSession& self, const TString& logPrefix, const ITopicFormatHandler::TSettings& handlerSettings, const NFq::TEvRowDispatcher::TEvStartSession::TPtr& ev, const NMonitoring::TDynamicCounterPtr& counters, const TString& readGroup, TMaybe<ui64> offset)
TClientsInfo(TTopicSession& self, const TString& logPrefix, const ITopicFormatHandler::TSettings& handlerSettings, const NFq::TEvRowDispatcher::TEvStartSession::TPtr& ev, const NMonitoring::TDynamicCounterPtr& counters, const TString& readGroup, TMaybe<ui64> offset, bool enableStreamingQueriesCounters)
: Self(self)
, LogPrefix(logPrefix)
, HandlerSettings(handlerSettings)
Expand All @@ -122,8 +126,11 @@ class TTopicSession : public TActorBootstrapped<TTopicSession>, NYql::TTopicEven
for (const auto& sensor : ev->Get()->Record.GetSource().GetTaskSensorLabel()) {
Counters = Counters->GetSubgroup(sensor.GetLabel(), sensor.GetValue());
}
auto queryGroup = Counters->GetSubgroup("query_id", QueryId);
auto readSubGroup = queryGroup->GetSubgroup("read_group", SanitizeLabel(readGroup));
auto readSubGroup = Counters;
if (enableStreamingQueriesCounters) {
readSubGroup = readSubGroup->GetSubgroup("query_id", QueryId);
readSubGroup = readSubGroup->GetSubgroup("read_group", SanitizeLabel(readGroup));
}
FilteredDataRate = readSubGroup->GetCounter("FilteredDataRate", true);
RestartSessionByOffsetsByQuery = readSubGroup->GetCounter("RestartSessionByOffsetsByQuery", true);

Expand Down Expand Up @@ -295,6 +302,7 @@ class TTopicSession : public TActorBootstrapped<TTopicSession>, NYql::TTopicEven
TTopicSessionMetrics Metrics;
const ::NMonitoring::TDynamicCounterPtr Counters;
const ::NMonitoring::TDynamicCounterPtr CountersRoot;
bool EnableStreamingQueriesCounters = false;

public:
TTopicSession(
Expand Down Expand Up @@ -419,7 +427,8 @@ TTopicSession::TTopicSession(

void TTopicSession::Bootstrap() {
Become(&TTopicSession::StateFunc);
Metrics.Init(Counters, TopicPath, ReadGroup, PartitionId);
EnableStreamingQueriesCounters = NKikimr::AppData()->FeatureFlags.GetEnableStreamingQueriesCounters();
Metrics.Init(Counters, TopicPath, ReadGroup, PartitionId, EnableStreamingQueriesCounters);
LogPrefix = LogPrefix + " " + SelfId().ToString() + " ";
LOG_ROW_DISPATCHER_DEBUG("Bootstrap " << TopicPathPartition
<< ", Timeout " << Config.GetTimeoutBeforeStartSession() << " sec");
Expand Down Expand Up @@ -783,7 +792,7 @@ void TTopicSession::Handle(NFq::TEvRowDispatcher::TEvStartSession::TPtr& ev) {
const TString& format = source.GetFormat();
ITopicFormatHandler::TSettings handlerSettings = {.ParsingFormat = format ? format : "raw"};

auto clientInfo = Clients.insert({ev->Sender, MakeIntrusive<TClientsInfo>(*this, LogPrefix, handlerSettings, ev, Counters, ReadGroup, offset)}).first->second;
auto clientInfo = Clients.insert({ev->Sender, MakeIntrusive<TClientsInfo>(*this, LogPrefix, handlerSettings, ev, Counters, ReadGroup, offset, EnableStreamingQueriesCounters)}).first->second;
auto formatIt = FormatHandlers.find(handlerSettings);
if (formatIt == FormatHandlers.end()) {
auto config = CreateFormatHandlerConfig(Config, FunctionRegistry, CompileServiceActorId, source.GetSkipJsonErrors());
Expand Down
4 changes: 2 additions & 2 deletions ydb/core/kqp/compute_actor/kqp_compute_actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,8 @@ NYql::NDq::IDqAsyncIoFactory::TPtr CreateKqpAsyncIoFactory(

NYql::NDq::RegisterDQSolomonReadActorFactory(*factory, federatedQuerySetup->CredentialsFactory);
NYql::NDq::RegisterDQSolomonWriteActorFactory(*factory, federatedQuerySetup->CredentialsFactory);
NYql::NDq::RegisterDqPqReadActorFactory(*factory, *federatedQuerySetup->Driver, federatedQuerySetup->CredentialsFactory, federatedQuerySetup->PqGateway, nullptr);
NYql::NDq::RegisterDqPqWriteActorFactory(*factory, *federatedQuerySetup->Driver, federatedQuerySetup->CredentialsFactory, federatedQuerySetup->PqGateway, nullptr);
NYql::NDq::RegisterDqPqReadActorFactory(*factory, *federatedQuerySetup->Driver, federatedQuerySetup->CredentialsFactory, federatedQuerySetup->PqGateway, counters->GetKqpCounters()->GetSubgroup("subsystem", "DqSourceTracker"));
NYql::NDq::RegisterDqPqWriteActorFactory(*factory, *federatedQuerySetup->Driver, federatedQuerySetup->CredentialsFactory, federatedQuerySetup->PqGateway, counters->GetKqpCounters()->GetSubgroup("subsystem", "DqSinkTracker"));
}

return factory;
Expand Down
6 changes: 5 additions & 1 deletion ydb/core/kqp/executer_actor/kqp_data_executer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2814,13 +2814,17 @@ class TKqpDataExecuter : public TKqpExecuterBase<TKqpDataExecuter, EExecType::Da
}
}

auto counters = Counters->Counters->GetKqpCounters();
if (AppData()->FeatureFlags.GetEnableStreamingQueriesCounters()) {
counters = counters->GetSubgroup("path", context->StreamingQueryPath);
}
const auto& checkpointId = context->CheckpointId;
CheckpointCoordinatorId = Register(MakeCheckpointCoordinator(
::NFq::TCoordinatorId(checkpointId, Generation),
NYql::NDq::MakeCheckpointStorageID(),
SelfId(),
{},
Counters->Counters->GetKqpCounters()->GetSubgroup("path", context->StreamingQueryPath),
counters,
graphParams,
stateLoadMode,
streamingDisposition).Release());
Expand Down
8 changes: 8 additions & 0 deletions ydb/core/kqp/executer_actor/kqp_tasks_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2492,6 +2492,10 @@ void TKqpTasksGraph::BuildReadTasksFromSource(TStageInfo& stageInfo, const TVect

FillReadTaskFromSource(task, sourceName, structuredToken, resourceSnapshot, nodeOffset++);

if (GetMeta().UserRequestContext && GetMeta().UserRequestContext->StreamingQueryPath) {
task.Meta.TaskParams.emplace("query_path", GetMeta().UserRequestContext->StreamingQueryPath);
}

tasksIds.push_back(task.Id);
}

Expand Down Expand Up @@ -2826,6 +2830,10 @@ void TKqpTasksGraph::BuildExternalSinks(const NKqpProto::TKqpSink& sink, TKqpTas
}
}

if (GetMeta().UserRequestContext && GetMeta().UserRequestContext->StreamingQueryPath) {
task.Meta.TaskParams.emplace("query_path", GetMeta().UserRequestContext->StreamingQueryPath);
}

auto& output = task.Outputs[sink.GetOutputIndex()];
output.Type = TTaskOutputType::Sink;
output.SinkType = extSink.GetType();
Expand Down
2 changes: 1 addition & 1 deletion ydb/core/kqp/proxy_service/kqp_proxy_service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1819,7 +1819,7 @@ class TKqpProxyService : public TActorBootstrapped<TKqpProxyService> {
"cs",
NKikimr::CreateYdbCredentialsProviderFactory,
*FederatedQuerySetup->Driver,
Counters->GetKqpCounters()->GetSubgroup("subsystem", "storage_service"));
Counters->GetKqpCounters()->GetSubgroup("subsystem", "checkpoints_storage_service"));

CheckpointStorageService = TActivationContext::Register(service.release());
TActivationContext::ActorSystem()->RegisterLocalService(
Expand Down
1 change: 1 addition & 0 deletions ydb/core/protos/feature_flags.proto
Original file line number Diff line number Diff line change
Expand Up @@ -241,4 +241,5 @@ message TFeatureFlags {
optional bool DisableMissingDefaultColumnsInBulkUpsert = 215 [default = true];
optional bool EnableColumnTablesBackup = 216 [default = true];
optional bool EnableReplication = 217 [default = true];
optional bool EnableStreamingQueriesCounters = 218 [default = false];
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
#include <yql/essentials/public/issue/yql_issue_message.h>
#include <yql/essentials/utils/log/log.h>
#include <yql/essentials/utils/yql_panic.h>
#include <ydb/core/base/appdata_fwd.h>
#include <ydb/core/base/feature_flags.h>
#include <ydb/core/fq/libs/events/events.h>
#include <ydb/core/fq/libs/row_dispatcher/events/data_plane.h>

Expand Down Expand Up @@ -88,8 +90,11 @@ struct TRowDispatcherReadActorMetrics {
for (const auto& sensor : sourceParams.GetTaskSensorLabel()) {
SubGroup = SubGroup->GetSubgroup(sensor.GetLabel(), sensor.GetValue());
}
auto source = SubGroup->GetSubgroup("tx_id", TxId);
auto task = source->GetSubgroup("task_id", ToString(taskId));
auto task = SubGroup;
if (NKikimr::AppData() && NKikimr::AppData()->FeatureFlags.GetEnableStreamingQueriesCounters()) {
auto source = SubGroup->GetSubgroup("tx_id", TxId);
task = source->GetSubgroup("task_id", ToString(taskId));
}
InFlyGetNextBatch = task->GetCounter("InFlyGetNextBatch");
InFlyAsyncInputData = task->GetCounter("InFlyAsyncInputData");
ReInit = task->GetCounter("ReInit", true);
Expand Down
21 changes: 16 additions & 5 deletions ydb/library/yql/providers/pq/async_io/dq_pq_read_actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
#include "dq_pq_read_actor_base.h"
#include "probes.h"

#include <ydb/core/base/appdata_fwd.h>
#include <ydb/core/base/feature_flags.h>

#include <ydb/core/fq/libs/row_dispatcher/events/data_plane.h>
#include <ydb/library/actors/core/actor.h>
#include <ydb/library/actors/core/event_local.h>
Expand Down Expand Up @@ -142,12 +145,15 @@ class TDqPqReadActor : public NActors::TActor<TDqPqReadActor>, public NYql::NDq:
} else {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Может, в else-ветке, если counters != nullptr, просто не добавлять лейбл и репортить агрегированные значения? Или там ещё лейблы потом добавляются?

SubGroup = MakeIntrusive<::NMonitoring::TDynamicCounters>();
}

for (const auto& sensor : sourceParams.GetTaskSensorLabel()) {
SubGroup = SubGroup->GetSubgroup(sensor.GetLabel(), sensor.GetValue());
}
auto source = SubGroup->GetSubgroup("tx_id", TxId);
auto task = source->GetSubgroup("task_id", ToString(taskId));
auto source = SubGroup;
auto task = SubGroup;
if (NKikimr::AppData() && NKikimr::AppData()->FeatureFlags.GetEnableStreamingQueriesCounters()) {
source = source->GetSubgroup("tx_id", TxId);
task = source->GetSubgroup("task_id", ToString(taskId));
}
InFlyAsyncInputData = task->GetCounter("InFlyAsyncInputData");
InFlySubscribe = task->GetCounter("InFlySubscribe");
AsyncInputDataRate = task->GetCounter("AsyncInputDataRate", true);
Expand Down Expand Up @@ -974,12 +980,17 @@ void RegisterDqPqReadActorFactory(TDqAsyncIoFactory& factory, NYdb::TDriver driv
TVector<NPq::NProto::TDqReadTaskParams> readTaskParamsMsg;
ui32 topicPartitionsCount = ExtractPartitionsFromParams(readTaskParamsMsg, args.TaskParams, args.ReadRanges);

auto txId = args.TxId;
auto taskParamsIt = args.TaskParams.find("query_path");
if (taskParamsIt != args.TaskParams.end()) {
txId = taskParamsIt->second;
}
if (!settings.GetSharedReading()) {
return CreateDqPqReadActor(
std::move(settings),
args.InputIndex,
args.StatsLevel,
args.TxId,
txId,
args.TaskId,
args.SecureParams,
std::move(readTaskParamsMsg),
Expand All @@ -998,7 +1009,7 @@ void RegisterDqPqReadActorFactory(TDqAsyncIoFactory& factory, NYdb::TDriver driv
std::move(settings),
args.InputIndex,
args.StatsLevel,
args.TxId,
txId,
args.TaskId,
args.SecureParams,
std::move(readTaskParamsMsg),
Expand Down
17 changes: 14 additions & 3 deletions ydb/library/yql/providers/pq/async_io/dq_pq_write_actor.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
#include "dq_pq_write_actor.h"
#include "probes.h"

#include <ydb/core/base/appdata_fwd.h>
#include <ydb/core/base/feature_flags.h>

#include <ydb/library/actors/core/actor.h>
#include <ydb/library/actors/core/event_local.h>
#include <ydb/library/actors/core/events.h>
Expand Down Expand Up @@ -110,8 +113,11 @@ class TDqPqWriteActor : public NActors::TActor<TDqPqWriteActor>, public IDqCompu
} else {
SubGroup = MakeIntrusive<::NMonitoring::TDynamicCounters>();
}
auto sink = SubGroup->GetSubgroup("tx_id", TxId);
auto task = sink->GetSubgroup("task_id", ToString(taskId));
auto task = SubGroup;
if (NKikimr::AppData() && NKikimr::AppData()->FeatureFlags.GetEnableStreamingQueriesCounters()) {
auto sink = SubGroup->GetSubgroup("tx_id", TxId);
task = sink->GetSubgroup("task_id", ToString(taskId));
}
LastAckLatency = task->GetCounter("LastAckLatencyMs");
InFlyCheckpoints = task->GetCounter("InFlyCheckpoints");
InFlyData = task->GetCounter("InFlyData");
Expand Down Expand Up @@ -569,12 +575,17 @@ void RegisterDqPqWriteActorFactory(TDqAsyncIoFactory& factory, NYdb::TDriver dri
NPq::NProto::TDqPqTopicSink&& settings,
IDqAsyncIoFactory::TSinkArguments&& args)
{
auto txId = args.TxId;
auto taskParamsIt = args.TaskParams.find("query_path");
if (taskParamsIt != args.TaskParams.end()) {
txId = taskParamsIt->second;
}
NLwTraceMonPage::ProbeRegistry().AddProbesList(LWTRACE_GET_PROBES(DQ_PQ_PROVIDER));
return CreateDqPqWriteActor(
std::move(settings),
args.OutputIndex,
args.StatsLevel,
args.TxId,
txId,
args.TaskId,
args.SecureParams,
driver,
Expand Down
3 changes: 2 additions & 1 deletion ydb/tests/fq/streaming/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ def get_ydb_config():
erasure=Erasure.MIRROR_3_DC,
extra_feature_flags={
"enable_external_data_sources": True,
"enable_streaming_queries": True
"enable_streaming_queries": True,
"enable_streaming_queries_counters": True
},
query_service_config={"available_external_data_sources": ["Ydb"]},
table_service_config={},
Expand Down
Loading