Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion cmd/mysync/info.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ import (
"github.com/yandex/mysync/internal/app"
)

// Values of the "info" command's filter flags (--zone, --lag, --host).
// init binds them to the flags with an empty default, so each one stays
// empty unless the user passes the matching option.
var infoZone, infoLag, infoHost string

var infoCmd = &cobra.Command{
Use: "info",
Short: "Print information from DCS",
Expand All @@ -18,10 +24,13 @@ var infoCmd = &cobra.Command{
fmt.Println(err)
os.Exit(1)
}
os.Exit(app.CliInfo(short))
os.Exit(app.CliInfo(short, infoZone, infoLag, infoHost))
},
}

// init declares the filter flags of the "info" command and attaches the
// command to the root command.
func init() {
	flags := infoCmd.Flags()
	flags.StringVar(&infoZone, "zone", "", "show only hosts from the specified zone")
	flags.StringVar(&infoLag, "lag", "", "filter health block by replication lag, e.g. >10 or <5")
	flags.StringVar(&infoHost, "host", "", "show only hosts containing the specified substring")

	rootCmd.AddCommand(infoCmd)
}
249 changes: 180 additions & 69 deletions internal/app/cli_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,116 +3,227 @@ package app
import (
	"fmt"
	"math"
	"sort"
	"strconv"
	"strings"

	nodestate "github.com/yandex/mysync/internal/app/node_state"
	"gopkg.in/yaml.v2"

	"github.com/yandex/mysync/internal/dcs"
)

// lagFilter is a parsed replication lag condition: a comparison operator and
// the threshold the lag is compared against.
type lagFilter struct {
	// op is the comparison operator; parseLagFilter only produces '>' or '<'.
	op byte
	// value is the threshold on the right-hand side of the comparison.
	value float64
}

// CliInfo is CLI command printing information from DCS to the stdout
func (app *App) CliInfo(short bool) int {
func (app *App) CliInfo(short bool, zone string, lag string, hostFilter string) int {
cancel, err := app.cliInitApp()
if err != nil {
app.logger.Error(err.Error())
return 1
}
defer cancel()

parsedLag, err := parseLagFilter(lag)
if err != nil {
app.logger.Error(err.Error())
return 1
}

var tree any
if short {
data := make(map[string]any)

haNodes, err := app.cluster.GetClusterHAHostsFromDcs()
if err != nil {
app.logger.Errorf("failed to get ha nodes: %v", err)
return 1
}
data[pathHANodes] = haNodes

cascadeNodes, err := app.cluster.GetClusterCascadeHostsFromDcs()
tree, err = app.buildShortInfo(zone, hostFilter, parsedLag)
if err != nil {
app.logger.Errorf("failed to get cascade nodes: %v", err)
app.logger.Error(err.Error())
return 1
}
data[pathCascadeNodesPrefix] = cascadeNodes

activeNodes, err := app.GetActiveNodes()
} else {
tree, err = app.dcs.GetTree("")
if err != nil {
app.logger.Error(err.Error())
return 1
}
sort.Strings(activeNodes)
data[pathActiveNodes] = activeNodes
}
data, err := yaml.Marshal(tree)
if err != nil {
app.logger.Errorf("failed to marshal yaml: %v", err)
return 1
}
fmt.Print(string(data))
return 0
}

nodesOnRecovery, err := app.GetHostsOnRecovery()
if err != nil {
app.logger.Errorf("failed to get nodes on recovery: %v", err)
return 1
}
func (app *App) buildShortInfo(zone string, hostFilter string, parsedLag *lagFilter) (map[string]any, error) {
data := make(map[string]any)

haNodes, err := app.cluster.GetClusterHAHostsFromDcs()
if err != nil {
return nil, fmt.Errorf("failed to get ha nodes: %v", err)
}
data[pathHANodes] = filterHostsMap(haNodes, zone, hostFilter, app.config.OfflineModeAZSeparator)

cascadeNodes, err := app.cluster.GetClusterCascadeHostsFromDcs()
if err != nil {
return nil, fmt.Errorf("failed to get cascade nodes: %v", err)
}
data[pathCascadeNodesPrefix] = filterHostsMap(cascadeNodes, zone, hostFilter, app.config.OfflineModeAZSeparator)

activeNodes, err := app.GetActiveNodes()
if err != nil {
return nil, err
}
activeNodes = filterHostsByFilters(activeNodes, zone, hostFilter, app.config.OfflineModeAZSeparator)
sort.Strings(activeNodes)
data[pathActiveNodes] = activeNodes

nodesOnRecovery, err := app.GetHostsOnRecovery()
if err != nil {
return nil, fmt.Errorf("failed to get nodes on recovery: %v", err)
}
if nodesOnRecovery != nil {
nodesOnRecovery = filterHostsByFilters(nodesOnRecovery, zone, hostFilter, app.config.OfflineModeAZSeparator)
if len(nodesOnRecovery) > 0 {
sort.Strings(nodesOnRecovery)
data[pathRecovery] = nodesOnRecovery
}
}

clusterState, err := app.getClusterStateFromDcs()
if err != nil {
app.logger.Errorf("failed to get cluster state: %v", err)
return 1
clusterState, err := app.getClusterStateFromDcs()
if err != nil {
return nil, fmt.Errorf("failed to get cluster state: %v", err)
}

var master string
err = app.dcs.Get(pathMasterNode, &master)
if err != nil && err != dcs.ErrNotFound {
return nil, fmt.Errorf("failed to get %s: %v", pathMasterNode, err)
}

health := make([]string, 0, len(clusterState))
if state, ok := clusterState[master]; ok && filterHostsArray(master, zone, hostFilter, app.config.OfflineModeAZSeparator) && matchesLagFilter(state, parsedLag) {
health = append(health, fmt.Sprintf("===> %q: %s", master, state.String()))
}
healthHosts := make([]string, 0, len(clusterState))
for host, state := range clusterState {
if host == master {
continue
}
health := make(map[string]any)
for host, state := range clusterState {
health[host] = state.String()
if !filterHostsArray(host, zone, hostFilter, app.config.OfflineModeAZSeparator) {
continue
}
data[pathHealthPrefix] = health

for _, path := range []string{pathLastSwitch, pathCurrentSwitch, pathLastRejectedSwitch} {
var switchover Switchover
err = app.dcs.Get(path, &switchover)
if err == nil {
data[path] = switchover.String()
} else if err != dcs.ErrNotFound {
app.logger.Errorf("failed to get %s: %v", path, err)
return 1
}
if !matchesLagFilter(state, parsedLag) {
continue
}
healthHosts = append(healthHosts, host)
}
sort.Strings(healthHosts)
for _, host := range healthHosts {
health = append(health, fmt.Sprintf(" %q: %s", host, clusterState[host].String()))
}
data[pathHealthPrefix] = health

var maintenance Maintenance
err = app.dcs.Get(pathMaintenance, &maintenance)
for _, path := range []string{pathLastSwitch, pathCurrentSwitch, pathLastRejectedSwitch} {
var switchover Switchover
err = app.dcs.Get(path, &switchover)
if err == nil {
data[pathMaintenance] = maintenance.String()
data[path] = switchover.String()
} else if err != dcs.ErrNotFound {
app.logger.Errorf("failed to get %s: %v", pathMaintenance, err)
return 1
return nil, fmt.Errorf("failed to get %s: %v", path, err)
}
}

var manager dcs.LockOwner
err = app.dcs.Get(pathManagerLock, &manager)
if err != nil && err != dcs.ErrNotFound {
app.logger.Errorf("failed to get %s: %v", pathManagerLock, err)
return 1
}
data[pathManagerLock] = manager.Hostname
var maintenance Maintenance
err = app.dcs.Get(pathMaintenance, &maintenance)
if err == nil {
data[pathMaintenance] = maintenance.String()
} else if err != dcs.ErrNotFound {
return nil, fmt.Errorf("failed to get %s: %v", pathMaintenance, err)
}

var master string
err = app.dcs.Get(pathMasterNode, &master)
if err != nil && err != dcs.ErrNotFound {
app.logger.Errorf("failed to get %s: %v", pathMasterNode, err)
return 1
var manager dcs.LockOwner
err = app.dcs.Get(pathManagerLock, &manager)
if err != nil && err != dcs.ErrNotFound {
return nil, fmt.Errorf("failed to get %s: %v", pathManagerLock, err)
}
data[pathManagerLock] = manager.Hostname
data[pathMasterNode] = master

return data, nil
}

func filterHostsByFilters(hosts []string, zone string, hostFilter string, separator string) []string {
if zone == "" && hostFilter == "" {
return hosts
}
filtered := make([]string, 0, len(hosts))
for _, host := range hosts {
if filterHostsArray(host, zone, hostFilter, separator) {
filtered = append(filtered, host)
}
data[pathMasterNode] = master
tree = data
} else {
tree, err = app.dcs.GetTree("")
if err != nil {
app.logger.Error(err.Error())
return 1
}
return filtered
}

// filterHostsMap returns the entries of hosts whose key (the host name)
// passes the zone and host name filters.
//
// When both filters are empty the input map itself is returned (not a copy).
// Otherwise a new, non-nil map is returned and the input is left untouched.
// separator is forwarded to filterHostsArray for the zone check.
func filterHostsMap[T any](hosts map[string]T, zone string, hostFilter string, separator string) map[string]T {
	if zone == "" && hostFilter == "" {
		return hosts
	}
	filtered := make(map[string]T)
	for host, value := range hosts {
		if filterHostsArray(host, zone, hostFilter, separator) {
			filtered[host] = value
		}
	}
	return filtered
}

// filterHostsArray reports whether a single host passes both filters.
//
// An empty zone or hostFilter accepts every host. A non-empty zone must equal
// the value getAvailabilityZone returns for the host and separator; a
// non-empty hostFilter must occur in the host name as a substring.
func filterHostsArray(host string, zone string, hostFilter string, separator string) bool {
	zoneMatches := zone == "" || getAvailabilityZone(host, separator) == zone
	hostMatches := hostFilter == "" || strings.Contains(host, hostFilter)
	return zoneMatches && hostMatches
}

// parseLagFilter parses a replication lag filter expression such as ">10" or
// "<5": a single '>' or '<' followed by a number. Whitespace around the
// expression and between the operator and the number is ignored.
//
// An empty (or all-whitespace) input means "no filter" and yields nil, nil.
// Any other input that is not an operator followed by a finite number yields
// an error.
func parseLagFilter(raw string) (*lagFilter, error) {
	raw = strings.TrimSpace(raw)
	if raw == "" {
		return nil, nil
	}
	if len(raw) < 2 || (raw[0] != '>' && raw[0] != '<') {
		return nil, fmt.Errorf("invalid lag filter %q: expected format >10 or <10", raw)
	}
	op := raw[0]
	value, err := strconv.ParseFloat(strings.TrimSpace(raw[1:]), 64)
	if err != nil {
		return nil, fmt.Errorf("invalid lag filter %q: %w", raw, err)
	}
	// ParseFloat accepts "NaN" and "Inf" without an error; comparing a lag
	// against them would silently match nothing or everything.
	if math.IsNaN(value) || math.IsInf(value, 0) {
		return nil, fmt.Errorf("invalid lag filter %q: threshold must be a finite number", raw)
	}
	return &lagFilter{op: op, value: value}, nil
}

// matchesLagFilter reports whether a node's replication lag satisfies filter.
//
// A nil filter matches every node. Nodes without a measurable lag are handled
// as follows:
//   - nil state: matches only a '>' filter;
//   - state without SlaveState (the node is met as master): its lag is
//     considered to be 0, so it matches only a '<' filter;
//   - SlaveState with a nil ReplicationLag: the lag is unknown, so the node
//     is treated like a missing state and matches only a '>' filter.
//
// Otherwise the lag is compared strictly against filter.value using
// filter.op.
func matchesLagFilter(state *nodestate.NodeState, filter *lagFilter) bool {
	if filter == nil {
		return true
	}
	// state must be checked on its own before any field access; a nil state
	// would otherwise be dereferenced.
	if state == nil {
		return filter.op == '>'
	}
	if state.SlaveState == nil {
		// if we meet master, its replication lag is considered to be 0
		return filter.op == '<'
	}
	if state.SlaveState.ReplicationLag == nil {
		return filter.op == '>'
	}

	lag := *state.SlaveState.ReplicationLag
	if filter.op == '>' {
		return lag > filter.value
	}
	return lag < filter.value
}
63 changes: 63 additions & 0 deletions tests/features/CLI.feature
Original file line number Diff line number Diff line change
Expand Up @@ -167,3 +167,66 @@ Feature: CLI
maintenance disabled
"""
And zookeeper node "/test/maintenance" should not exist within "30" seconds

Scenario: CLI info short prints cluster nodes
Given cluster is up and running
Then mysql host "mysql1" should be master
And mysql host "mysql2" should be replica of "mysql1"
And mysql replication on host "mysql2" should run fine within "5" seconds
And mysql host "mysql3" should be replica of "mysql1"
And mysql replication on host "mysql3" should run fine within "5" seconds

When I run command on host "mysql1"
"""
mysync info -s
"""
Then command return code should be "0"
And command output should match regexp
"""
(?s).*ha_nodes:.*mysql1.*mysql2.*mysql3.*
"""
And command output should match regexp
"""
(?s).*active_nodes:.*mysql1.*mysql2.*mysql3.*
"""
And command output should match regexp
"""
(?s).*health:\n- '===> "mysql1": .*\n- ' "mysql2": .*\n- ' "mysql3": .*
"""

Scenario: CLI info short filters health by host and zone
Given cluster is up and running
Then mysql host "mysql1" should be master
And mysql host "mysql2" should be replica of "mysql1"
And mysql replication on host "mysql2" should run fine within "5" seconds
And mysql host "mysql3" should be replica of "mysql1"
And mysql replication on host "mysql3" should run fine within "5" seconds

When I run command on host "mysql1"
"""
mysync info -s --host mysql2
"""
Then command return code should be "0"
And command output should match regexp
"""
(?s).*health:.*"mysql2".*
"""
And command output should not match regexp
"""
(?s).*health:.*"mysql1".*
"""

When I run command on host "mysql1"
"""
mysync info -s --zone klg
"""
Then command return code should be "0"
And command output should not match regexp
"""
(?s).*health:.*"mysql2".*
"""
And command output should not match regexp
"""
(?s).*health:.*"mysql1".*
"""

Loading
Loading