Skip to content

Commit 90e0cb3

Browse files
committed
BUG/MAJOR: reload: Rework of the reload agent
There are a small number of cases where a reload fails even though the -c check passes (e.g. a socket from a bind line cannot be bound); in those cases the reload would fail and HAProxy would crash. To deal with these cases, a new CLI option -s (--restart-cmd) is added, so now when a reload fails, dataplaneapi restarts HAProxy using this command with the last known good config file. It assumes that the config file given at start is valid and copies it with a .lkg suffix. On every successful reload, that .lkg file is replaced with the working cfg. When a reload fails, it replaces <--config-file> with <--config-file.lkg>, restarts HAProxy, and then puts the bogus <--config-file> back in place.
1 parent c716ebf commit 90e0cb3

File tree

3 files changed

+120
-21
lines changed

3 files changed

+120
-21
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ HAProxy options:
5959
-b, --haproxy-bin= Path to the haproxy binary file (default: haproxy)
6060
-d, --reload-delay= Minimum delay between two reloads (in s) (default: 5)
6161
-r, --reload-cmd= Reload command
62+
-s, --restart-cmd= Restart command
6263
--reload-retention= Reload retention in days, every older reload id will be deleted (default: 1)
6364
-t, --transaction-dir= Path to the transaction directory (default: /tmp/haproxy)
6465
-m, --master-runtime= Path to the master Runtime API socket
@@ -81,7 +82,7 @@ Help Options:
8182
You can test it by simply running:
8283

8384
```
84-
./dataplaneapi --port 5555 -b /usr/sbin/haproxy -c /etc/haproxy/haproxy.cfg -d 5 -r "service haproxy reload" -u dataplaneapi -t /tmp/haproxy
85+
./dataplaneapi --port 5555 -b /usr/sbin/haproxy -c /etc/haproxy/haproxy.cfg -d 5 -r "service haproxy reload" -s "service haproxy restart" -u dataplaneapi -t /tmp/haproxy
8586
```
8687

8788
Test it out with curl. Note that you need a user/password combination set up in an HAProxy userlist in the HAProxy configuration (in the above example: /etc/haproxy/haproxy.cfg, userlist controller):

configure_data_plane.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ var haproxyOptions struct {
7171
HAProxy string `short:"b" long:"haproxy-bin" description:"Path to the haproxy binary file" default:"haproxy"`
7272
ReloadDelay int `short:"d" long:"reload-delay" description:"Minimum delay between two reloads (in s)" default:"5"`
7373
ReloadCmd string `short:"r" long:"reload-cmd" description:"Reload command"`
74+
RestartCmd string `short:"s" long:"restart-cmd" description:"Restart command"`
7475
ReloadRetention int `long:"reload-retention" description:"Reload retention in days, every older reload id will be deleted" default:"1"`
7576
TransactionDir string `short:"t" long:"transaction-dir" description:"Path to the transaction directory" default:"/tmp/haproxy"`
7677
MasterRuntime string `short:"m" long:"master-runtime" description:"Path to the master Runtime API socket"`
@@ -139,7 +140,9 @@ func configureAPI(api *operations.DataPlaneAPI) http.Handler {
139140

140141
// Initialize reload agent
141142
ra := &haproxy.ReloadAgent{}
142-
ra.Init(haproxyOptions.ReloadDelay, haproxyOptions.ReloadCmd, haproxyOptions.ReloadRetention)
143+
if err := ra.Init(haproxyOptions.ReloadDelay, haproxyOptions.ReloadCmd, haproxyOptions.RestartCmd, haproxyOptions.ConfigFile, haproxyOptions.ReloadRetention); err != nil {
144+
log.Fatalf("Cannot initialize reload agent: %v", err)
145+
}
143146

144147
// Applies when the Authorization header is set with the Basic scheme
145148
api.BasicAuthAuth = func(user string, pass string) (interface{}, error) {
@@ -498,13 +501,13 @@ func configureNativeClient() *client_native.HAProxyClient {
498501
// Initialize HAProxy native client
499502
confClient, err := configureConfigurationClient()
500503
if err != nil {
501-
log.Fatalf(err.Error())
504+
log.Fatalf("Error initializing configuration client: %v", err)
502505
}
503506

504507
runtimeClient := configureRuntimeClient(confClient)
505508
client := &client_native.HAProxyClient{}
506509
if err := client.Init(confClient, runtimeClient); err != nil {
507-
log.Fatalf("Error setting up native client: %s", err.Error())
510+
log.Fatalf("Error setting up native client: %v", err)
508511
}
509512

510513
return client

haproxy/reload_agent.go

Lines changed: 112 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ package haproxy
1818
import (
1919
"bytes"
2020
"fmt"
21+
"io"
22+
"os"
2123
"os/exec"
2224
"strconv"
2325
"strings"
@@ -32,6 +34,7 @@ import (
3234
type reloadCache struct {
3335
failedReloads map[string]*models.Reload
3436
lastSuccess *models.Reload
37+
next string
3538
current string
3639
index int64
3740
retention int
@@ -40,25 +43,40 @@ type reloadCache struct {
4043

4144
// ReloadAgent handles all reloads, scheduled or forced
4245
type ReloadAgent struct {
43-
delay int
44-
cmd string
45-
cache reloadCache
46+
delay int
47+
reloadCmd string
48+
restartCmd string
49+
configFile string
50+
lkgConfigFile string
51+
cache reloadCache
4652
}
4753

4854
// Init a new reload agent
49-
func (ra *ReloadAgent) Init(delay int, cmd string, retention int) {
50-
ra.cmd = cmd
55+
func (ra *ReloadAgent) Init(delay int, reloadCmd string, restartCmd string, configFile string, retention int) error {
56+
ra.reloadCmd = reloadCmd
57+
ra.restartCmd = restartCmd
58+
ra.configFile = configFile
5159
ra.delay = delay
60+
ra.lkgConfigFile = configFile + ".lkg"
5261

62+
// create last known good file, assume it is valid when starting
63+
if err := copyFile(ra.configFile, ra.lkgConfigFile); err != nil {
64+
return err
65+
}
5366
ra.cache.Init(retention)
5467
go ra.handleReloads()
68+
return nil
5569
}
5670

5771
func (ra *ReloadAgent) handleReloads() {
5872
for {
5973
select {
6074
case <-time.After(time.Duration(ra.delay) * time.Second):
61-
if ra.cache.current != "" {
75+
if ra.cache.next != "" {
76+
ra.cache.mu.Lock()
77+
ra.cache.current = ra.cache.next
78+
ra.cache.next = ""
79+
ra.cache.mu.Unlock()
6280
response, err := ra.reloadHAProxy()
6381
if err != nil {
6482
ra.cache.failReload(response)
@@ -72,30 +90,71 @@ func (ra *ReloadAgent) handleReloads() {
7290
}
7391

7492
func (ra *ReloadAgent) reloadHAProxy() (string, error) {
75-
strArr := strings.Split(ra.cmd, " ")
76-
var cmd *exec.Cmd
93+
// try the reload
94+
log.Debug("Reload started...")
95+
t := time.Now()
96+
output, err := execCmd(ra.reloadCmd)
97+
log.Debug("Reload finished.")
98+
log.Debug("Time elapsed: ", time.Since(t))
99+
if err != nil {
100+
// if failed, return to last known good file and restart and return the original file
101+
log.Info("Reload failed, restarting with last known good config...")
102+
if err := copyFile(ra.configFile, ra.configFile+".bck"); err != nil {
103+
return fmt.Sprintf("Reload failed: %s, failed to backup original config file for restart.", output), err
104+
}
105+
defer func() {
106+
copyFile(ra.configFile+".bck", ra.configFile)
107+
os.Remove(ra.configFile+".bck")
108+
}()
109+
if err := copyFile(ra.lkgConfigFile, ra.configFile); err != nil {
110+
return fmt.Sprintf("Reload failed: %s, failed to revert to last known good config file", output), err
111+
}
112+
if err := ra.restartHAProxy(); err != nil {
113+
log.Warn("Restart failed, please check the reason and restart manually: ", err)
114+
return fmt.Sprintf("Reload failed: %s, failed to restart HAProxy, please check and start manually", output), err
115+
}
116+
log.Debug("HAProxy restarted with last known good config.")
117+
return output, err
118+
}
119+
log.Debug("Reload succesful")
120+
// if success, replace last known good file
121+
copyFile(ra.configFile, ra.lkgConfigFile)
122+
return output, nil
123+
}
124+
125+
func (ra *ReloadAgent) restartHAProxy() error {
126+
_, err := execCmd(ra.restartCmd)
127+
if err != nil {
128+
return err
129+
}
130+
return nil
131+
}
132+
133+
// execCmd runs the command line cmd and returns what it printed.
//
// cmd is split on whitespace into a program name and its arguments. No shell
// is involved, so quoting, pipes and variable expansion are not interpreted.
//
// On success the captured standard output is returned. On failure the
// captured standard error is returned together with an error that names the
// command. An empty or whitespace-only cmd is rejected without starting a
// process.
func execCmd(cmd string) (string, error) {
	// strings.Fields drops the empty tokens that strings.Split(cmd, " ")
	// produces for repeated, leading or trailing spaces; those would
	// otherwise be passed to the program as empty arguments.
	args := strings.Fields(cmd)
	if len(args) == 0 {
		return "", fmt.Errorf("cannot execute empty command %q", cmd)
	}

	c := exec.Command(args[0], args[1:]...)
	var stdout, stderr bytes.Buffer
	c.Stdout = &stdout
	c.Stderr = &stderr

	if err := c.Run(); err != nil {
		return stderr.String(), fmt.Errorf("Executing %s failed: %s", cmd, err)
	}
	return stdout.String(), nil
}
92151

93152
// Reload schedules a reload
94153
func (ra *ReloadAgent) Reload() string {
95-
if ra.cache.current == "" {
154+
if ra.cache.next == "" {
96155
ra.cache.newReload()
97156
}
98-
return ra.cache.current
157+
return ra.cache.next
99158
}
100159

101160
// ForceReload calls reload directly
@@ -112,6 +171,7 @@ func (rc *reloadCache) Init(retention int) {
112171
defer rc.mu.Unlock()
113172
rc.failedReloads = make(map[string]*models.Reload)
114173
rc.current = ""
174+
rc.next = ""
115175
rc.lastSuccess = nil
116176
rc.index = 0
117177
rc.retention = retention
@@ -120,7 +180,7 @@ func (rc *reloadCache) Init(retention int) {
120180
// newReload generates a new reload ID and stores it as the next scheduled
// reload. The cache mutex is held for the duration of the call.
func (rc *reloadCache) newReload() {
	rc.mu.Lock()
	defer rc.mu.Unlock()

	id := rc.generateID()
	rc.next = id
}
125185

126186
func (rc *reloadCache) failReload(response string) {
@@ -184,6 +244,14 @@ func (ra *ReloadAgent) GetReloads() models.Reloads {
184244
}
185245
v = append(v, r)
186246
}
247+
248+
if ra.cache.next != "" {
249+
r := &models.Reload{
250+
ID: ra.cache.next,
251+
Status: "in_progress",
252+
}
253+
v = append(v, r)
254+
}
187255
return v
188256
}
189257

@@ -197,6 +265,13 @@ func (ra *ReloadAgent) GetReload(id string) *models.Reload {
197265
Status: "in_progress",
198266
}
199267
}
268+
if ra.cache.next == id {
269+
return &models.Reload{
270+
ID: ra.cache.current,
271+
Status: "in_progress",
272+
}
273+
}
274+
200275
v, ok := ra.cache.failedReloads[id]
201276
if ok {
202277
return v
@@ -269,3 +344,23 @@ func (e *ReloadError) Error() string {
269344
func NewReloadError(msg string) *ReloadError {
270345
return &ReloadError{msg: msg}
271346
}
347+
348+
// copyFile copies the contents of the file at src to the file at dest.
//
// dest is created if it does not exist and truncated if it does. The data is
// synced to disk before returning. If src and dest refer to the same file
// nothing is done, because truncating dest would destroy the only copy of
// the data.
//
// It returns the first error encountered while opening, copying, syncing or
// closing the files.
func copyFile(src, dest string) (err error) {
	in, err := os.Open(src)
	if err != nil {
		return err
	}
	defer in.Close()

	// Guard against copying a file onto itself: os.Create below would
	// truncate it before anything has been read.
	if srcInfo, statErr := in.Stat(); statErr == nil {
		if destInfo, statErr := os.Stat(dest); statErr == nil && os.SameFile(srcInfo, destInfo) {
			return nil
		}
	}

	out, err := os.Create(dest)
	if err != nil {
		return err
	}
	defer func() {
		// Report a failed close unless an earlier error is already set.
		if cerr := out.Close(); err == nil {
			err = cerr
		}
	}()

	if _, err = io.Copy(out, in); err != nil {
		return err
	}
	return out.Sync()
}

0 commit comments

Comments
 (0)