@@ -253,9 +253,10 @@ func _main(ctx context.Context, buildInfo models.BuildInformation,
|
||||
} // TODO move inside firewall?
|
||||
|
||||
wg := &sync.WaitGroup{}
|
||||
healthy := make(chan bool)
|
||||
|
||||
openvpnLooper := openvpn.NewLooper(allSettings.OpenVPN, nonRootUsername, puid, pgid, allServers,
|
||||
ovpnConf, firewallConf, routingConf, logger, httpClient, os.OpenFile, tunnelReadyCh, cancel)
|
||||
ovpnConf, firewallConf, routingConf, logger, httpClient, os.OpenFile, tunnelReadyCh, healthy, cancel)
|
||||
wg.Add(1)
|
||||
// wait for restartOpenvpn
|
||||
go openvpnLooper.Run(ctx, wg)
|
||||
@@ -302,7 +303,7 @@ func _main(ctx context.Context, buildInfo models.BuildInformation,
|
||||
healthcheckServer := healthcheck.NewServer(
|
||||
constants.HealthcheckAddress, logger)
|
||||
wg.Add(1)
|
||||
go healthcheckServer.Run(ctx, wg)
|
||||
go healthcheckServer.Run(ctx, healthy, wg)
|
||||
|
||||
// Start openvpn for the first time in a blocking call
|
||||
// until openvpn is launched
|
||||
|
||||
@@ -10,7 +10,7 @@ import (
|
||||
"time"
|
||||
)
|
||||
|
||||
func (s *server) runHealthcheckLoop(ctx context.Context, wg *sync.WaitGroup) {
|
||||
func (s *server) runHealthcheckLoop(ctx context.Context, healthy chan<- bool, wg *sync.WaitGroup) {
|
||||
defer wg.Done()
|
||||
for {
|
||||
previousErr := s.handler.getErr()
|
||||
@@ -18,6 +18,12 @@ func (s *server) runHealthcheckLoop(ctx context.Context, wg *sync.WaitGroup) {
|
||||
err := healthCheck(ctx, s.resolver)
|
||||
s.handler.setErr(err)
|
||||
|
||||
// Notify the healthy channel without blocking; the signal is dropped if the channel cannot accept it right now
|
||||
select {
|
||||
case healthy <- err == nil:
|
||||
default:
|
||||
}
|
||||
|
||||
if previousErr != nil && err == nil {
|
||||
s.logger.Info("healthy!")
|
||||
} else if previousErr == nil && err != nil {
|
||||
@@ -36,8 +42,8 @@ func (s *server) runHealthcheckLoop(ctx context.Context, wg *sync.WaitGroup) {
|
||||
}
|
||||
continue
|
||||
}
|
||||
// Success, check again in 10 minutes
|
||||
const period = 10 * time.Minute
|
||||
// Success, check again in 5 seconds
|
||||
const period = 5 * time.Second
|
||||
timer := time.NewTimer(period)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
|
||||
@@ -12,7 +12,7 @@ import (
|
||||
)
|
||||
|
||||
type Server interface {
|
||||
Run(ctx context.Context, wg *sync.WaitGroup)
|
||||
Run(ctx context.Context, healthy chan<- bool, wg *sync.WaitGroup)
|
||||
}
|
||||
|
||||
type server struct {
|
||||
@@ -32,12 +32,12 @@ func NewServer(address string, logger logging.Logger) Server {
|
||||
}
|
||||
}
|
||||
|
||||
func (s *server) Run(ctx context.Context, wg *sync.WaitGroup) {
|
||||
func (s *server) Run(ctx context.Context, healthy chan<- bool, wg *sync.WaitGroup) {
|
||||
defer wg.Done()
|
||||
|
||||
internalWg := &sync.WaitGroup{}
|
||||
internalWg.Add(1)
|
||||
go s.runHealthcheckLoop(ctx, internalWg)
|
||||
go s.runHealthcheckLoop(ctx, healthy, internalWg)
|
||||
|
||||
server := http.Server{
|
||||
Addr: s.address,
|
||||
|
||||
@@ -45,6 +45,7 @@ type looper struct {
|
||||
client *http.Client
|
||||
openFile os.OpenFileFunc
|
||||
tunnelReady chan<- struct{}
|
||||
healthy <-chan bool
|
||||
cancel context.CancelFunc
|
||||
// Internal channels and locks
|
||||
loopLock sync.Mutex
|
||||
@@ -54,15 +55,19 @@ type looper struct {
|
||||
portForwardSignals chan net.IP
|
||||
crashed bool
|
||||
backoffTime time.Duration
|
||||
healthWaitTime time.Duration
|
||||
}
|
||||
|
||||
const defaultBackoffTime = 15 * time.Second
|
||||
// Default durations used to initialise a new looper.
const (
|
||||
// defaultBackoffTime is the initial value of the looper's backoffTime field.
defaultBackoffTime = 15 * time.Second
|
||||
// defaultHealthWaitTime is the initial value of the looper's healthWaitTime
// field, and the value it is reset to once a healthy signal is received
// while waiting for health.
defaultHealthWaitTime = 6 * time.Second
|
||||
)
|
||||
|
||||
func NewLooper(settings configuration.OpenVPN,
|
||||
username string, puid, pgid int, allServers models.AllServers,
|
||||
conf Configurator, fw firewall.Configurator, routing routing.Routing,
|
||||
logger logging.Logger, client *http.Client, openFile os.OpenFileFunc,
|
||||
tunnelReady chan<- struct{}, cancel context.CancelFunc) Looper {
|
||||
tunnelReady chan<- struct{}, healthy <-chan bool, cancel context.CancelFunc) Looper {
|
||||
return &looper{
|
||||
state: state{
|
||||
status: constants.Stopped,
|
||||
@@ -80,6 +85,7 @@ func NewLooper(settings configuration.OpenVPN,
|
||||
client: client,
|
||||
openFile: openFile,
|
||||
tunnelReady: tunnelReady,
|
||||
healthy: healthy,
|
||||
cancel: cancel,
|
||||
start: make(chan struct{}),
|
||||
running: make(chan models.LoopStatus),
|
||||
@@ -87,6 +93,7 @@ func NewLooper(settings configuration.OpenVPN,
|
||||
stopped: make(chan struct{}),
|
||||
portForwardSignals: make(chan net.IP),
|
||||
backoffTime: defaultBackoffTime,
|
||||
healthWaitTime: defaultHealthWaitTime,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -215,6 +222,22 @@ func (l *looper) Run(ctx context.Context, wg *sync.WaitGroup) { //nolint:gocogni
|
||||
l.logAndWait(ctx, err)
|
||||
l.crashed = true
|
||||
stayHere = false
|
||||
case healthy := <-l.healthy:
|
||||
if healthy {
|
||||
continue
|
||||
}
|
||||
// ensure it stays unhealthy for some time before restarting it
|
||||
healthy = l.waitForHealth(ctx)
|
||||
if healthy || ctx.Err() != nil {
|
||||
continue
|
||||
}
|
||||
l.crashed = true // flag as crashed
|
||||
l.state.setStatusWithLock(constants.Stopping)
|
||||
l.logger.Warn("unhealthy program: restarting openvpn")
|
||||
openvpnCancel()
|
||||
<-waitError
|
||||
l.state.setStatusWithLock(constants.Stopped)
|
||||
stayHere = false
|
||||
}
|
||||
}
|
||||
close(waitError)
|
||||
@@ -240,6 +263,35 @@ func (l *looper) logAndWait(ctx context.Context, err error) {
|
||||
}
|
||||
}
|
||||
|
||||
// waitForHealth waits for a true healthy signal
|
||||
// after restarting openvpn in order to avoid restarting
|
||||
// openvpn in a loop as it requires a few seconds to connect.
|
||||
func (l *looper) waitForHealth(ctx context.Context) (healthy bool) {
|
||||
l.logger.Info("unhealthy program: waiting %s for it to change to healthy", l.healthWaitTime)
|
||||
timer := time.NewTimer(l.healthWaitTime)
|
||||
l.healthWaitTime *= 2
|
||||
for {
|
||||
select {
|
||||
case healthy = <-l.healthy:
|
||||
if !healthy {
|
||||
break
|
||||
}
|
||||
if !timer.Stop() {
|
||||
<-timer.C
|
||||
}
|
||||
l.healthWaitTime = defaultHealthWaitTime
|
||||
return true
|
||||
case <-timer.C:
|
||||
return false
|
||||
case <-ctx.Done():
|
||||
if !timer.Stop() {
|
||||
<-timer.C
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// portForward is a blocking operation which may or may not be infinite.
|
||||
// You should therefore always call it in a goroutine.
|
||||
func (l *looper) portForward(ctx context.Context, wg *sync.WaitGroup,
|
||||
|
||||
Reference in New Issue
Block a user