From 1991b9183be2dcafd61b63248c329cfff3ec8e67 Mon Sep 17 00:00:00 2001 From: Uriel Fanelli Date: Mon, 14 Aug 2023 11:52:07 +0200 Subject: [PATCH] Making more robust in case ping fails. --- interface.go | 81 ++++++++++++++++++++++++++++++++++++++++++++++++- orchestrator.go | 65 ++++++++++++++++++++++++++++----------- 2 files changed, 128 insertions(+), 18 deletions(-) diff --git a/interface.go b/interface.go index f17b341..d0cc5c0 100644 --- a/interface.go +++ b/interface.go @@ -111,12 +111,29 @@ func (b *AbstractBridge) removeIPandBridgeInt() { } -func isActive(bridgeip string) bool { +func (b *AbstractBridge) IsActive() bool { + + defer func() { + if r := recover(); r != nil { + fmt.Println("An error happened in , but Zoreide recovered. ") + fmt.Println("Error was: ", r) + } + }() + + var bridgeip string + + brIp, _, err := net.ParseCIDR(b.BridgeIpCIDR) + if err != nil { + log.Println("IsActive : problem parsing the IP/CIDR: ", err.Error()) + } else { + bridgeip = brIp.String() + } log.Println("Check for active IP: ", bridgeip) pinger, err := ping.NewPinger(bridgeip) if err != nil { + log.Println("Unable to ping address: ", bridgeip) log.Println("Ping error: " + err.Error()) } // just in case it doesn't stops alone @@ -133,3 +150,65 @@ func isActive(bridgeip string) bool { return stats.PacketsRecv == pinger.Count }
+
+// IsAssigned reports whether the IP part of b.BridgeIpCIDR is configured on
+// the interface named b.ExistingInterface. Every failure (bad CIDR, missing
+// interface, unreadable addresses, recovered panic) is reported as false.
+func (b *AbstractBridge) IsAssigned() bool {
+
+	// we want the program to recover in case of issues
+	defer func() {
+		if r := recover(); r != nil {
+			fmt.Println("An error happened in IsAssigned, but Zoreide recovered. ")
+			fmt.Println("Error was: ", r)
+		}
+	}()
+
+	brIp, _, err := net.ParseCIDR(b.BridgeIpCIDR)
+	if err != nil {
+		log.Println("IsAssigned : problem parsing the IP/CIDR: ", err.Error())
+		return false
+	}
+	bridgeip := brIp.String()
+	interfaceName := b.ExistingInterface
+
+	ief, err := net.InterfaceByName(interfaceName) // get interface
+	if err != nil {
+		log.Printf("Interface %s does not exist or not manageable\n", interfaceName)
+		log.Printf("Error is: %s\n", err.Error())
+		return false
+	}
+	addrs, err := ief.Addrs() // get addresses
+	if err != nil {
+		log.Printf("Cannot read IPs of interface %s\n", interfaceName)
+		log.Printf("Error is: %s\n", err.Error())
+		return false
+	}
+
+	// A *net.IPNet prints as "ip/prefix", so it never equals the bare bridge
+	// IP string: unwrap each address and compare net.IP values instead.
+	hasIPv4, assigned := false, false
+	for _, addr := range addrs {
+		var ip net.IP
+		switch a := addr.(type) {
+		case *net.IPNet:
+			ip = a.IP
+		case *net.IPAddr:
+			ip = a.IP
+		}
+		hasIPv4 = hasIPv4 || ip.To4() != nil
+		assigned = assigned || ip.Equal(brIp)
+	}
+
+	if !hasIPv4 {
+		log.Printf("interface %s don't have an ipv4 address\n", interfaceName)
+		return false
+	}
+	if assigned {
+		log.Printf("Ip %s is assigned to interface %s", bridgeip, interfaceName)
+		return true
+	}
+
+	log.Printf("Ip %s is NOT assigned to interface %s", bridgeip, interfaceName)
+	return false
+}
diff --git a/orchestrator.go b/orchestrator.go index 443ef83..b3df67e 100644 --- a/orchestrator.go +++ b/orchestrator.go @@ -3,7 +3,6 @@ package main import ( "fmt" "log" - "net" "regexp" "slices" "strconv" @@ -55,6 +54,13 @@ func (mip *AbstractMulticast) WriteNumberToMulticast(br AbstractBridge) { func (mip *AbstractMulticast) ReadNumberFromMulticast() { + defer func() { + if r := recover(); r != nil { + fmt.Println("An error happened in , but Zoreide recovered. 
") + fmt.Println("Error was: ", r) + } + }() + log.Println("Initiating reader") buffer := make([]byte, mip.MaxDatagramSize) @@ -74,6 +80,13 @@ func (mip *AbstractMulticast) ReadNumberFromMulticast() { func (b *AbstractBridge) HierarchyReLocator(entity AbstractMulticast) { + defer func() { + if r := recover(); r != nil { + fmt.Println("An error happened in , but Zoreide recovered. ") + fmt.Println("Error was: ", r) + } + }() + log.Println("Inizializing HierarchyManager") entity.AddUniqueAndSort(b.hIerarchyNumber) @@ -101,26 +114,26 @@ func (b *AbstractBridge) HierarchyReLocator(entity AbstractMulticast) { entity.AddUniqueAndSort(brdNumber) } - brIp, _, err := net.ParseCIDR(b.BridgeIpCIDR) - if err != nil { - log.Println(err.Error()) - } - // finished feeding the new number // if Alpha: if entity.IsAlpha(b.hIerarchyNumber) { - if isActive(brIp.String()) { + if b.IsAssigned() { log.Println("Still ALPHA. This is ok.") } else { log.Println("I'm the new ALPHA! Get out my path, losers!") b.configureIpAndBridgeUp() - log.Println("Ip is active: ", isActive(brIp.String())) + log.Println("Ip is active: ", b.IsActive()) } + // here we manage the case when we are not alpha } else { log.Println("GULP! There is a bigger one, better descalating") - if isActive(brIp.String()) { + if b.IsAssigned() { + log.Println("Start removing the IP from the interface:") b.removeIPandBridgeInt() - + log.Println("Removed.Ip is reachable: ", b.IsActive()) + } else { + log.Println("Nothing to do, since IP is not assigned to the interface: ", b.ExistingInterface) + log.Println("Ip is reachable: ", b.IsActive()) } } @@ -130,22 +143,40 @@ func (b *AbstractBridge) HierarchyReLocator(entity AbstractMulticast) { func (b *AbstractBridge) WaitAndClean(entity AbstractMulticast) { - log.Println("Inizializing HAManager") + defer func() { + if r := recover(); r != nil { + fmt.Println("An error happened in , but Zoreide recovered. 
") + fmt.Println("Error was: ", r) + } + }() - brIp, _, err := net.ParseCIDR(b.BridgeIpCIDR) - if err != nil { - log.Println(err.Error()) - } + log.Println("Inizializing HA-Manager") for { pollTime := len(entity.HierarchyArray) + 1 time.Sleep(time.Duration(pollTime) * time.Second) - // svuotare l'array e rifare le elezioni - if !isActive(string(brIp.String())) { + // Evitiamo di avere l'IP senza essere alpha + if b.IsAssigned() { + if entity.IsAlpha(b.hIerarchyNumber) { + log.Println("We are alpha and IP is assigned. All ok") + } else { + log.Println("Cannot have IP assigned without being alpha") + log.Println("Inconsistent situation: start removing the IP from the interface:") + b.removeIPandBridgeInt() + log.Println("Removed.Ip is still reachable: ", b.IsActive()) + } + } + + if b.IsActive() { + log.Println("Ip reachable, cluster OK") + continue + } else { + log.Println("Situation non consistent, restart from green field.") entity.HierarchyArray = entity.HierarchyArray[:0] entity.HierarchyArray = slices.Clip(entity.HierarchyArray) entity.AddUniqueAndSort(b.hIerarchyNumber) + log.Println("Restarted from green field: ", entity.HierarchyArray) } }