Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Multiple auto-egress-IP fixes #18808

Merged
merged 5 commits into from
Mar 8, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
289 changes: 166 additions & 123 deletions pkg/network/node/egressip.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,25 @@ import (
)

type nodeEgress struct {
nodeIP string

// requestedIPs are the EgressIPs listed on the node's HostSubnet
nodeIP string
requestedIPs sets.String
// assignedIPs are the IPs actually in use on the node
assignedIPs sets.String
}

type namespaceEgress struct {
vnid uint32

// requestedIP is the egress IP it wants (NetNamespace.EgressIPs[0])
vnid uint32
requestedIP string
// assignedIP is an egress IP actually in use on nodeIP
assignedIP string
nodeIP string
}

type egressIPInfo struct {
ip string

nodes []*nodeEgress
namespaces []*namespaceEgress

assignedNodeIP string
assignedIPTablesMark string
assignedVNID uint32
blockedVNIDs map[uint32]bool
}

type egressIPWatcher struct {
Expand All @@ -48,13 +51,9 @@ type egressIPWatcher struct {
networkInformers networkinformers.SharedInformerFactory
iptables *NodeIPTables

// from HostSubnets
nodesByNodeIP map[string]*nodeEgress
nodesByEgressIP map[string]*nodeEgress

// From NetNamespaces
namespacesByVNID map[uint32]*namespaceEgress
namespacesByEgressIP map[string]*namespaceEgress
nodesByNodeIP map[string]*nodeEgress
namespacesByVNID map[uint32]*namespaceEgress
egressIPs map[string]*egressIPInfo

localEgressLink netlink.Link
localEgressNet *net.IPNet
Expand All @@ -67,11 +66,9 @@ func newEgressIPWatcher(oc *ovsController, localIP string, masqueradeBit *int32)
oc: oc,
localIP: localIP,

nodesByNodeIP: make(map[string]*nodeEgress),
nodesByEgressIP: make(map[string]*nodeEgress),

namespacesByVNID: make(map[uint32]*namespaceEgress),
namespacesByEgressIP: make(map[string]*namespaceEgress),
nodesByNodeIP: make(map[string]*nodeEgress),
namespacesByVNID: make(map[uint32]*namespaceEgress),
egressIPs: make(map[string]*egressIPInfo),
}
if masqueradeBit != nil {
eip.masqueradeBit = 1 << uint32(*masqueradeBit)
Expand Down Expand Up @@ -106,6 +103,47 @@ func getMarkForVNID(vnid, masqueradeBit uint32) string {
return fmt.Sprintf("0x%08x", vnid)
}

func (eip *egressIPWatcher) ensureEgressIPInfo(egressIP string) *egressIPInfo {
eg := eip.egressIPs[egressIP]
if eg == nil {
eg = &egressIPInfo{ip: egressIP}
eip.egressIPs[egressIP] = eg
}
return eg
}

func (eg *egressIPInfo) addNode(node *nodeEgress) {
if len(eg.nodes) != 0 {
utilruntime.HandleError(fmt.Errorf("Multiple nodes claiming EgressIP %q (nodes %q, %q)", eg.ip, node.nodeIP, eg.nodes[0].nodeIP))

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we clarify that only the first node that claimed the egress IP will have egress rules?
(syncEgressIPTablesState() checks eg.nodes[0].nodeIP)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

actually none of them will; syncEgressIPTablesState() only looks at eg.nodes[0] when len(eg.nodes)==1

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

}
eg.nodes = append(eg.nodes, node)
}

func (eg *egressIPInfo) deleteNode(node *nodeEgress) {
for i := range eg.nodes {
if eg.nodes[i] == node {
eg.nodes = append(eg.nodes[:i], eg.nodes[i+1:]...)
return
}
}
}

func (eg *egressIPInfo) addNamespace(ns *namespaceEgress) {
if len(eg.namespaces) != 0 {
utilruntime.HandleError(fmt.Errorf("Multiple namespaces claiming EgressIP %q (NetIDs %d, %d)", eg.ip, ns.vnid, eg.namespaces[0].vnid))
}
eg.namespaces = append(eg.namespaces, ns)
}

func (eg *egressIPInfo) deleteNamespace(ns *namespaceEgress) {
for i := range eg.namespaces {
if eg.namespaces[i] == ns {
eg.namespaces = append(eg.namespaces[:i], eg.namespaces[i+1:]...)
return
}
}
}

func (eip *egressIPWatcher) watchHostSubnets() {
funcs := common.InformerFuncs(&networkapi.HostSubnet{}, eip.handleAddOrUpdateHostSubnet, eip.handleDeleteHostSubnet)
eip.networkInformers.Network().InternalVersion().HostSubnets().Informer().AddEventHandler(funcs)
Expand Down Expand Up @@ -137,7 +175,6 @@ func (eip *egressIPWatcher) updateNodeEgress(nodeIP string, nodeEgressIPs []stri
node = &nodeEgress{
nodeIP: nodeIP,
requestedIPs: sets.NewString(),
assignedIPs: sets.NewString(),
}
eip.nodesByNodeIP[nodeIP] = node
} else if len(nodeEgressIPs) == 0 {
Expand All @@ -148,89 +185,19 @@ func (eip *egressIPWatcher) updateNodeEgress(nodeIP string, nodeEgressIPs []stri

// Process new EgressIPs
for _, ip := range node.requestedIPs.Difference(oldRequestedIPs).UnsortedList() {
if oldNode := eip.nodesByEgressIP[ip]; oldNode != nil {
utilruntime.HandleError(fmt.Errorf("Multiple nodes claiming EgressIP %q (nodes %q, %q)", ip, node.nodeIP, oldNode.nodeIP))
continue
}

eip.nodesByEgressIP[ip] = node
eip.maybeAddEgressIP(ip)
eg := eip.ensureEgressIPInfo(ip)
eg.addNode(node)
eip.syncEgressIP(eg)
}

// Process removed EgressIPs
for _, ip := range oldRequestedIPs.Difference(node.requestedIPs).UnsortedList() {
if oldNode := eip.nodesByEgressIP[ip]; oldNode != node {
// User removed a duplicate EgressIP
eg := eip.egressIPs[ip]
if eg == nil {
continue
}

eip.deleteEgressIP(ip)
delete(eip.nodesByEgressIP, ip)
}
}

func (eip *egressIPWatcher) maybeAddEgressIP(egressIP string) {
node := eip.nodesByEgressIP[egressIP]
ns := eip.namespacesByEgressIP[egressIP]
if ns == nil {
return
}

mark := getMarkForVNID(ns.vnid, eip.masqueradeBit)
nodeIP := ""

if node != nil && !node.assignedIPs.Has(egressIP) {
node.assignedIPs.Insert(egressIP)
nodeIP = node.nodeIP
if node.nodeIP == eip.localIP {
if err := eip.assignEgressIP(egressIP, mark); err != nil {
utilruntime.HandleError(fmt.Errorf("Error assigning Egress IP %q: %v", egressIP, err))
nodeIP = ""
}
}
}

if ns.assignedIP != egressIP || ns.nodeIP != nodeIP {
ns.assignedIP = egressIP
ns.nodeIP = nodeIP

err := eip.oc.UpdateNamespaceEgressRules(ns.vnid, ns.nodeIP, mark)
if err != nil {
utilruntime.HandleError(fmt.Errorf("Error updating Namespace egress rules: %v", err))
}
}
}

func (eip *egressIPWatcher) deleteEgressIP(egressIP string) {
node := eip.nodesByEgressIP[egressIP]
ns := eip.namespacesByEgressIP[egressIP]
if node == nil || ns == nil {
return
}

mark := getMarkForVNID(ns.vnid, eip.masqueradeBit)
if node.nodeIP == eip.localIP {
if err := eip.releaseEgressIP(egressIP, mark); err != nil {
utilruntime.HandleError(fmt.Errorf("Error releasing Egress IP %q: %v", egressIP, err))
}
node.assignedIPs.Delete(egressIP)
}

if ns.assignedIP == egressIP {
ns.assignedIP = ""
ns.nodeIP = ""
}

var err error
if ns.requestedIP == "" {
// Namespace no longer wants EgressIP
err = eip.oc.UpdateNamespaceEgressRules(ns.vnid, "", "")
} else {
// Namespace still wants EgressIP but no node provides it
err = eip.oc.UpdateNamespaceEgressRules(ns.vnid, "", mark)
}
if err != nil {
utilruntime.HandleError(fmt.Errorf("Error updating Namespace egress rules: %v", err))
eg.deleteNode(node)
eip.syncEgressIP(eg)
}
}

Expand Down Expand Up @@ -266,44 +233,120 @@ func (eip *egressIPWatcher) updateNamespaceEgress(vnid uint32, egressIP string)

ns := eip.namespacesByVNID[vnid]
Copy link

@pravisankar pravisankar Mar 5, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need to consider join and isolate project operations otherwise there will be some problems.
Example:

  1. ns1 with vnid v1 requested egrees ip e1
  2. ns2 with vnid v2 requested egress ip e2
  3. ns2 joined with ns1, now ns2 has vnid v1
  4. eip.namespacesByVNID[v1] will now have requestedIP=e2
  5. ns2 got isolated

Now, v1 has only one namespace ns1 but the requested ip will be set as ep2 instead of ep1 which is incorrect.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

filed #18852

if ns == nil {
if egressIP == "" {
return
}
ns = &namespaceEgress{vnid: vnid}
eip.namespacesByVNID[vnid] = ns
} else if egressIP == "" {
delete(eip.namespacesByVNID, vnid)
}

if ns.requestedIP == egressIP {
return
}
if oldNS := eip.namespacesByEgressIP[egressIP]; oldNS != nil {
utilruntime.HandleError(fmt.Errorf("Multiple NetNamespaces claiming EgressIP %q (NetIDs %d, %d)", egressIP, ns.vnid, oldNS.vnid))
return
}

if ns.assignedIP != "" {
eip.deleteEgressIP(egressIP)
delete(eip.namespacesByEgressIP, egressIP)
ns.assignedIP = ""
ns.nodeIP = ""
if ns.requestedIP != "" {
eg := eip.egressIPs[ns.requestedIP]
if eg != nil {
eg.deleteNamespace(ns)
eip.syncEgressIP(eg)
}
}

ns.requestedIP = egressIP
eip.namespacesByEgressIP[egressIP] = ns
eip.maybeAddEgressIP(egressIP)
if egressIP == "" {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is to handle delete case where ns is no longer referenced in eip.namespacesByVNID?
To be clear, may be we could assign ns.requestedIP = egressip after this check

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, you're right, the ns.requestedIP assignment is irrelevant (but harmless) in this case

return
}

eg := eip.ensureEgressIPInfo(egressIP)
eg.addNamespace(ns)
eip.syncEgressIP(eg)
}

func (eip *egressIPWatcher) deleteNamespaceEgress(vnid uint32) {
eip.Lock()
defer eip.Unlock()
eip.updateNamespaceEgress(vnid, "")
}

ns := eip.namespacesByVNID[vnid]
if ns == nil {
return
func (eip *egressIPWatcher) syncEgressIP(eg *egressIPInfo) {
assignedNodeIPChanged := eip.syncEgressIPTablesState(eg)
eip.syncEgressOVSState(eg, assignedNodeIPChanged)
}

func (eip *egressIPWatcher) syncEgressIPTablesState(eg *egressIPInfo) bool {
// The egressIPInfo should have an assigned node IP if and only if the
// egress IP is active (ie, it is assigned to exactly 1 node and exactly
// 1 namespace).
egressIPActive := (len(eg.nodes) == 1 && len(eg.namespaces) == 1)
assignedNodeIPChanged := false
if egressIPActive && eg.assignedNodeIP != eg.nodes[0].nodeIP {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There could be very rare corner case when F5 is involved.
Scenario:

  1. F5 node n1(no real backing node only hostSubnet object) requested egress ip e1
  2. node n2 requested same egress ip e1
  3. eg.nodes == [n1, n2], n1 claims the egress ip e1
  4. SDN controller calls delete + create on F5 node n1 (removes AssignHostSubnetAnnoation and assigns vnid)
  5. eg.nodes == [n2, n1] now n2 claims the egress ip e1 (side effect of how we handle F5 case)

This is theoretical case but may never happen in practice. I don't think we need to fix this case.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

as above, there's no actual corner case here because egressIPActive would be false at both step 3 and step 5

eg.assignedNodeIP = eg.nodes[0].nodeIP
eg.assignedIPTablesMark = getMarkForVNID(eg.namespaces[0].vnid, eip.masqueradeBit)
assignedNodeIPChanged = true
if eg.assignedNodeIP == eip.localIP {
if err := eip.assignEgressIP(eg.ip, eg.assignedIPTablesMark); err != nil {
utilruntime.HandleError(fmt.Errorf("Error assigning Egress IP %q: %v", eg.ip, err))
eg.assignedNodeIP = ""
}
}
} else if !egressIPActive && eg.assignedNodeIP != "" {
if eg.assignedNodeIP == eip.localIP {
if err := eip.releaseEgressIP(eg.ip, eg.assignedIPTablesMark); err != nil {
utilruntime.HandleError(fmt.Errorf("Error releasing Egress IP %q: %v", eg.ip, err))
}
}
eg.assignedNodeIP = ""
eg.assignedIPTablesMark = ""
assignedNodeIPChanged = true
}
return assignedNodeIPChanged
}

func (eip *egressIPWatcher) syncEgressOVSState(eg *egressIPInfo, assignedNodeIPChanged bool) {
var blockedVNIDs map[uint32]bool

// If multiple namespaces are assigned to the same EgressIP, we need to block
// outgoing traffic from all of them.
if len(eg.namespaces) > 1 {
eg.assignedVNID = 0
blockedVNIDs = make(map[uint32]bool)
for _, ns := range eg.namespaces {
blockedVNIDs[ns.vnid] = true
if !eg.blockedVNIDs[ns.vnid] {
err := eip.oc.SetNamespaceEgressDropped(ns.vnid)
if err != nil {
utilruntime.HandleError(fmt.Errorf("Error updating Namespace egress rules: %v", err))
}
}
}
}

// If we have, or had, a single egress namespace, then update the OVS flows if
// something has changed
var err error
if len(eg.namespaces) == 1 && (eg.assignedVNID != eg.namespaces[0].vnid || assignedNodeIPChanged) {
eg.assignedVNID = eg.namespaces[0].vnid
delete(eg.blockedVNIDs, eg.assignedVNID)
err = eip.oc.SetNamespaceEgressViaEgressIP(eg.assignedVNID, eg.assignedNodeIP, getMarkForVNID(eg.assignedVNID, eip.masqueradeBit))
} else if len(eg.namespaces) == 0 && eg.assignedVNID != 0 {
err = eip.oc.SetNamespaceEgressNormal(eg.assignedVNID)
eg.assignedVNID = 0
}
if err != nil {
utilruntime.HandleError(fmt.Errorf("Error updating Namespace egress rules: %v", err))
}

if ns.assignedIP != "" {
ns.requestedIP = ""
egressIP := ns.assignedIP
eip.deleteEgressIP(egressIP)
delete(eip.namespacesByEgressIP, egressIP)
// If we previously had blocked VNIDs, we need to unblock any that have been removed
// from the duplicates list
for vnid := range eg.blockedVNIDs {
if !blockedVNIDs[vnid] {
err := eip.oc.SetNamespaceEgressNormal(vnid)
if err != nil {
utilruntime.HandleError(fmt.Errorf("Error updating Namespace egress rules: %v", err))
}
}
}
delete(eip.namespacesByVNID, vnid)
eg.blockedVNIDs = blockedVNIDs
}

func (eip *egressIPWatcher) assignEgressIP(egressIP, mark string) error {
Expand Down
Loading