Skip to content

Commit

Permalink
nit
Browse files Browse the repository at this point in the history
Signed-off-by: MohammedAbdi <[email protected]>
  • Loading branch information
mamy-CS committed Feb 20, 2025
1 parent 9f3e9eb commit a47940c
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 62 deletions.
4 changes: 0 additions & 4 deletions internal/controller/instaslice_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -333,10 +333,6 @@ func (r *InstasliceReconciler) Reconcile(ctx context.Context, req ctrl.Request)
remainingTime := 30*time.Second - elapsed
return ctrl.Result{RequeueAfter: remainingTime}, nil
}
// update DeployedPodTotal Metrics by setting value to 0 as pod allocation is deleted
if err = r.UpdateDeployedPodTotalMetrics(string(allocation.Nodename), allocation.GPUUUID, allocRequest.PodRef.Namespace, allocRequest.PodRef.Name, allocRequest.Profile, 0); err != nil {
log.Error(err, "Failed to update deployed pod metrics", "nodeName", allocation.Nodename)
}
}
}
}
Expand Down
31 changes: 6 additions & 25 deletions internal/controller/instaslice_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1087,7 +1087,6 @@ var _ = Describe("Metrics Incrementation", func() {
instaslice *inferencev1alpha1.Instaslice
pod *v1.Pod
podUUID string
req ctrl.Request
)

BeforeEach(func() {
Expand All @@ -1108,7 +1107,7 @@ var _ = Describe("Metrics Incrementation", func() {
Config: config,
}

podUUID = "test-pod-uuid"
podUUID = "test-pod-uuid-2"

pod = &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Expand Down Expand Up @@ -1154,13 +1153,6 @@ var _ = Describe("Metrics Incrementation", func() {
}

Expect(fakeClient.Create(ctx, instaslice)).To(Succeed())

req = ctrl.Request{
NamespacedName: types.NamespacedName{
Name: "test-pod",
Namespace: InstaSliceOperatorNamespace,
},
}
})

// Test for updateMetricsAllSlotsFree
Expand All @@ -1170,19 +1162,6 @@ var _ = Describe("Metrics Incrementation", func() {
Expect(len(instaslice.Spec.PodAllocationRequests)).To(Equal(0))
})

// Test for IncrementTotalProcessedGpuSliceMetrics with ObservedGeneration
It("should correctly increment metrics based on ObservedGeneration", func() {
instaslice.Status.ObservedGeneration = 0
Expect(instaslice.Generation).To(Equal(int64(1)))

err := r.IncrementTotalProcessedGpuSliceMetrics("node-1", "gpu-1", 4)
Expect(err).ToNot(HaveOccurred())

// Simulate successful metrics processing
instaslice.Status.ObservedGeneration = instaslice.Generation
Expect(instaslice.Status.ObservedGeneration).To(Equal(instaslice.Generation))
})

// Test to prevent double metric incrementation
It("should not increment metrics more than once for the same generation", func() {
instaslice.Status.ObservedGeneration = instaslice.Generation // Metrics already processed
Expand Down Expand Up @@ -1226,9 +1205,11 @@ var _ = Describe("Metrics Incrementation", func() {
Expect(instaslice.Status.IsMetricProcessed).To(BeTrue()) // Ensure metrics are marked as processed

// Check cleanup of incompatible profiles
r.UpdateCompatibleProfilesMetrics(*instaslice, "node-1", map[string]int32{"gpu-1": 0})
Expect(instasliceMetrics.compatibleProfiles.WithLabelValues("1g.5gb", "node-1", "0")).NotTo(BeNil())
Expect(instasliceMetrics.compatibleProfiles.WithLabelValues("2g.10gb", "node-1", "0")).NotTo(BeNil())
err := r.UpdateCompatibleProfilesMetrics(*instaslice, "node-1", map[string]int32{"gpu-1": 0})
Expect(err).ToNot(HaveOccurred()) // Ensure the function runs without errors

Expect(instasliceMetrics.compatibleProfiles.WithLabelValues("1g.5gb", "node-1")).NotTo(BeNil())
Expect(instasliceMetrics.compatibleProfiles.WithLabelValues("2g.10gb", "node-1")).NotTo(BeNil())

// Simulate spec update
instaslice.Generation = 3
Expand Down
38 changes: 5 additions & 33 deletions internal/controller/prometheus_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,9 @@ var (
// compatible profiles with remaining gpu slices
compatibleProfiles: prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "instaslice_current_gpu_compatible_profiles",
Help: "Profiles compatible with remaining GPU slices.",
Help: "Profiles compatible with remaining GPU slices and their counts.",
},
[]string{"profile", "node", "count"}), // Labels: profile, node, count
[]string{"profile", "node"}), // Labels: profile, node
// total processed slices
processedSlices: prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "instaslice_total_processed_gpu_slices",
Expand Down Expand Up @@ -135,7 +135,6 @@ func (r *InstasliceReconciler) UpdateCompatibleProfilesMetrics(instasliceObj inf
// "1g.10gb": 6,
// "1g.5gb+me": 7,
// }
recommendedProfileMap := GenerateProfileMapWithIndexes(instasliceObj)

// Maintain a map to track currently compatible profiles
currentProfiles := make(map[string]int32)
Expand Down Expand Up @@ -183,45 +182,18 @@ func (r *InstasliceReconciler) UpdateCompatibleProfilesMetrics(instasliceObj inf
}
}
}
// **Clear outdated Prometheus metrics before updating**
instasliceMetrics.compatibleProfiles.Reset()

// **Update metrics only once per profile**
for profileName, totalFit := range currentProfiles {
instasliceMetrics.compatibleProfiles.WithLabelValues(profileName, nodeName, fmt.Sprintf("%d", totalFit)).
Set(float64(recommendedProfileMap[profileName]))
instasliceMetrics.compatibleProfiles.WithLabelValues(profileName, nodeName).
Set(float64(totalFit))

ctrl.Log.Info("[UpdateCompatibleProfilesMetrics] Added compatible profile", "profile", profileName, "totalFit", totalFit)
}

// Clean up metrics for profiles that are no longer compatible
for profileName := range baseProfileSliceMap { // baseProfileSliceMap contains all possible profiles
if _, exists := currentProfiles[profileName]; !exists {
// Profile is no longer compatible; set its value to 0
instasliceMetrics.compatibleProfiles.WithLabelValues(profileName, nodeName, fmt.Sprintf("%d", 0)).Set(0)
ctrl.Log.Info("Removed incompatible profile", "profile", profileName, "nodeName", nodeName)
}
ctrl.Log.Info("[UpdateCompatibleProfilesMetrics] Added compatible profile", "profile", profileName, "count", totalFit)
}

return nil
}

// GenerateProfileMapWithIndexes collects the unique profile names found in the
// Instaslice's Status.NodeResources.MigPlacement and assigns each one an
// incremental, 1-based index for prometheus.
//
// Indexes are handed out in the order the range loop visits the profiles.
// NOTE(review): if MigPlacement is a map, that order is not stable between
// calls, so the same profile may receive a different index each time — confirm
// whether callers rely on stable indexes.
func GenerateProfileMapWithIndexes(instaslice inferencev1alpha1.Instaslice) map[string]int {
	indexByProfile := map[string]int{}

	for name := range instaslice.Status.NodeResources.MigPlacement {
		if _, seen := indexByProfile[name]; seen {
			// Already numbered; keep the first index it was given.
			continue
		}
		// The next free index is one past the number of profiles recorded so far.
		indexByProfile[name] = len(indexByProfile) + 1
	}

	return indexByProfile
}

// generateProfileSliceMap generates the full map for both instaslice.redhat.com and nvidia.com
func generateProfileSliceMap() map[string]uint32 {
prefixes := []string{"instaslice.redhat.com/mig-", "nvidia.com/mig-"}
Expand Down

0 comments on commit a47940c

Please sign in to comment.