Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add instaslice custom metrics #353

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions api/v1alpha1/instaslice_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,9 @@ type InstasliceStatus struct {
// nodeResources specifies the discovered resources of the node
// +optional
NodeResources DiscoveredNodeResources `json:"nodeResources"`
// ObservedGeneration tracks the latest generation of the resource that has been observed and acted upon by the controller
// +optional
ObservedGeneration int64 `json:"observedGeneration,omitempty"`
}

//+kubebuilder:object:root=true
Expand Down
19 changes: 19 additions & 0 deletions cmd/controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,31 @@ func init() {
//+kubebuilder:scaffold:scheme
}

// getEnv returns the value of the environment variable named by key.
// When the variable is not present in the environment, defval is returned
// instead. A variable that is present but set to the empty string yields "",
// not defval.
func getEnv(key, defval string) string {
	value, found := os.LookupEnv(key)
	if !found {
		return defval
	}
	return value
}

func main() {
// Log info before initializing metrics exporter
ctrl.Log.Info("[SetupWithManager] Initializing Metrics Exporter.")
controller.RegisterMetrics()
// Log info after the metrics exporter is initialized
ctrl.Log.Info("[SetupWithManager] Metrics Exporter Initialized.")
var instaslicePrometheusMetricsUrl string = "http://0.0.0.0:8443"

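// NOTE: the metrics URL can be set via the INSTASLICE-PROMETHEUS-METRICS-URL environment variable or the
// -instaslice-prometheus-metrics-url flag; the environment value is only the flag's default, so the flag takes precedence.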
instaslicePrometheusMetricsUrlEnv := getEnv("INSTASLICE-PROMETHEUS-METRICS-URL", instaslicePrometheusMetricsUrl)

var metricsAddr string
var enableLeaderElection bool
var probeAddr string
var secureMetrics bool
var enableHTTP2 bool
flag.StringVar(&instaslicePrometheusMetricsUrl, "instaslice-prometheus-metrics-url", instaslicePrometheusMetricsUrlEnv,
"The URL for the Prometheus metrics where Instaslice exposes metrics")
flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
flag.BoolVar(&enableLeaderElection, "leader-elect", false,
Expand Down
5 changes: 5 additions & 0 deletions config/crd/bases/inference.redhat.com_instaslices.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,11 @@ spec:
- nodeGpus
- nodeResources
type: object
observedGeneration:
description: ObservedGeneration tracks the latest generation of the
resource that has been observed and acted upon by the controller
format: int64
type: integer
podAllocationResults:
additionalProperties:
properties:
Expand Down
3 changes: 3 additions & 0 deletions config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ spec:
capabilities:
drop:
- "ALL"
runAsUser: 1000
livenessProbe:
httpGet:
path: /healthz
Expand All @@ -104,5 +105,7 @@ spec:
value: <IMG_DMST>
- name: EMULATOR_MODE
value: "false"
- name: INSTASLICE-PROMETHEUS-METRICS-URL
value: "http://0.0.0.0:8443"
serviceAccountName: controller-manager
terminationGracePeriodSeconds: 10
15 changes: 15 additions & 0 deletions deploy/instaslice-metrics-service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
apiVersion: v1
kind: Service
metadata:
name: instaslice-metrics
namespace: instaslice-system
labels:
control-plane: controller-manager
spec:
ports:
- name: metrics
port: 8443
protocol: TCP
targetPort: 8443
selector:
control-plane: controller-manager # Must match the labels on the controller-manager pods that serve the metrics
23 changes: 23 additions & 0 deletions deploy/instaslice-servicemonitor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: instaslice-monitor
namespace: instaslice-monitoring
labels:
release: prometheus # Label to match Prometheus serviceMonitorSelector
spec:
selector:
matchLabels:
control-plane: controller-manager # Match labels of the Service exposing metrics
namespaceSelector:
matchNames:
- instaslice-system # Namespace where the Service resides
endpoints:
- port: metrics # Port name exposed in the Service for kube-rbac-proxy
interval: 15s
path: /metrics
scheme: https
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token # Prometheus authentication
honorLabels: true
tlsConfig:
insecureSkipVerify: true # Set to false if using a valid CA
16 changes: 16 additions & 0 deletions deploy/prometheus-role.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: prometheus-metrics-reader
namespace: instaslice-system
rules:
- apiGroups: [""]
resources: ["services", "endpoints", "pods"]
verbs: ["get", "list", "watch"]
- apiGroups: ["metrics.k8s.io"]
resources: ["pods", "nodes"]
verbs: ["get", "list"]
- apiGroups: ["authorization.k8s.io"]
resources: ["subjectaccessreviews"]
verbs: ["create"]

13 changes: 13 additions & 0 deletions deploy/prometheus-rolebinding.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: prometheus-metrics-binding
namespace: instaslice-system
subjects:
- kind: ServiceAccount
name: prometheus-kube-prometheus-prometheus # Change this to your Prometheus ServiceAccount
namespace: instaslice-monitoring # Change to Prometheus namespace
roleRef:
kind: Role
name: prometheus-metrics-reader
apiGroup: rbac.authorization.k8s.io
5 changes: 5 additions & 0 deletions deploy/prometheus-serviceaccount.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: prometheus
namespace: instaslice-monitoring # namespace where Prometheus is running
36 changes: 36 additions & 0 deletions deploy/prometheus.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
alertmanager:
enabled: false
kube-state-metrics:
enabled: false
prometheus-node-exporter:
enabled: false
prometheus-pushgateway:
enabled: false
server:
name: instaslice
service:
enabled: true
type: NodePort
servicePort: 9090
persistentVolume:
existingClaim: prometheus-instaslice
enabled: false
securityContext:
runAsUser:
runAsNonRoot:
runAsGroup:
fsGroup:
extraScrapeConfigs: |
- job_name: instaslice-metrics
honor_labels: true
metrics_path: /metrics
scheme: https
scrape_interval: 15s
static_configs:
- targets:
- instaslice-metrics.instaslice-system.svc.cluster.local:8443
tls_config:
insecure_skip_verify: true
serviceMonitorSelector:
matchLabels:
release: prometheus
13 changes: 10 additions & 3 deletions internal/controller/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,15 @@ const (
daemonSetImageName = "quay.io/amalvank/instaslicev2-daemonset:latest"
daemonSetName = "daemonset"
serviceAccountName = "instaslice-operator-controller-manager"
profile3g20gb = "3g.20gb"
profile1g10gb = "1g.10gb"

Requeue1sDelay = 1 * time.Second
Requeue2sDelay = 2 * time.Second
requeue10sDelay = 10 * time.Second
Requeue1sDelay = 1 * time.Second
Requeue2sDelay = 2 * time.Second
requeue10sDelay = 10 * time.Second
maxSlices7g40gb = 7
EndPosSlices3g20gb = 3
EndPosSlices1g10gb = 1
EndStartPos3g20gb = 4
EndStartPos1g10gb = 6
)
85 changes: 71 additions & 14 deletions internal/controller/instaslice_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ func (r *InstasliceReconciler) Reconcile(ctx context.Context, req ctrl.Request)
return ctrl.Result{}, nil
}
if allocation.AllocationStatus.AllocationStatusDaemonset == inferencev1alpha1.AllocationStatusDeleted {
err := r.removeInstasliceAllocation(ctx, instaslice.Name, &allocation)
err := r.removeInstasliceAllocation(ctx, &allocation, instaslice, uuid)
if err != nil {
return ctrl.Result{}, err
}
Expand Down Expand Up @@ -244,7 +244,7 @@ func (r *InstasliceReconciler) Reconcile(ctx context.Context, req ctrl.Request)
}

if allocation.AllocationStatus.AllocationStatusDaemonset == inferencev1alpha1.AllocationStatusDeleted {
err := r.removeInstasliceAllocation(ctx, instaslice.Name, &allocation)
err := r.removeInstasliceAllocation(ctx, &allocation, instaslice, uuid)
if err != nil {
return ctrl.Result{}, err
}
Expand All @@ -264,6 +264,8 @@ func (r *InstasliceReconciler) Reconcile(ctx context.Context, req ctrl.Request)
}
log.Info("finalizer deleted for succeeded ", "pod", pod.Name)
}
// If no allocations exist, update metrics with all slots free
r.updateMetricsAllSlotsFree(ctx, instasliceList)
return ctrl.Result{}, nil
}

Expand All @@ -283,7 +285,7 @@ func (r *InstasliceReconciler) Reconcile(ctx context.Context, req ctrl.Request)
return ctrl.Result{}, nil
}
if podUuid == pod.UID && allocation.AllocationStatus.AllocationStatusDaemonset == inferencev1alpha1.AllocationStatusDeleted {
err := r.removeInstasliceAllocation(ctx, instaslice.Name, &allocation)
err := r.removeInstasliceAllocation(ctx, &allocation, instaslice, podUuid)
if err != nil {
return ctrl.Result{}, err
}
Expand All @@ -308,8 +310,8 @@ func (r *InstasliceReconciler) Reconcile(ctx context.Context, req ctrl.Request)
for _, instaslice := range instasliceList.Items {
for podUuid, allocation := range instaslice.Status.PodAllocationResults {
if podUuid == pod.UID {
allocRequest := instaslice.Spec.PodAllocationRequests[podUuid]
if allocation.AllocationStatus.AllocationStatusDaemonset == inferencev1alpha1.AllocationStatusDeleted {
allocRequest := instaslice.Spec.PodAllocationRequests[podUuid]
err := utils.UpdateOrDeleteInstasliceAllocations(ctx, r.Client, instaslice.Name, &allocation, &allocRequest)
if err != nil {
return ctrl.Result{}, err
Expand Down Expand Up @@ -397,22 +399,48 @@ func (r *InstasliceReconciler) Reconcile(ctx context.Context, req ctrl.Request)
// Sort by Name in ascending order
return instasliceList.Items[i].Name < instasliceList.Items[j].Name
})

var successfulAllocRequest *inferencev1alpha1.AllocationRequest
var successfulAllocResult *inferencev1alpha1.AllocationResult
var instasliceListItemSuccess inferencev1alpha1.Instaslice
for _, instaslice := range instasliceList.Items {
// find the GPU on the node and the GPU index where the slice can be created
allocRequest, allocResult, err := r.findNodeAndDeviceForASlice(ctx, &instaslice, profileName, policy, pod)
if err != nil {
continue
}
podHasNodeAllocation = true
if podHasNodeAllocation {
err := utils.UpdateOrDeleteInstasliceAllocations(ctx, r.Client, instaslice.Name, allocResult, allocRequest)
if err != nil {
return ctrl.Result{Requeue: true}, nil
successfulAllocRequest = allocRequest
successfulAllocResult = allocResult
instasliceListItemSuccess = instaslice
// Break immediately after finding a suitable allocation
break

}
if podHasNodeAllocation {
err := utils.UpdateOrDeleteInstasliceAllocations(ctx, r.Client, instasliceListItemSuccess.Name, successfulAllocResult, successfulAllocRequest)
if err != nil {
return ctrl.Result{Requeue: true}, err
}
// allocation was successful
// Update total processed GPU slices metrics
// Check if metrics need processing based on ObservedGeneration
if instasliceListItemSuccess.Status.ObservedGeneration < instasliceListItemSuccess.Generation {
if err := r.IncrementTotalProcessedGpuSliceMetrics(string(successfulAllocResult.Nodename), successfulAllocResult.GPUUUID, successfulAllocResult.MigPlacement.Size, successfulAllocResult.MigPlacement.Start, successfulAllocRequest.Profile); err != nil {
log.Error(err, "Failed to update total processed GPU slices metric", "nodeName", successfulAllocResult.Nodename, "gpuID", successfulAllocResult.GPUUUID)
return ctrl.Result{Requeue: true}, err
}
// Mark as processed by updating ObservedGeneration
instasliceListItemSuccess.Status.ObservedGeneration = instasliceListItemSuccess.Generation
if err := r.Status().Update(ctx, &instasliceListItemSuccess); err != nil {
log.Error(err, "Failed to update Instaslice status after processing metrics", "allocation", successfulAllocRequest)
return ctrl.Result{Requeue: true}, err
}
// allocation was successful
return ctrl.Result{}, nil
}

return ctrl.Result{}, nil
}

}

// if the cluster does not have suitable node, requeue request
Expand All @@ -425,10 +453,24 @@ func (r *InstasliceReconciler) Reconcile(ctx context.Context, req ctrl.Request)

}

// updates UpdateGpuSliceMetrics and UpdateCompatibleProfilesMetrics
r.updateMetrics(ctx, instasliceList)

// Update current pending GPU slice requests metrics
pendingCount, err := r.getPendingGpuRequests(ctx, r.Client)
if err != nil {
log.Error(err, "Failed to count pending GPU slice requests")
return ctrl.Result{}, err
}
if err := r.UpdatePendingSliceRequests(pendingCount); err != nil {
log.Error(err, "Failed to update pending GPU slice requests metric")
return ctrl.Result{}, err
}

return ctrl.Result{}, nil
}

// create the DaemonSet object
// createInstaSliceDaemonSet - create the DaemonSet object
func (r *InstasliceReconciler) createInstaSliceDaemonSet(namespace string) *appsv1.DaemonSet {
emulatorMode := r.Config.EmulatorModeEnable
instasliceDaemonsetImage := r.Config.DaemonsetImage
Expand Down Expand Up @@ -596,10 +638,12 @@ func (r *InstasliceReconciler) SetupWithManager(mgr ctrl.Manager) error {
return err
}

return ctrl.NewControllerManagedBy(mgr).
controllerManager := ctrl.NewControllerManagedBy(mgr).
For(&v1.Pod{}).Named("InstaSlice-controller").
Watches(&inferencev1alpha1.Instaslice{}, handler.EnqueueRequestsFromMapFunc(r.podMapFunc)).
Complete(r)

return controllerManager
}

func (r *InstasliceReconciler) unGatePod(podUpdate *v1.Pod) *v1.Pod {
Expand Down Expand Up @@ -678,15 +722,28 @@ func (l *RightToLeftPolicy) SetAllocationDetails(profileName string, newStart, s
return &inferencev1alpha1.AllocationRequest{}
}

func (r *InstasliceReconciler) removeInstasliceAllocation(ctx context.Context, instasliceName string, allocation *inferencev1alpha1.AllocationResult) error {
// removeInstasliceAllocation clears the allocation for a pod on the given Instaslice and
// zeroes the pod's deployed-pod metric.
//
// When the daemonset status of allocation is AllocationStatusDeleted, it calls
// utils.UpdateOrDeleteInstasliceAllocations with a nil result and nil request for the
// Instaslice. It then looks up podUid in instaslice.Spec.PodAllocationRequests to obtain
// the labels needed for the metric.
//
// It returns an error when the update/delete call fails or when podUid has no entry in
// PodAllocationRequests. A failure to update the metric is only logged and does not fail
// the call.
func (r *InstasliceReconciler) removeInstasliceAllocation(ctx context.Context, allocation *inferencev1alpha1.AllocationResult, instaslice inferencev1alpha1.Instaslice, podUid types.UID) error {
log := logr.FromContext(ctx)
if allocation.AllocationStatus.AllocationStatusDaemonset == inferencev1alpha1.AllocationStatusDeleted {
err := utils.UpdateOrDeleteInstasliceAllocations(ctx, r.Client, instasliceName, nil, nil)
err := utils.UpdateOrDeleteInstasliceAllocations(ctx, r.Client, instaslice.Name, nil, nil)
if err != nil {
return err
}
}
// instaslice is received by value, so this lookup reads the copy handed in by the caller.
// NOTE(review): whether that copy can lack the request entry after the call above depends on
// the caller and on UpdateOrDeleteInstasliceAllocations — confirm.
allocRequest, exists := instaslice.Spec.PodAllocationRequests[podUid]
// prevents using an empty allocRequest and logs an error when it's missing
if !exists {
log.Error(fmt.Errorf("podUid not found"), "Pod UID not found in Instaslice PodAllocationRequests", "podUid", podUid)
return fmt.Errorf("podUid %s not found in Instaslice PodAllocationRequests", podUid)
}

// update DeployedPodTotal Metrics by setting value to 0 as pod allocation is deleted
// Best effort: a metric update error is logged but not returned.
if err := r.UpdateDeployedPodTotalMetrics(string(allocation.Nodename), allocation.GPUUUID, allocRequest.PodRef.Namespace, allocRequest.PodRef.Name, allocRequest.Profile, 0); err != nil {
log.Error(err, "Failed to update deployed pod metrics", "nodeName", allocation.Nodename)
}
return nil
}

func (r *InstasliceReconciler) setInstasliceAllocationToDeleting(ctx context.Context, instasliceName string, allocResult *inferencev1alpha1.AllocationResult, allocRequest *inferencev1alpha1.AllocationRequest) (ctrl.Result, error) {
log := logr.FromContext(ctx)
allocResult.AllocationStatus.AllocationStatusController = inferencev1alpha1.AllocationStatusDeleting
Expand Down
Loading