Skip to content

Commit

Permalink
Bug 1388493 - Better handling of metrics errors
Browse files Browse the repository at this point in the history
If the first request for metrics fails, show an empty state error message.
Otherwise show an alert if more than one consecutive request fails. The
alert has a retry link.

Fixes https://bugzilla.redhat.com/show_bug.cgi?id=1388493
  • Loading branch information
spadgett committed Nov 28, 2016
1 parent 75f0b8d commit c074df5
Show file tree
Hide file tree
Showing 7 changed files with 256 additions and 123 deletions.
71 changes: 57 additions & 14 deletions app/scripts/directives/deploymentMetrics.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ angular.module('openshiftConsole')
// in case pods is empty.
containers: '=',
// Optional: set to 'compact' to show smaller charts (for the overview)
profile: '@'
profile: '@',
alerts: '=?'
},
templateUrl: function(elem, attrs) {
if (attrs.profile === 'compact') {
Expand Down Expand Up @@ -93,6 +94,9 @@ angular.module('openshiftConsole')
scope.loaded = false;
scope.noData = true;

// Track the number of consecutive failures.
var failureCount = 0;

// Get the URL to show in error messages.
MetricsService.getMetricsURL().then(function(url) {
scope.metricsURL = url;
Expand Down Expand Up @@ -311,7 +315,8 @@ angular.module('openshiftConsole')
return;
}

scope.loaded = true;
// Reset the number of failures on a successful request.
failureCount = 0;

// Show an average instead of a multiline chart when there are many pods.
scope.showAverage = _.size(scope.pods) > 5 || compact;
Expand Down Expand Up @@ -392,6 +397,50 @@ angular.module('openshiftConsole')
return config;
}

// If the first request for metrics fails, show an empty state error message.
// Otherwise show an alert if more than one consecutive request fails.
function metricsFailed(response) {
if (destroyed) {
return;
}

failureCount++;
if (scope.noData) {
// Show an empty state message if the first request for data fails.
scope.metricsError = {
status: _.get(response, 'status', 0),
details: _.get(response, 'data.errorMsg') ||
_.get(response, 'statusText') ||
"Status code " + _.get(response, 'status', 0)
};
return;
}

// If this is the first failure and a previous request succeeded, wait and try again.
if (failureCount < 2) {
return;
}

// Show an alert if we've failed more than once.
// Use scope.$id in the alert ID so that it is unique on pages that
// use the directive multiple times like monitoring.
var alertID = 'metrics-failed-' + scope.uniqueID;
scope.alerts[alertID] = {
type: 'error',
message: 'An error occurred updating metrics.',
links: [{
href: '',
label: 'Retry',
onClick: function() {
delete scope.alerts[alertID];
// Reset failure count to 1 to trigger a retry.
failureCount = 1;
update();
}
}]
};
}

// Make sure there are no errors or missing data before updating.
function canUpdate() {
var noPods = _.isEmpty(scope.pods);
Expand All @@ -400,7 +449,7 @@ angular.module('openshiftConsole')
scope.loaded = true;
return false;
}
return !scope.metricsError;
return !scope.metricsError && failureCount < 2;
}

function updateData(metricType, podName, podData) {
Expand All @@ -419,23 +468,17 @@ angular.module('openshiftConsole')
_.set(data, [metricType, podName], updated);
}

function handleError(response) {
scope.loaded = true;
scope.metricsError = {
status: _.get(response, 'status', 0),
details: _.get(response, 'data.errorMsg') ||
_.get(response, 'statusText') ||
"Status code " + _.get(response, 'status', 0)
};
}

function update() {
if (paused || !canUpdate()) {
return;
}
lastUpdated = Date.now();
var config = getConfig();
MetricsService.getPodMetrics(config).then(processData, handleError);
MetricsService.getPodMetrics(config).then(processData, metricsFailed).finally(function() {
// Even on errors mark metrics as loaded to replace the
// "Loading..." message with "No metrics to display."
scope.loaded = true;
});
}

// Updates immediately and then on options changes.
Expand Down
117 changes: 80 additions & 37 deletions app/scripts/directives/podMetrics.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ angular.module('openshiftConsole')
pod: '=',
sparklineWidth: '=?',
sparklineHeight: '=?',
includedMetrics: '=?' // defaults to ["cpu", "memory", "network"]
includedMetrics: '=?', // defaults to ["cpu", "memory", "network"]
alerts: '=?'
},
templateUrl: 'views/directives/pod-metrics.html',
link: function(scope) {
Expand All @@ -27,6 +28,7 @@ angular.module('openshiftConsole')
var getCPULimit = $parse('resources.limits.cpu');

var updateInterval = 60 * 1000; // 60 seconds

// Number of data points to display on the chart.
var numDataPoints = 30;

Expand Down Expand Up @@ -382,9 +384,61 @@ angular.module('openshiftConsole')
return null;
}

// Track the number of consecutive failures.
var failureCount = 0;

function metricsSucceeded() {
// Reset the number of failures on a successful request.
failureCount = 0;
}

// If the first request for metrics fails, show an empty state error message.
// Otherwise show an alert if more than one consecutive request fails.
function metricsFailed(response) {
if (destroyed) {
return;
}

failureCount++;
if (scope.noData) {
// Show an empty state message if the first request for data fails.
scope.metricsError = {
status: _.get(response, 'status', 0),
details: _.get(response, 'data.errorMsg') ||
_.get(response, 'statusText') ||
"Status code " + _.get(response, 'status', 0)
};
return;
}

// If this is the first failure and a previous request succeeded, wait and try again.
if (failureCount < 2) {
return;
}

// Show an alert if we've failed more than once.
// Use scope.$id in the alert ID so that it is unique on pages that
// use the directive multiple times like monitoring.
var alertID = 'metrics-failed-' + scope.uniqueID;
scope.alerts[alertID] = {
type: 'error',
message: 'An error occurred updating metrics for pod ' + _.get(scope, 'pod.metadata.name', '<unknown>') + '.',
links: [{
href: '',
label: 'Retry',
onClick: function() {
delete scope.alerts[alertID];
// Reset failure count to 1 to trigger a retry.
failureCount = 1;
update();
}
}]
};
}

// Make sure there are no errors or missing data before updating.
function canUpdate() {
if (scope.metricsError) {
if (scope.metricsError || failureCount > 1) {
return false;
}

Expand Down Expand Up @@ -419,8 +473,9 @@ angular.module('openshiftConsole')
// time. This prevents an issue where the donut chart shows 0 for
// current usage if the client clock is ahead of the server clock.
var start = getStartTime();
var allPromises = [];
angular.forEach(scope.metrics, function(metric) {
var promises = [];
var datasetPromises = [];

// On metrics that require more than one set of data (e.g. network
// incoming and outgoing traffic) we perform one request for each,
Expand All @@ -434,52 +489,40 @@ angular.module('openshiftConsole')
if (!config) {
return;
}
promises.push(MetricsService.get(config));
var promise = MetricsService.get(config);
datasetPromises.push(promise);
});

allPromises = allPromises.concat(datasetPromises);

// Collect all promises from every metric requested into one, so we
// have all data the chart wants at the time of the chart creation
// (or timeout updates, etc).
$q.all(promises).then(
// success
function(responses) {
if (destroyed) {
return;
}

angular.forEach(responses, function(response) {
if (!response) {
return;
}
$q.all(datasetPromises).then(function(responses) {
if (destroyed) {
return;
}

var dataset = _.find(metric.datasets, {
id: response.metricID
});
updateData(dataset, response);
});
updateChart(metric);
},
// failure
function(responses) {
if (destroyed) {
angular.forEach(responses, function(response) {
if (!response) {
return;
}

angular.forEach(responses, function(response) {
scope.metricsError = {
status: _.get(response, 'status', 0),
details: _.get(response, 'data.errorMsg') ||
_.get(response, 'statusText') ||
"Status code " + _.get(response, 'status', 0)
};
var dataset = _.find(metric.datasets, {
id: response.metricID
});
}
).finally(function() {
// Even on errors mark metrics as loaded to replace the
// "Loading..." message with "No metrics to display."
scope.loaded = true;
updateData(dataset, response);
});
updateChart(metric);
});
});

// Handle failures when any request fails.
$q.all(allPromises).then(metricsSucceeded, metricsFailed).finally(function() {
// Even on errors mark metrics as loaded to replace the
// "Loading..." message with "No metrics to display."
scope.loaded = true;
});
}

// Updates immediately and then on options changes.
Expand Down
3 changes: 2 additions & 1 deletion app/views/browse/pod.html
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ <h3>Container {{container.name}} Environment Variables</h3>
we don't update in the background. -->
<pod-metrics
ng-if="selectedTab.metrics"
pod="pod">
pod="pod"
alerts="alerts">
</pod-metrics>
</uib-tab>

Expand Down
3 changes: 2 additions & 1 deletion app/views/browse/replica-set.html
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ <h3>Container {{container.name}} Environment Variables</h3>
we don't update in the background. -->
<deployment-metrics
ng-if="selectedTab.metrics && podsForDeployment"
pods="podsForDeployment" containers="replicaSet.spec.template.spec.containers">
pods="podsForDeployment" containers="replicaSet.spec.template.spec.containers"
alerts="alerts">
</deployment-metrics>
</uib-tab>
<uib-tab ng-if="deploymentConfigName && logOptions.version && ('deploymentconfigs/log' | canI : 'get')" active="selectedTab.logs">
Expand Down
10 changes: 6 additions & 4 deletions app/views/monitoring.html
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,8 @@ <h2>Deployments</h2>
<div class="mar-top-lg" ng-if="metricsAvailable">
<deployment-metrics
pods="podsByOwnerUID[replicationController.metadata.uid]"
containers="replicationController.spec.template.spec.containers">
containers="replicationController.spec.template.spec.containers"
alerts="alerts">
</deployment-metrics>
</div>
</div>
Expand Down Expand Up @@ -257,8 +258,9 @@ <h2>Deployments</h2>
<div class="mar-top-lg" ng-if="metricsAvailable">
<deployment-metrics
pods="podsByOwnerUID[replicaSet.metadata.uid]"
containers="replicaSet.spec.template.spec.containers">
</deployment-metrics>
containers="replicaSet.spec.template.spec.containers"
alerts="alerts">
</deployment-metrics>>
</div>
</div>
</div>
Expand Down Expand Up @@ -351,7 +353,7 @@ <h2>Pods</h2>
</log-viewer>
<!-- Until the metrics directive pulls the metrics from the time range of the life of the pod, hide the metrics for old stuff -->
<div class="mar-top-lg" ng-if="metricsAvailable">
<pod-metrics pod="pod"></pod-metrics>
<pod-metrics pod="pod" alerts="alerts"></pod-metrics>
</div>
</div>
</div>
Expand Down
Loading

0 comments on commit c074df5

Please sign in to comment.