Skip to content

Commit

Permalink
Merge pull request #18654 from nak3/diagnostics
Browse files Browse the repository at this point in the history
Automatic merge from submit-queue.

[Diagnostics] Fix AnalyzeLogs to provide more clear debug message

When we run `oc adm diagnostics AnalyzeLogs`, the diagnostics command
often misses error messages in the journal logs. As admins, we then
try to find out the reason, but the debug output gives us an unclear
message:

```
$ oc adm diagnostics AnalyzeLogs -l=0
   ...
debug: Stopped reading docker log: timestamp 1518835107917828 too old
   ...
```

So, this patch changes AnalyzeLogs as follows:

- Output field of struct of discovered systemd unit
- Produce correct error message if journal log has invalid timestamp
- Output why diagnostics stopped reading logs with readable timestamp.

Here is the message after applying this patch:

  ```
  debug: Stopped reading docker log: timestamp 2018-02-17 11:33:58 +0900 JST more than 1 hour ago
  ```
  • Loading branch information
openshift-merge-robot authored Feb 21, 2018
2 parents 2f9dbe3 + 8adfb26 commit 37a4ec3
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ func resolveServerIP(serverUrl string, fn dnsResolver) ([]string, error) {

func searchNodesForIP(nodes []kapi.Node, ips []string) types.DiagnosticResult {
r := types.NewDiagnosticResult(MasterNodeName)
r.Debug("DClu3005", fmt.Sprintf("Seaching for a node with master IP: %s", ips))
r.Debug("DClu3005", fmt.Sprintf("Searching for a node with master IP: %s", ips))

// Loops = # of nodes * number of IPs per node (2 commonly) * # of IPs the
// server hostname resolves to. (should usually be 1)
Expand Down
17 changes: 7 additions & 10 deletions pkg/oc/admin/diagnostics/diagnostics/systemd/analyze_logs.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,13 @@ func (d AnalyzeLogs) Check() types.DiagnosticResult {
if err := json.Unmarshal(bytes, &entry); err != nil {
r.Debug("DS0003", fmt.Sprintf("Couldn't read the JSON for this log message:\n%s\nGot error %s", string(bytes), errStr(err)))
} else {
if lineCount > 500 && stampTooOld(entry.TimeStamp, timeLimit) {
r.Debug("DS0004", fmt.Sprintf("Stopped reading %s log: timestamp %s too old", unitName, entry.TimeStamp))
break // if we've analyzed at least 500 entries, stop when age limit reached (don't scan days of logs)
epochns, err := strconv.ParseInt(entry.TimeStamp, 10, 64)
if err == nil && time.Unix(epochns/1000000, 0).Before(timeLimit) && lineCount > 500 {
r.Debug("DS0005", fmt.Sprintf("Stopped reading %s log: timestamp %s more than 1 hour ago", unitName, time.Unix(epochns/1000000, 0)))
break
} else if err != nil {
r.Warn("DS0004", err, fmt.Sprintf("Find invalid timestamp %s in %s log", entry.TimeStamp, unitName))
continue
}
if unit.StartMatch.MatchString(entry.Message) {
break // saw log message for unit startup; don't analyze previous logs
Expand Down Expand Up @@ -142,10 +146,3 @@ func (d AnalyzeLogs) Check() types.DiagnosticResult {

return r
}

func stampTooOld(stamp string, timeLimit time.Time) bool {
if epochns, err := strconv.ParseInt(stamp, 10, 64); err == nil {
return time.Unix(epochns/1000000, 0).Before(timeLimit)
}
return true // something went wrong, stop looking...
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ func GetSystemdUnits(logger *log.Logger) map[string]types.SystemdUnit {
}
}

logger.Debug("DS1003", fmt.Sprintf("%v", systemdUnits))
return systemdUnits
}

Expand Down

0 comments on commit 37a4ec3

Please sign in to comment.