Skip to content

Commit ca10943

Browse files
committed
fix: use a priority heap to choose most important events to display
e.g. if we are limited in vertical space, prefer to show pods still Pulling images over pods that have announced they are done and Pulled also this improves the display of duplicate lines, rather just updating the timestamp
1 parent 935a092 commit ca10943

File tree

7 files changed

+102
-57
lines changed

7 files changed

+102
-57
lines changed

package-lock.json

+16
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

plugins/plugin-codeflare-dashboard/package.json

+2
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,14 @@
2323
"access": "public"
2424
},
2525
"devDependencies": {
26+
"@types/heap": "^0.2.31",
2627
"@types/split2": "^3.2.1",
2728
"react-devtools-core": "^4.27.4"
2829
},
2930
"dependencies": {
3031
"@logdna/tail-file": "^3.0.1",
3132
"chokidar": "^3.5.3",
33+
"heap": "^0.2.7",
3234
"ink": "^3.2.0",
3335
"madwizard": "^8.0.3",
3436
"pretty-ms": "^7.0.1",

plugins/plugin-codeflare-dashboard/src/components/Dashboard/index.tsx

+12-12
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ export default class Dashboard extends React.PureComponent<Props, State> {
8686
this.setState((curState) => ({
8787
firstUpdate: (curState && curState.firstUpdate) || Date.now(), // TODO pull from the events
8888
lastUpdate: Date.now(), // TODO pull from the events
89-
lines: !model.lines || model.lines.N === 0 ? curState?.lines : model.lines,
89+
lines: !model.lines || model.lines.length === 0 ? curState?.lines : model.lines,
9090
workers: !curState?.workers
9191
? [model.workers]
9292
: [...curState.workers.slice(0, gridIdx), model.workers, ...curState.workers.slice(gridIdx + 1)],
@@ -168,26 +168,26 @@ export default class Dashboard extends React.PureComponent<Props, State> {
168168
return prettyMillis(millis, { compact: true }) + " ago"
169169
}
170170

171-
private agos(timestamp: string) {
172-
return this.ago(Date.now() - new Date(timestamp).getTime())
171+
private agos(millis: number) {
172+
return this.ago(Date.now() - millis)
173173
}
174174

175175
private footer() {
176176
if (!this.lines) {
177177
return <React.Fragment />
178178
} else {
179-
const rows = []
180-
for (let idx = this.lines.idx, n = 0; n < this.lines.N; idx = (idx + 1) % this.lines.lines.length, n++) {
181-
const line = this.lines.lines[idx]
182-
.replace(/(\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\dZ)/, (_, timestamp) => this.agos(timestamp))
183-
.replace(/pod\/torchx-\S+ /, "") // worker name in torchx
184-
.replace(/pod\/ray-(head|worker)-\S+ /, "") // worker name in ray
185-
.replace(/\* /, "") // wildcard worker name (codeflare)
179+
const rows = this.lines.map(({ line, timestamp }) => {
180+
const txt = line.replace("{timestamp}", () => this.agos(timestamp))
181+
// .replace(/pod\/torchx-\S+ /, "") // worker name in torchx
182+
// .replace(/pod\/ray-(head|worker)-\S+ /, "") // worker name in ray
183+
// .replace(/\* /, "") // wildcard worker name (codeflare)
184+
// .replace(/\x1b\x5B\[2J/g, "")
185+
// TODO timestamp
186186

187187
// [2J is part of clear screen; we don't want those to flow through
188188
// eslint-disable-next-line no-control-regex
189-
rows.push(<Text key={line + "-" + n}>{line.replace(/\x1b\x5B\[2J/g, "")}</Text>)
190-
}
189+
return <Text key={txt}>{txt}</Text>
190+
})
191191
return (
192192
<Box marginTop={1} flexDirection="column">
193193
{rows}

plugins/plugin-codeflare-dashboard/src/components/Dashboard/types.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ export type UpdatePayload = {
3939
workers: Worker[]
4040

4141
/** Lines of raw output to be displayed */
42-
lines?: { lines: string[]; idx: number; N: number }
42+
lines?: { line: string; timestamp: number }[]
4343
}
4444

4545
/** Callback from controller when it has updated data */

plugins/plugin-codeflare-dashboard/src/controller/dashboard/status/Live.ts

+51-38
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,16 @@
1414
* limitations under the License.
1515
*/
1616

17+
import Heap from "heap"
1718
import stripAnsi from "strip-ansi"
1819
import type { TextProps } from "ink"
1920

2021
import type { Tail } from "../tailf.js"
2122
import type { OnData, Worker } from "../../../components/Dashboard/types.js"
2223

23-
import { WorkerState, stateFor } from "./states.js"
24+
import { WorkerState, rankFor, stateFor } from "./states.js"
25+
26+
type Line = { line: string; stateRank: number; timestamp: number }
2427

2528
/**
2629
* Maintain a model of live data from a given set of file streams
@@ -32,16 +35,21 @@ export default class Live {
3235
private readonly workers: Record<string, Worker> = {}
3336

3437
/** Number of lines of output to retain. TODO this depends on height of terminal? */
35-
private static readonly nLines = 18
36-
37-
/** Model of the last `Live.nLines` lines of output */
38-
private readonly lines: string[] = Array(Live.nLines).fill("")
38+
private static readonly MAX_HEAP = 1000
3939

40-
/** Current insertion index into `this.lines` */
41-
private linesInsertionIdx = 0
40+
/** Model of the lines of output */
41+
private readonly lines = new Heap<Line>((a, b) => {
42+
if (a.line === b.line) {
43+
return 0
44+
}
4245

43-
/** Current number of valid lines in `this.lines` */
44-
private linesCount = 0
46+
const stateDiff = a.stateRank - b.stateRank
47+
if (stateDiff !== 0) {
48+
return stateDiff
49+
} else {
50+
return a.timestamp - b.timestamp
51+
}
52+
})
4553

4654
public constructor(private readonly tails: Promise<Tail>[], cb: OnData, styleOf: Record<WorkerState, TextProps>) {
4755
tails.map((tailf) => {
@@ -60,7 +68,7 @@ export default class Live {
6068

6169
if (!name || !timestamp) {
6270
// console.error("Bad status record", line)
63-
this.pushLineAndPublish(data, cb)
71+
// this.pushLineAndPublish(data, metric, timestamp, cb)
6472
return
6573
} else if (!metric) {
6674
// ignoring this line
@@ -93,7 +101,7 @@ export default class Live {
93101

94102
// inform the UI that we have updates
95103
cb({
96-
lines: this.pushLine(data),
104+
lines: this.pushLine(data, metric, timestamp),
97105
workers: Object.values(this.workers),
98106
})
99107
}
@@ -112,40 +120,45 @@ export default class Live {
112120
})
113121
}
114122

123+
private readonly lookup: Record<string, Line> = {}
115124
/** Add `line` to our circular buffer `this.lines` */
116-
private pushLine(line: string) {
117-
if (this.lines.includes(line)) {
118-
// duplicate line
119-
// the oldest is the one we are about to overwrite
120-
let oldestIdx = this.linesInsertionIdx
121-
if (this.lines[oldestIdx].length === 0) {
122-
do {
123-
oldestIdx = (oldestIdx + 1) % Live.nLines
124-
} while (this.lines[oldestIdx].length === 0)
125-
}
126-
return { lines: this.lines, idx: oldestIdx, N: this.linesCount }
125+
private pushLine(line: string, metric: WorkerState, timestamp: number) {
126+
const key = line
127+
.replace(/\s*(\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\dZ)\s*/, "{timestamp}")
128+
.replace(/pod\/torchx-\S+ /, "") // worker name in torchx
129+
.replace(/pod\/ray-(head|worker)-\S+ /, "") // worker name in ray
130+
.replace(/\* /, "") // wildcard worker name (codeflare)
131+
.replace(/\x1b\x5B\[2J/g, "") // eslint-disable-line no-control-regex
132+
// ^^^ [2J is part of clear screen; we don't want those to flow through
133+
134+
const rec = {
135+
timestamp,
136+
stateRank: rankFor[metric],
137+
line: key,
138+
}
139+
140+
const already = this.lookup[rec.line]
141+
if (already) {
142+
already.timestamp = timestamp
143+
this.lines.updateItem(already)
127144
} else {
128-
const idx = this.linesInsertionIdx
129-
this.linesInsertionIdx = (this.linesInsertionIdx + 1) % Live.nLines
130-
131-
this.lines[idx] = line
132-
this.linesCount = Math.min(this.lines.length, this.linesCount + 1)
133-
134-
// the oldest is the one we are about to overwrite
135-
let oldestIdx = this.linesInsertionIdx
136-
if (this.lines[oldestIdx].length === 0) {
137-
do {
138-
oldestIdx = (oldestIdx + 1) % Live.nLines
139-
} while (this.lines[oldestIdx].length === 0)
145+
this.lookup[rec.line] = rec
146+
if (this.lines.size() >= Live.MAX_HEAP) {
147+
this.lines.replace(rec)
148+
} else {
149+
this.lines.push(rec)
140150
}
141-
142-
return { lines: this.lines, idx: oldestIdx, N: this.linesCount }
143151
}
152+
153+
return this.lines
154+
.toArray()
155+
.slice(0, 18)
156+
.sort((a, b) => a.timestamp - b.timestamp)
144157
}
145158

146159
/** `pushLine` and then pass the updated model to `cb` */
147-
private pushLineAndPublish(line: string, cb: OnData) {
148-
cb({ lines: this.pushLine(line), workers: Object.values(this.workers) })
160+
private pushLineAndPublish(line: string, metric: WorkerState, timestamp: number, cb: OnData) {
161+
cb({ lines: this.pushLine(line, metric, timestamp), workers: Object.values(this.workers) })
149162
}
150163

151164
private asMillisSinceEpoch(timestamp: string) {

plugins/plugin-codeflare-dashboard/src/controller/dashboard/status/states.ts

+13-3
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,16 @@ export const states = ["Queued", "Provisioning", "Initializing", "Running", "Suc
1919
/** Type declaration for quantized utilization states */
2020
export type WorkerState = (typeof states)[number]
2121

22+
/** Lower means more important */
23+
export const rankFor: Record<WorkerState, number> = {
24+
Queued: 3,
25+
Provisioning: 1,
26+
Initializing: 2,
27+
Running: 1,
28+
Success: 0,
29+
Failed: 0,
30+
}
31+
2232
export const stateFor: Record<string, WorkerState> = {
2333
Pending: "Queued",
2434
Queued: "Queued",
@@ -32,9 +42,9 @@ export const stateFor: Record<string, WorkerState> = {
3242

3343
Unhealthy: "Provisioning", // service not yet ready... Initializing?
3444

35-
Initializing: "Initializing",
36-
Installing: "Initializing",
37-
Pulling: "Initializing",
45+
Initializing: "Provisioining",
46+
Installing: "Provisioining",
47+
Pulling: "Provisioining",
3848
Pulled: "Initializing",
3949

4050
Started: "Initializing",

plugins/plugin-codeflare-dashboard/src/controller/env.ts

+7-3
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,14 @@ export async function getJobEnv(profile: string, jobId: string): Promise<Record<
4848
}
4949

5050
export async function numGpus(profile: string, jobId: string): Promise<number> {
51-
const env = await getJobEnv(profile, jobId)
51+
try {
52+
const env = await getJobEnv(profile, jobId)
5253

53-
const raw = env["NUM_GPUS"]
54-
return typeof raw === "number" ? raw : typeof raw === "string" ? parseInt(raw, 10) : 0
54+
const raw = env["NUM_GPUS"]
55+
return typeof raw === "number" ? raw : typeof raw === "string" ? parseInt(raw, 10) : 0
56+
} catch (err) {
57+
return 0
58+
}
5559
}
5660

5761
export async function usesGpus(profile: string, jobId: string): Promise<boolean> {

0 commit comments

Comments
 (0)