From 34901bffe4dda364ca49f08983f7af16fc3a9364 Mon Sep 17 00:00:00 2001 From: Umesh Madan Date: Thu, 30 Jan 2025 18:24:18 -0800 Subject: [PATCH] Timestamping --- ts/examples/chat/src/memory/common.ts | 6 -- ts/examples/chat/src/memory/knowproMemory.ts | 31 +++++++- ts/examples/chat/src/memory/podcastMemory.ts | 3 +- ts/packages/knowPro/src/import.ts | 77 ++++++++++++++------ 4 files changed, 84 insertions(+), 33 deletions(-) diff --git a/ts/examples/chat/src/memory/common.ts b/ts/examples/chat/src/memory/common.ts index 64941b197..1dc0395b3 100644 --- a/ts/examples/chat/src/memory/common.ts +++ b/ts/examples/chat/src/memory/common.ts @@ -205,12 +205,6 @@ export function argToDate(value: string | undefined): Date | undefined { return value ? dateTime.stringToDate(value) : undefined; } -export function addMinutesToDate(date: Date, minutes: number): Date { - const time = date.getTime(); - const offsetMs = minutes * 60 * 1000; - return new Date(time + offsetMs); -} - export function parseFreeAndNamedArguments( args: string[], argDefs: CommandMetadata, diff --git a/ts/examples/chat/src/memory/knowproMemory.ts b/ts/examples/chat/src/memory/knowproMemory.ts index 272757d84..f106356f8 100644 --- a/ts/examples/chat/src/memory/knowproMemory.ts +++ b/ts/examples/chat/src/memory/knowproMemory.ts @@ -20,10 +20,11 @@ import { addFileNameSuffixToPath, argDestFile, argSourceFile, + argToDate, parseFreeAndNamedArguments, recordFromArgs, } from "./common.js"; -import { ensureDir, readJsonFile, writeJsonFile } from "typeagent"; +import { dateTime, ensureDir, readJsonFile, writeJsonFile } from "typeagent"; import path from "path"; import chalk from "chalk"; import { KnowProPrinter } from "./knowproPrinter.js"; @@ -47,8 +48,9 @@ export async function createKnowproCommands( }; await ensureDir(context.basePath); - commands.kpShowMessages = showMessages; + commands.kpPodcastMessages = showMessages; commands.kpPodcastImport = podcastImport; + commands.kpPodcastTimestamp = podcastTimestamp; 
commands.kpPodcastSave = podcastSave; commands.kpPodcastLoad = podcastLoad; commands.kpSearchTerms = searchTerms; @@ -66,7 +68,7 @@ export async function createKnowproCommands( }, }; } - commands.kpShowMessages.metadata = "Show all messages"; + commands.kpPodcastMessages.metadata = "Show all messages"; async function showMessages(args: string[]) { const conversation = ensureConversationLoaded(); if (!conversation) { @@ -120,6 +122,29 @@ export async function createKnowproCommands( await podcastSave(namedArgs); } + function podcastTimestampDef(): CommandMetadata { + return { + description: "Set timestamps", + args: { + startAt: arg("Start date and time"), + }, + options: { + length: argNum("Length of the podcast in minutes", 60), + }, + }; + } + commands.kpPodcastTimestamp.metadata = podcastTimestampDef(); + async function podcastTimestamp(args: string[]) { + const conversation = ensureConversationLoaded(); + if (!conversation) { + return; + } + const namedArgs = parseNamedArguments(args, podcastTimestampDef()); + const startAt = argToDate(namedArgs.startAt)!; + const endAt = dateTime.addMinutesToDate(startAt, namedArgs.length); + kp.timestampMessages(conversation.messages, startAt, endAt); + } + function podcastSaveDef(): CommandMetadata { return { description: "Save Podcast", diff --git a/ts/examples/chat/src/memory/podcastMemory.ts b/ts/examples/chat/src/memory/podcastMemory.ts index 1eec19e25..3af569844 100644 --- a/ts/examples/chat/src/memory/podcastMemory.ts +++ b/ts/examples/chat/src/memory/podcastMemory.ts @@ -21,7 +21,6 @@ import { parseNamedArguments, } from "interactive-app"; import { - addMinutesToDate, argClean, argPause, argSourceFileOrFolder, @@ -480,7 +479,7 @@ export function createPodcastCommands( const sourcePath = namedArgs.sourcePath; const startAt = argToDate(namedArgs.startAt); const endAt = startAt - ? addMinutesToDate(startAt, namedArgs.length) + ? 
dateTime.addMinutesToDate(startAt, namedArgs.length) : undefined; await importTranscript(sourcePath, startAt, endAt); } diff --git a/ts/packages/knowPro/src/import.ts b/ts/packages/knowPro/src/import.ts index a5be529e2..0070be2f1 100644 --- a/ts/packages/knowPro/src/import.ts +++ b/ts/packages/knowPro/src/import.ts @@ -10,7 +10,7 @@ import { ITextEmbeddingData, } from "./dataFormat.js"; import { conversation, split } from "knowledge-processor"; -import { collections, getFileName, readAllText } from "typeagent"; +import { collections, dateTime, getFileName, readAllText } from "typeagent"; import { ConversationIndex, addActionToIndex, @@ -159,28 +159,14 @@ export class Podcast implements IConversation { } } - generateTimestamps() { + public generateTimestamps(startDate?: Date, lengthMinutes: number = 60) { // generate a random date within the last 10 years - const date = new Date(); - const startHour = 14; - date.setFullYear(date.getFullYear() - Math.floor(Math.random() * 10)); - date.setMonth(Math.floor(Math.random() * 12)); - date.setDate(Math.floor(Math.random() * 28)); - const seconds = 3600; - let cumulativeLength = 0; - const cumulativeLengths = this.messages.map((msg) => { - const msgCum = cumulativeLength; - cumulativeLength += msg.textChunks[0].length; - return msgCum; - }); - for (let i = 0; i < this.messages.length; i++) { - const lengthPct = cumulativeLengths[i] / cumulativeLength; - const msgSeconds = lengthPct * seconds; - const minutes = Math.floor((msgSeconds % 3600) / 60); - const secs = Math.floor(msgSeconds % 60); - const timestamp = `${date.toISOString()}T${startHour}:${minutes}:${secs}`; - this.messages[i].timestamp = timestamp; - } + startDate ??= randomDate(); + timestampMessages( + this.messages, + startDate, + dateTime.addMinutesToDate(startDate, lengthMinutes), + ); } public async buildIndex( @@ -295,3 +281,50 @@ export async function importPodcast( // what did K say about Children of Time? 
return pod; } + +/** + * Text (such as a transcript) can be collected over a time range. + * This text can be partitioned into blocks. However, timestamps for individual blocks are not available. + * Assigns individual timestamps to blocks proportional to their lengths (the summed length of each message's textChunks). + * @param messages Messages to assign timestamps to; each message's timestamp is overwritten in place + * @param startDate Start of the time range; used as the timestamp of the first message + * @param endDate End of the time range; an Error is thrown unless it is later than startDate + */ +export function timestampMessages( + messages: IMessage[], + startDate: Date, + endDate: Date, +): void { + let startTicks = startDate.getTime(); + const ticksLength = endDate.getTime() - startTicks; + if (ticksLength <= 0) { + throw new Error(`${startDate} is not < ${endDate}`); + } + let messageLengths = messages.map((m) => messageLength(m)); + const textLength: number = messageLengths.reduce( + (total: number, l) => total + l, + 0, + ); + const ticksPerChar = ticksLength / textLength; + for (let i = 0; i < messages.length; ++i) { + messages[i].timestamp = new Date(startTicks).toISOString(); + // Now, we will 'elapse' time .. proportional to length of the text + // This assumes that each speaker speaks equally fast... + startTicks += ticksPerChar * messageLengths[i]; + } + + function messageLength(message: IMessage): number { + return message.textChunks.reduce( + (total: number, chunk) => total + chunk.length, + 0, + ); + } +} + +function randomDate(startHour = 14) { + const date = new Date(); + date.setFullYear(date.getFullYear() - Math.floor(Math.random() * 10)); + date.setMonth(Math.floor(Math.random() * 12)); + date.setDate(Math.floor(Math.random() * 28)); + return date; +}