Skip to content

Commit ebf9172

Browse files
committed
improve prompt processing
1 parent 94ae146 commit ebf9172

File tree

1 file changed

+89
-25
lines changed

1 file changed

+89
-25
lines changed

src/common/prompt/promptCompletion.ts

Lines changed: 89 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import * as vscode from "vscode";
22
import path from "node:path";
3+
import Logger from "../logger";
34

45
const tokenize = async (text: string, url: string) => {
56
try {
@@ -53,6 +54,9 @@ const spliteDocumentByPosition = (
5354
);
5455
return [textBefore, textAfter];
5556
};
57+
/**
 * Computes the reciprocal of the square root of `x`, i.e. `1 / sqrt(x)`.
 *
 * No input validation is performed: `x === 0` yields `Infinity` and a
 * negative `x` yields `NaN`, exactly as the underlying `Math.sqrt` dictates.
 *
 * @param x - Value whose inverse square root is wanted.
 * @returns `1 / Math.sqrt(x)`.
 */
const inverseSquareRoot = (x: number) => {
  const root = Math.sqrt(x);
  return 1 / root;
};
58+
/**
 * Draws a pseudo-random whole number from `Math.random()`, scaled to the
 * width `max - min + 1`, shifted by `min`, and floored. For integer bounds
 * with `min <= max` the result lies in the closed range [min, max].
 *
 * NOTE(review): the bare `59+` line below is a diff-gutter line number from
 * the scraped commit page, not part of the original expression — confirm
 * against the real source file before reusing this snippet.
 *
 * @param min - Lower bound of the interval (inclusive for integer input).
 * @param max - Upper bound of the interval (inclusive for integer input).
 * @returns The floored random value.
 */
const randomFromInterval = (min: number, max: number) =>
59+
Math.floor(Math.random() * (max - min + 1) + min);
5660

5761
const processingDocumentWithPosition = async ({
5862
document,
@@ -66,34 +70,56 @@ const processingDocumentWithPosition = async ({
6670
maxToken: number;
6771
}) => {
6872
const [textBefore, textAfter] = spliteDocumentByPosition(document, position);
69-
let beforeTokens = 50;
70-
let afterTokens = 50;
73+
74+
let beforeTokens = maxToken / 2;
75+
let afterTokens = maxToken / 2;
7176

7277
let textBeforeSlice: string;
7378
let textAfterSlice: string;
7479

75-
let resToken = 0;
80+
let tokens = 0;
7681

7782
while (true) {
78-
textBeforeSlice = textBefore.slice(beforeTokens * -1);
79-
textAfterSlice = textAfter.slice(0, afterTokens);
83+
textBeforeSlice = textBefore.slice(beforeTokens * 3 * -1);
84+
textAfterSlice = textAfter.slice(0, afterTokens * 3);
8085

81-
resToken = await tokenize(textBeforeSlice + textAfterSlice, url);
86+
tokens = await tokenize(textBeforeSlice + textAfterSlice, url);
87+
const tokenDifference = Math.abs(maxToken - tokens);
88+
const maxDifference = Math.max(maxToken * 0.1, 10);
8289

90+
const documentName = document.fileName;
91+
Logger.debug(`${documentName} document tokens: ${tokens}`);
8392
if (
84-
resToken >= maxToken ||
85-
(textBeforeSlice.length >= textBefore.length &&
86-
textAfterSlice.length >= textAfter.length)
93+
(tokens <= maxToken &&
94+
textBeforeSlice.length >= textBefore.length &&
95+
textAfterSlice.length >= textAfter.length) ||
96+
tokenDifference <= maxDifference
8797
) {
8898
return {
8999
documentText: `${textBeforeSlice}<|fim▁hole|>${textAfterSlice}`,
90-
documentTokens: resToken,
100+
documentTokens: tokens,
91101
};
92102
}
93103

94-
beforeTokens =
95-
Number((beforeTokens * (maxToken / resToken)).toFixed(0)) + 5;
96-
afterTokens = Number((afterTokens * (maxToken / resToken)).toFixed(0)) + 5;
104+
if (tokens <= maxToken) {
105+
beforeTokens +=
106+
inverseSquareRoot(beforeTokens / maxToken) *
107+
randomFromInterval(30, 60) *
108+
4;
109+
afterTokens +=
110+
inverseSquareRoot(afterTokens / maxToken) *
111+
randomFromInterval(30, 60) *
112+
4;
113+
} else {
114+
beforeTokens -=
115+
inverseSquareRoot(beforeTokens / maxToken) *
116+
randomFromInterval(30, 60) *
117+
4;
118+
afterTokens -=
119+
inverseSquareRoot(afterTokens / maxToken) *
120+
randomFromInterval(30, 60) *
121+
4;
122+
}
97123
}
98124
};
99125

@@ -107,25 +133,58 @@ const processingDocument = async ({
107133
maxToken: number;
108134
}) => {
109135
const text = getTextNormalized(document.getText());
110-
let tokens = 50;
111136

112-
let textSlice: string;
137+
let tokens = maxToken;
113138

114-
let resToken = 0;
139+
let textSlice: string;
115140

116141
while (true) {
117-
textSlice = text.slice(0, tokens);
142+
Logger.debug("New iteration of the while loop");
143+
144+
textSlice = text.slice(0, Number(tokens.toFixed(0)) * 3);
145+
146+
tokens = await tokenize(textSlice, url);
118147

119-
resToken = await tokenize(textSlice, url);
148+
const tokenDifference = Math.abs(maxToken - tokens);
149+
const maxDifference = Math.max(maxToken * 0.05, 10);
150+
151+
const logMessage = `Text slice length: ${textSlice.length}, Tokens after tokenization: ${tokens}, Max token: ${maxToken}, Token difference: ${tokenDifference}`;
152+
153+
Logger.debug(logMessage);
154+
155+
const documentName = document.fileName;
156+
Logger.debug(`${documentName} document tokens: ${tokens}`);
157+
if (
158+
(tokens <= maxToken && textSlice.length >= text.length) ||
159+
tokenDifference <= maxDifference
160+
) {
161+
Logger.debug(`${documentName} document tokens resualt: ${tokens}`);
120162

121-
if (resToken >= maxToken || textSlice.length >= text.length) {
122163
return {
123164
documentText: textSlice,
124-
documentTokens: resToken,
165+
documentTokens: tokens,
125166
};
126167
}
127168

128-
tokens = Number((tokens * (maxToken / resToken)).toFixed(0)) + 5;
169+
if (tokens <= maxToken) {
170+
const ratio = tokens / maxToken;
171+
Logger.debug(`Calculating increment for ratio: ${ratio}`);
172+
173+
const increment = inverseSquareRoot(ratio) * randomFromInterval(10, 20);
174+
Logger.debug(`Increment calculated: ${increment}`);
175+
176+
tokens += increment;
177+
Logger.debug(`Tokens incremented by: ${increment}`);
178+
} else {
179+
const ratio = tokens / maxToken;
180+
Logger.debug(`Calculating decrement for ratio: ${ratio}`);
181+
182+
const decrement = inverseSquareRoot(ratio) * randomFromInterval(250, 500);
183+
Logger.debug(`Decrement calculated: ${decrement}`);
184+
185+
tokens -= decrement;
186+
Logger.debug(`Tokens decremented by: ${decrement}`);
187+
}
129188
}
130189
};
131190

@@ -148,7 +207,7 @@ export const getPromptCompletion = async ({
148207
maxTokenExpect: number;
149208
url: string;
150209
}) => {
151-
const maxTokenHardLimit = 4000;
210+
const maxTokenHardLimit = 10000;
152211
const maxToken =
153212
maxTokenExpect > maxTokenHardLimit ? maxTokenHardLimit : maxTokenExpect;
154213

@@ -170,18 +229,23 @@ export const getPromptCompletion = async ({
170229
) {
171230
let restTokens = maxToken - activeDocumentTokens;
172231
for (const document of additionalDocuments) {
232+
if (restTokens <= 50) {
233+
break;
234+
}
173235
const { documentText, documentTokens } = await processingDocument({
174236
document,
175237
maxToken: restTokens,
176238
url,
177239
});
240+
const documentName = document.fileName;
241+
242+
Logger.debug(
243+
`${documentName} document tokens resualt: ${documentTokens}`
244+
);
178245

179246
additionalDocumentsText +=
180247
"\n" + getRelativePath(document.uri) + "\n" + documentText;
181248
restTokens -= documentTokens;
182-
if (restTokens <= 0) {
183-
break;
184-
}
185249
}
186250
}
187251

0 commit comments

Comments
 (0)