1
1
import * as vscode from "vscode" ;
2
2
import path from "node:path" ;
3
+ import Logger from "../logger" ;
3
4
4
5
const tokenize = async ( text : string , url : string ) => {
5
6
try {
@@ -53,6 +54,9 @@ const spliteDocumentByPosition = (
53
54
) ;
54
55
return [ textBefore , textAfter ] ;
55
56
} ;
57
/**
 * Computes the reciprocal of the square root of `x`, i.e. `1 / Math.sqrt(x)`.
 *
 * No input validation is performed: `x === 0` yields `Infinity` and a
 * negative `x` yields `NaN`, so callers that feed a ratio into this helper
 * must keep that ratio strictly positive.
 *
 * @param x - The value whose inverse square root is wanted.
 * @returns `1 / sqrt(x)`.
 */
const inverseSquareRoot = (x: number): number => 1 / Math.sqrt(x);
58
/**
 * Returns a pseudo-random integer from the inclusive range `[min, max]`.
 *
 * Fractional bounds are tightened inward before drawing (`min` is rounded up,
 * `max` is rounded down) so that, whenever the range contains at least one
 * integer, the result never falls outside it. For integer bounds with
 * `min <= max` the result is the same as the plain
 * `Math.floor(Math.random() * (max - min + 1) + min)` formula.
 *
 * @param min - Lower bound (inclusive).
 * @param max - Upper bound (inclusive).
 * @returns An integer `n` with `Math.ceil(min) <= n <= Math.floor(max)`.
 */
const randomFromInterval = (min: number, max: number): number => {
  // Round the bounds inward so flooring the draw cannot undershoot `min`.
  const lower = Math.ceil(min);
  const upper = Math.floor(max);
  return Math.floor(Math.random() * (upper - lower + 1) + lower);
};
56
60
57
61
const processingDocumentWithPosition = async ( {
58
62
document,
@@ -66,34 +70,56 @@ const processingDocumentWithPosition = async ({
66
70
maxToken : number ;
67
71
} ) => {
68
72
const [ textBefore , textAfter ] = spliteDocumentByPosition ( document , position ) ;
69
- let beforeTokens = 50 ;
70
- let afterTokens = 50 ;
73
+
74
+ let beforeTokens = maxToken / 2 ;
75
+ let afterTokens = maxToken / 2 ;
71
76
72
77
let textBeforeSlice : string ;
73
78
let textAfterSlice : string ;
74
79
75
- let resToken = 0 ;
80
+ let tokens = 0 ;
76
81
77
82
while ( true ) {
78
- textBeforeSlice = textBefore . slice ( beforeTokens * - 1 ) ;
79
- textAfterSlice = textAfter . slice ( 0 , afterTokens ) ;
83
+ textBeforeSlice = textBefore . slice ( beforeTokens * 3 * - 1 ) ;
84
+ textAfterSlice = textAfter . slice ( 0 , afterTokens * 3 ) ;
80
85
81
- resToken = await tokenize ( textBeforeSlice + textAfterSlice , url ) ;
86
+ tokens = await tokenize ( textBeforeSlice + textAfterSlice , url ) ;
87
+ const tokenDifference = Math . abs ( maxToken - tokens ) ;
88
+ const maxDifference = Math . max ( maxToken * 0.1 , 10 ) ;
82
89
90
+ const documentName = document . fileName ;
91
+ Logger . debug ( `${ documentName } document tokens: ${ tokens } ` ) ;
83
92
if (
84
- resToken >= maxToken ||
85
- ( textBeforeSlice . length >= textBefore . length &&
86
- textAfterSlice . length >= textAfter . length )
93
+ ( tokens <= maxToken &&
94
+ textBeforeSlice . length >= textBefore . length &&
95
+ textAfterSlice . length >= textAfter . length ) ||
96
+ tokenDifference <= maxDifference
87
97
) {
88
98
return {
89
99
documentText : `${ textBeforeSlice } <|fim▁hole|>${ textAfterSlice } ` ,
90
- documentTokens : resToken ,
100
+ documentTokens : tokens ,
91
101
} ;
92
102
}
93
103
94
- beforeTokens =
95
- Number ( ( beforeTokens * ( maxToken / resToken ) ) . toFixed ( 0 ) ) + 5 ;
96
- afterTokens = Number ( ( afterTokens * ( maxToken / resToken ) ) . toFixed ( 0 ) ) + 5 ;
104
+ if ( tokens <= maxToken ) {
105
+ beforeTokens +=
106
+ inverseSquareRoot ( beforeTokens / maxToken ) *
107
+ randomFromInterval ( 30 , 60 ) *
108
+ 4 ;
109
+ afterTokens +=
110
+ inverseSquareRoot ( afterTokens / maxToken ) *
111
+ randomFromInterval ( 30 , 60 ) *
112
+ 4 ;
113
+ } else {
114
+ beforeTokens -=
115
+ inverseSquareRoot ( beforeTokens / maxToken ) *
116
+ randomFromInterval ( 30 , 60 ) *
117
+ 4 ;
118
+ afterTokens -=
119
+ inverseSquareRoot ( afterTokens / maxToken ) *
120
+ randomFromInterval ( 30 , 60 ) *
121
+ 4 ;
122
+ }
97
123
}
98
124
} ;
99
125
@@ -107,25 +133,58 @@ const processingDocument = async ({
107
133
maxToken : number ;
108
134
} ) => {
109
135
const text = getTextNormalized ( document . getText ( ) ) ;
110
- let tokens = 50 ;
111
136
112
- let textSlice : string ;
137
+ let tokens = maxToken ;
113
138
114
- let resToken = 0 ;
139
+ let textSlice : string ;
115
140
116
141
while ( true ) {
117
- textSlice = text . slice ( 0 , tokens ) ;
142
+ Logger . debug ( "New iteration of the while loop" ) ;
143
+
144
+ textSlice = text . slice ( 0 , Number ( tokens . toFixed ( 0 ) ) * 3 ) ;
145
+
146
+ tokens = await tokenize ( textSlice , url ) ;
118
147
119
- resToken = await tokenize ( textSlice , url ) ;
148
+ const tokenDifference = Math . abs ( maxToken - tokens ) ;
149
+ const maxDifference = Math . max ( maxToken * 0.05 , 10 ) ;
150
+
151
+ const logMessage = `Text slice length: ${ textSlice . length } , Tokens after tokenization: ${ tokens } , Max token: ${ maxToken } , Token difference: ${ tokenDifference } ` ;
152
+
153
+ Logger . debug ( logMessage ) ;
154
+
155
+ const documentName = document . fileName ;
156
+ Logger . debug ( `${ documentName } document tokens: ${ tokens } ` ) ;
157
+ if (
158
+ ( tokens <= maxToken && textSlice . length >= text . length ) ||
159
+ tokenDifference <= maxDifference
160
+ ) {
161
+ Logger . debug ( `${ documentName } document tokens resualt: ${ tokens } ` ) ;
120
162
121
- if ( resToken >= maxToken || textSlice . length >= text . length ) {
122
163
return {
123
164
documentText : textSlice ,
124
- documentTokens : resToken ,
165
+ documentTokens : tokens ,
125
166
} ;
126
167
}
127
168
128
- tokens = Number ( ( tokens * ( maxToken / resToken ) ) . toFixed ( 0 ) ) + 5 ;
169
+ if ( tokens <= maxToken ) {
170
+ const ratio = tokens / maxToken ;
171
+ Logger . debug ( `Calculating increment for ratio: ${ ratio } ` ) ;
172
+
173
+ const increment = inverseSquareRoot ( ratio ) * randomFromInterval ( 10 , 20 ) ;
174
+ Logger . debug ( `Increment calculated: ${ increment } ` ) ;
175
+
176
+ tokens += increment ;
177
+ Logger . debug ( `Tokens incremented by: ${ increment } ` ) ;
178
+ } else {
179
+ const ratio = tokens / maxToken ;
180
+ Logger . debug ( `Calculating decrement for ratio: ${ ratio } ` ) ;
181
+
182
+ const decrement = inverseSquareRoot ( ratio ) * randomFromInterval ( 250 , 500 ) ;
183
+ Logger . debug ( `Decrement calculated: ${ decrement } ` ) ;
184
+
185
+ tokens -= decrement ;
186
+ Logger . debug ( `Tokens decremented by: ${ decrement } ` ) ;
187
+ }
129
188
}
130
189
} ;
131
190
@@ -148,7 +207,7 @@ export const getPromptCompletion = async ({
148
207
maxTokenExpect : number ;
149
208
url : string ;
150
209
} ) => {
151
- const maxTokenHardLimit = 4000 ;
210
+ const maxTokenHardLimit = 10000 ;
152
211
const maxToken =
153
212
maxTokenExpect > maxTokenHardLimit ? maxTokenHardLimit : maxTokenExpect ;
154
213
@@ -170,18 +229,23 @@ export const getPromptCompletion = async ({
170
229
) {
171
230
let restTokens = maxToken - activeDocumentTokens ;
172
231
for ( const document of additionalDocuments ) {
232
+ if ( restTokens <= 50 ) {
233
+ break ;
234
+ }
173
235
const { documentText, documentTokens } = await processingDocument ( {
174
236
document,
175
237
maxToken : restTokens ,
176
238
url,
177
239
} ) ;
240
+ const documentName = document . fileName ;
241
+
242
+ Logger . debug (
243
+ `${ documentName } document tokens resualt: ${ documentTokens } `
244
+ ) ;
178
245
179
246
additionalDocumentsText +=
180
247
"\n" + getRelativePath ( document . uri ) + "\n" + documentText ;
181
248
restTokens -= documentTokens ;
182
- if ( restTokens <= 0 ) {
183
- break ;
184
- }
185
249
}
186
250
}
187
251
0 commit comments