Skip to content
This repository was archived by the owner on Mar 28, 2025. It is now read-only.

Commit f9d58fd

Browse files
chore: updated TS client
Signed-off-by: Adithya Krishna <[email protected]>
1 parent ad75c76 commit f9d58fd

File tree

3 files changed

+135
-85
lines changed

3 files changed

+135
-85
lines changed

src/client.ts

+117 -78
Original file line number | Diff line number | Diff line change
@@ -196,13 +196,13 @@ class IndexifyClient {
196196
name: string,
197197
query: string,
198198
topK: number,
199-
filters: string[],
199+
filters?: string[],
200200
include_content: boolean = true
201201
): Promise<ISearchIndexResponse[]> {
202202
const resp = await this.client.post(`/indexes/${name}/search`, {
203203
query,
204204
k: topK,
205-
filters: filters,
205+
...(filters !== undefined && { filters }),
206206
include_content,
207207
});
208208
return resp.data["results"];
@@ -224,88 +224,109 @@ class IndexifyClient {
224224
}
225225

226226
async getExtractedContent({
227-
parentId,
228-
source,
229-
labelsEq,
230-
startId,
231-
limit,
232-
returnTotal = false,
227+
contentId,
228+
graphName,
229+
policyName,
230+
blocking = false,
233231
}: {
234-
parentId?: string;
235-
source?: string;
236-
labelsEq?: string;
237-
startId?: string;
238-
limit?: number;
239-
returnTotal?: boolean;
240-
} = {}): Promise<{ contentList: IContentMetadata[]; total?: number }> {
241-
const resp = await this.client.get("content", {
242-
params: {
243-
parent_id: parentId,
244-
labels_eq: labelsEq,
245-
source,
246-
start_id: startId,
247-
limit,
248-
return_total: returnTotal,
249-
},
250-
});
251-
const contentList = resp.data.content_list.map(
252-
(content: IBaseContentMetadata) => {
253-
return this.baseContentToContentMetadata(content);
254-
}
232+
contentId: string;
233+
graphName: string;
234+
policyName: string;
235+
blocking?: boolean;
236+
}): Promise<{ contentList: IContentMetadata[]; total?: number }> {
237+
if (blocking) {
238+
await this.waitForExtraction(contentId);
239+
}
240+
241+
const response = await this.client.get(
242+
`namespaces/${this.namespace}/extraction_graphs/${graphName}/extraction_policies/${policyName}/content/${contentId}`,
255243
);
256-
return { contentList, total: resp.data.total };
244+
245+
const contentTree = response.data;
246+
const contentList: IContentMetadata[] = [];
247+
248+
for (const item of contentTree.content_tree_metadata) {
249+
if (item.extraction_graph_names.includes(graphName) && item.source === policyName) {
250+
const baseContent: IBaseContentMetadata = {
251+
id: item.id,
252+
parent_id: item.parent_id,
253+
ingested_content_id: contentId,
254+
namespace: item.namespace,
255+
name: item.name,
256+
mime_type: item.mime_type,
257+
labels: item.labels,
258+
storage_url: item.storage_url,
259+
created_at: item.created_at,
260+
source: item.source,
261+
size: item.size,
262+
hash: item.hash,
263+
extraction_graph_names: item.extraction_graph_names,
264+
};
265+
266+
const contentMetadata = this.baseContentToContentMetadata(baseContent);
267+
contentList.push(contentMetadata);
268+
}
269+
}
270+
271+
return { contentList };
257272
}
258273

259274
async addDocuments(
260-
extractionGraphNames: string | string[],
261-
documents:
262-
| IDocument
263-
| string
264-
| IDocument[]
265-
| string[]
266-
| (IDocument | string)[]
267-
) {
268-
function isIDocument(obj: any): obj is IDocument {
269-
return (
270-
obj && typeof obj.text === "string" && typeof obj.labels === "object"
271-
);
275+
extractionGraphs: string | string[],
276+
documents: IDocument | string | (IDocument | string)[],
277+
docId?: string
278+
): Promise<string[]> {
279+
let extractionGraphsArray: string[];
280+
if (typeof extractionGraphs === 'string') {
281+
extractionGraphsArray = [extractionGraphs];
282+
} else {
283+
extractionGraphsArray = extractionGraphs;
272284
}
273285

274-
let newDocuments: IDocument[] = [];
275-
276-
if (typeof documents === "string") {
277-
newDocuments.push({ text: documents as string, labels: {} });
278-
} else if (isIDocument(documents)) {
279-
newDocuments.push(documents);
280-
} else if (Array.isArray(documents)) {
281-
newDocuments = [
282-
...newDocuments,
283-
...(documents.map((item) => {
284-
if (isIDocument(item)) {
285-
return item;
286-
} else if (typeof item === "string") {
287-
return { text: item, labels: {} };
288-
} else {
289-
throw Error(
290-
"Invalid Type: Array items must be string or IDocument"
291-
);
292-
}
293-
}) as IDocument[]),
294-
];
286+
let documentsArray: IDocument[];
287+
if (documents instanceof Array) {
288+
documentsArray = documents.map(doc => {
289+
if (typeof doc === 'string') {
290+
return { text: doc, labels: {}, id: undefined };
291+
} else {
292+
return doc;
293+
}
294+
});
295+
} else if (typeof documents === 'string') {
296+
documentsArray = [{ text: documents, labels: {}, id: docId }];
295297
} else {
296-
throw Error(
297-
"Invalid type for documents. Expected Document, str, or list of these."
298-
);
298+
documentsArray = [documents];
299299
}
300300

301-
const extractionGraphNamesArray = Array.isArray(extractionGraphNames)
302-
? extractionGraphNames
303-
: [extractionGraphNames];
304-
305-
await this.client.post("add_texts", {
306-
documents: newDocuments,
307-
extraction_graph_names: extractionGraphNamesArray,
301+
// Add mime_type to all documents
302+
documentsArray.forEach(doc => {
303+
doc.labels['mime_type'] = 'text/plain';
308304
});
305+
306+
const contentIds: string[] = [];
307+
308+
for (const extractionGraph of extractionGraphsArray) {
309+
for (const document of documentsArray) {
310+
const formData = new FormData();
311+
formData.append('file', new Blob([document.text], { type: 'text/plain' }), 'document.txt');
312+
formData.append('labels', JSON.stringify(document.labels));
313+
314+
const response = await this.client.post(
315+
`namespaces/${this.namespace}/extraction_graphs/${extractionGraph}/extract`,
316+
formData,
317+
{
318+
headers: {
319+
'Content-Type': 'multipart/form-data',
320+
},
321+
}
322+
);
323+
324+
const contentId = response.data.content_id;
325+
contentIds.push(contentId);
326+
}
327+
}
328+
329+
return contentIds;
309330
}
310331

311332
async getContentMetadata(id: string): Promise<IContentMetadata> {
@@ -318,11 +339,6 @@ class IndexifyClient {
318339
return resp.data.metadata;
319340
}
320341

321-
async getContentTree(id: string): Promise<IContentMetadata[]> {
322-
const resp = await this.client.get(`content/${id}/content-tree`);
323-
return resp.data.content_tree_metadata;
324-
}
325-
326342
async downloadContent<T>(id: string): Promise<T> {
327343
try {
328344
const response = await this.client.get(`content/${id}/download`);
@@ -474,6 +490,29 @@ class IndexifyClient {
474490
return resp.data;
475491
}
476492

493+
async waitForExtraction(contentIds: string | string[]): Promise<void> {
494+
const ids = typeof contentIds === 'string' ? [contentIds] : contentIds;
495+
496+
console.log("Waiting for extraction to complete for content id: ", ids.join(","));
497+
498+
for (const contentId of ids) {
499+
try {
500+
const response = await this.client.get(
501+
`namespaces/${this.namespace}/content/${contentId}/wait`
502+
);
503+
504+
console.log("Extraction completed for content id: ", contentId);
505+
506+
if (response.status >= 400) {
507+
throw new Error(`HTTP error! status: ${response.status}`);
508+
}
509+
} catch (error) {
510+
console.error(`Error waiting for extraction of content id ${contentId}:`, error);
511+
throw error;
512+
}
513+
}
514+
}
515+
477516
async ingestRemoteFile(
478517
url: string,
479518
mime_type: string,

src/types.ts

+1 -1
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ export interface IIndex {
4242
export interface IBaseContentMetadata {
4343
id: string;
4444
parent_id: string;
45-
root_content_id: string;
45+
ingested_content_id: string;
4646
namespace: string;
4747
name: string;
4848
mime_type: string;

tests/client.test.ts

+17 -6
Original file line number | Diff line number | Diff line change
@@ -102,7 +102,11 @@ test("addDocuments", async () => {
102102
{ text: "This is a mixed test 2", labels: {} },
103103
]);
104104

105-
const {contentList} = await client.getExtractedContent();
105+
const {contentList} = await client.getExtractedContent({
106+
contentId: "idontexist",
107+
graphName: extractionGraphName,
108+
policyName: "",
109+
});
106110
expect(contentList.length).toBe(8);
107111
});
108112

@@ -174,22 +178,27 @@ test.only("getExtractedContent", async () => {
174178

175179
let contentList;
176180
let resp = await client.getExtractedContent({
177-
parentId: "idontexist",
181+
contentId: "idontexist",
182+
graphName: extractionGraphName,
183+
policyName: "",
178184
});
179185
contentList = resp.contentList
180186
expect(contentList.length).toBe(0);
181187

182188
resp = await client.getExtractedContent({
183-
labelsEq: "source:test",
184-
returnTotal:true
189+
contentId: "idontexist",
190+
graphName: extractionGraphName,
191+
policyName: "",
185192
});
186193
contentList = resp.contentList
187194
expect(contentList.length).toBe(2);
188195
expect(resp.total).toBe(2);
189196
expect(contentList[0].content_url).toContain("http://");
190197

191198
resp = await client.getExtractedContent({
192-
labelsEq: "source:nothing",
199+
contentId: "idontexist",
200+
graphName: extractionGraphName,
201+
policyName: ""
193202
});
194203
contentList = resp.contentList
195204
expect(contentList.length).toBe(0);
@@ -260,7 +269,9 @@ test("downloadContent", async () => {
260269
]);
261270

262271
const { contentList } = await client.getExtractedContent({
263-
labelsEq: "source:testdownload",
272+
contentId: "",
273+
graphName: extractionGraphName,
274+
policyName: ""
264275
});
265276
expect(contentList.length).toBeGreaterThanOrEqual(1);
266277

0 commit comments

Comments
 (0)