Skip to content

Commit

Permalink
knowpro improvements (#744)
Browse files Browse the repository at this point in the history
Ongoing iteration:

Scoring: 
- allow custom score boosting
- Auto boost entity name/type matches when "search Terms" don't provide
a property name
Operators:
- Grouping
- Threads
Merging:
- Composite entities / 'distinct', part1. 

Refactoring
  • Loading branch information
umeshma authored Feb 24, 2025
1 parent 151dd00 commit c2e98ac
Show file tree
Hide file tree
Showing 4 changed files with 239 additions and 49 deletions.
60 changes: 36 additions & 24 deletions ts/packages/knowPro/src/collections.ts
Original file line number Diff line number Diff line change
Expand Up @@ -538,34 +538,46 @@ export class PropertyTermSet {
}

/**
* Return a new set that is the union of two sets
* @param x
* @param y
* @returns
* Unions two un-sorted arrays
* @param x
* @param y
*/
export function unionSet<T = any>(x: Set<T>, y: Set<T>): Set<T> {
let from: Set<T>;
let to: Set<T>;
if (x.size > y.size) {
from = y;
to = x;
} else {
from = x;
to = y;
}
const union = new Set(to);
if (from.size > 0) {
for (const value of from.values()) {
union.add(value);
/**
 * Unions two un-sorted arrays, removing duplicate values.
 *
 * When both arrays are supplied, a new array holding the distinct values of
 * `x` followed by the not-yet-seen values of `y` is returned. When only one
 * is supplied, that same array instance is returned as-is (not copied). When
 * neither is supplied, the result is `undefined`.
 *
 * @param x first array, or undefined
 * @param y second array, or undefined
 * @returns the union, the single defined input, or undefined
 */
export function unionArrays<T = any>(
    x: T[] | undefined,
    y: T[] | undefined,
): T[] | undefined {
    // With one side missing there is nothing to merge: hand back the other
    // side unchanged (which may itself be undefined).
    if (!x) {
        return y;
    }
    if (!y) {
        return x;
    }
    return [...union(x.values(), y.values())];
}

export function unionInPlace<T = any>(set: Set<T>, other: Set<T>): void {
if (other.size > 0) {
for (const value of other.values()) {
set.add(value);
}
/**
 * Lazily produces the distinct values found in two un-sorted sources.
 * Each source may be an array or an already-created iterator. Nothing is
 * consumed until the first value is requested; at that point both sources
 * are drained into a Set (first source first), and the distinct values are
 * then yielded in the order they were first encountered.
 * @param xArray first array or iterator
 * @param yArray second array or iterator
 */
function* union<T>(
    xArray: Iterator<T> | Array<T>,
    yArray: Iterator<T> | Array<T>,
): IterableIterator<T> {
    const distinct = new Set<T>();
    for (const source of [xArray, yArray]) {
        // Arrays are walked through their own iterator; iterators are used directly.
        const cursor: Iterator<T> = Array.isArray(source)
            ? source.values()
            : source;
        for (let step = cursor.next(); !step.done; step = cursor.next()) {
            distinct.add(step.value);
        }
    }
    yield* distinct.values();
}
145 changes: 140 additions & 5 deletions ts/packages/knowPro/src/query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,13 @@ import {
TextLocation,
TextRange,
} from "./dataFormat.js";
import { KnowledgePropertyName, PropertySearchTerm } from "./search.js";
import {
CompositeEntity,
KnowledgePropertyName,
PropertySearchTerm,
Scored,
SearchResult,
} from "./search.js";
import { SearchTerm } from "./search.js";
import {
Match,
Expand All @@ -26,14 +32,15 @@ import {
TermSet,
TextRangeCollection,
TextRangesInScope,
unionArrays,
} from "./collections.js";
import {
lookupPropertyInPropertyIndex,
PropertyNames,
} from "./propertyIndex.js";
import { IPropertyToSemanticRefIndex } from "./secondaryIndexes.js";
import { conversation } from "knowledge-processor";
import { collections } from "typeagent";
import { collections, getTopK } from "typeagent";
import { ITimestampToTextRangeIndex } from "./secondaryIndexes.js";
import { Thread } from "./conversationThread.js";

Expand Down Expand Up @@ -213,7 +220,7 @@ function matchSearchTermToOneOfText(
return false;
}

function matchPropertySearchTermToEntity(
export function matchPropertySearchTermToEntity(
searchTerm: PropertySearchTerm,
semanticRef: SemanticRef,
): boolean {
Expand Down Expand Up @@ -250,6 +257,16 @@ function matchPropertySearchTermToEntity(
return false;
}

/**
 * Tests whether a search term matches an entity's name or any of its types.
 * The name is checked first; the types are only consulted when the name
 * does not match.
 * @param propertyValue search term to match
 * @param entity entity whose name and type list are examined
 * @returns true if the name or one of the types matches
 */
export function matchEntityNameOrType(
    propertyValue: SearchTerm,
    entity: conversation.ConcreteEntity,
): boolean {
    if (matchSearchTermToText(propertyValue, entity.name)) {
        return true;
    }
    return matchSearchTermToOneOfText(propertyValue, entity.type);
}

function matchPropertyNameToFacetName(
propertyValue: SearchTerm,
entity: conversation.ConcreteEntity,
Expand Down Expand Up @@ -600,7 +617,14 @@ export class MatchTermExpr extends QueryOpExpr<
}

export class MatchSearchTermExpr extends MatchTermExpr {
constructor(public searchTerm: SearchTerm) {
constructor(
public searchTerm: SearchTerm,
public scoreBooster?: (
searchTerm: SearchTerm,
sr: SemanticRef,
scored: ScoredSemanticRef,
) => ScoredSemanticRef,
) {
super();
}

Expand Down Expand Up @@ -630,12 +654,22 @@ export class MatchSearchTermExpr extends MatchTermExpr {
context: QueryEvalContext,
term: Term,
): ScoredSemanticRef[] | IterableIterator<ScoredSemanticRef> | undefined {
return lookupTerm(
const matches = lookupTerm(
context.semanticRefIndex,
term,
context.semanticRefs,
context.textRangesInScope,
);
if (matches && this.scoreBooster) {
for (let i = 0; i < matches.length; ++i) {
matches[i] = this.scoreBooster(
this.searchTerm,
context.getSemanticRef(matches[i].semanticRefIndex),
matches[i],
);
}
}
return matches;
}

private accumulateMatchesForTerm(
Expand Down Expand Up @@ -874,6 +908,22 @@ export class SelectTopNKnowledgeGroupExpr extends QueryOpExpr<
}
}

/**
 * Query operator that evaluates a source expression producing one
 * SemanticRefAccumulator per knowledge type and converts that output into
 * one SearchResult per knowledge type via toGroupedSearchResults.
 */
export class GroupSearchResultsExpr extends QueryOpExpr<
    Map<KnowledgeType, SearchResult>
> {
    /**
     * @param srcExpr expression yielding the grouped accumulators to convert
     */
    constructor(
        public srcExpr: IQueryOpExpr<
            Map<KnowledgeType, SemanticRefAccumulator>
        >,
    ) {
        super();
    }

    /**
     * Evaluates the source expression and converts its grouped accumulators.
     * @param context evaluation context handed to the source expression
     * @returns search results keyed by knowledge type
     */
    public eval(context: QueryEvalContext): Map<KnowledgeType, SearchResult> {
        const groupedAccumulators = this.srcExpr.eval(context);
        return toGroupedSearchResults(groupedAccumulators);
    }
}

export class WhereSemanticRefExpr extends QueryOpExpr<SemanticRefAccumulator> {
constructor(
public sourceExpr: IQueryOpExpr<SemanticRefAccumulator>,
Expand Down Expand Up @@ -1092,3 +1142,88 @@ export class ThreadSelector implements IQueryTextRangeSelector {
return new TextRangeCollection(this.thread.ranges);
}
}

/**
 * Converts per-knowledge-type accumulators into per-knowledge-type search
 * results. Accumulators whose size is not greater than zero are left out,
 * so the returned map only has entries for types with content.
 * @param evalResults accumulators keyed by knowledge type
 * @returns search results keyed by knowledge type
 */
export function toGroupedSearchResults(
    evalResults: Map<KnowledgeType, SemanticRefAccumulator>,
): Map<KnowledgeType, SearchResult> {
    const resultsByType = new Map<KnowledgeType, SearchResult>();
    evalResults.forEach((matchAccumulator, knowledgeType) => {
        if (matchAccumulator.size > 0) {
            const termMatches = matchAccumulator.searchTermMatches;
            const semanticRefMatches = matchAccumulator.toScoredSemanticRefs();
            resultsByType.set(knowledgeType, {
                termMatches,
                semanticRefMatches,
            });
        }
    });
    return resultsByType;
}

/**
 * Collapses scored entity matches into composite entities keyed by name.
 *
 * Each match is resolved to its semantic ref; refs whose knowledgeType is
 * not "entity" are skipped. The entity is converted with toCompositeEntity
 * and merged (via combineCompositeEntities) into any composite already
 * recorded under the same name, with the match score added to the running
 * score. The first match for a name seeds the composite with its own score.
 *
 * @param semanticRefs semantic refs, indexed by semanticRefIndex
 * @param semanticRefMatches scored matches to merge
 * @param topK when defined and greater than zero, the merged entities are
 * passed through getTopK with this limit (presumably keeping the highest
 * scoring ones; confirm against typeagent's getTopK)
 * @returns the merged, scored composite entities
 */
export function mergeEntityMatches(
    semanticRefs: SemanticRef[],
    semanticRefMatches: ScoredSemanticRef[],
    topK?: number,
): Scored<CompositeEntity>[] {
    const compositesByName = new Map<string, Scored<CompositeEntity>>();
    for (const match of semanticRefMatches) {
        const ref = semanticRefs[match.semanticRefIndex];
        if (ref.knowledgeType === "entity") {
            const candidate = toCompositeEntity(
                ref.knowledge as conversation.ConcreteEntity,
            );
            const merged = compositesByName.get(candidate.name);
            if (!merged) {
                // First time this name is seen: it becomes the composite.
                compositesByName.set(candidate.name, {
                    item: candidate,
                    score: match.score,
                });
            } else if (combineCompositeEntities(merged.item, candidate)) {
                merged.score += match.score;
            }
        }
    }
    const limitResults = topK !== undefined && topK > 0;
    return limitResults
        ? getTopK(compositesByName.values(), topK)
        : [...compositesByName.values()];
}

/**
 * Builds a CompositeEntity from a concrete entity.
 *
 * The name is lower-cased; the type list is copied and passed to
 * collections.lowerAndSort (presumably lower-casing and sorting it in place;
 * confirm against typeagent). Facets, when present, are rendered as strings
 * with facetToString and passed through the same helper. The input entity's
 * own arrays are not handed to lowerAndSort, only copies are.
 *
 * @param entity source entity; an undefined value yields a placeholder
 * composite whose name and single type are the string "undefined"
 * @returns the composite form of the entity
 */
function toCompositeEntity(
    entity: conversation.ConcreteEntity,
): CompositeEntity {
    if (entity === undefined) {
        return { name: "undefined", type: ["undefined"] };
    }
    const typeNames = [...entity.type];
    const result: CompositeEntity = {
        name: entity.name.toLowerCase(),
        type: typeNames,
    };
    collections.lowerAndSort(typeNames);
    if (entity.facets) {
        const facetTexts = entity.facets.map((facet) => facetToString(facet));
        result.facets = facetTexts;
        collections.lowerAndSort(facetTexts);
    }
    return result;
}

/**
 * Renders a facet as `name="value"`, with the value converted to text by
 * conversation.knowledgeValueToString.
 * @param facet facet to render
 * @returns the facet's string form
 */
function facetToString(facet: conversation.Facet): string {
    const facetName = facet.name;
    const valueText = conversation.knowledgeValueToString(facet.value);
    return `${facetName}="${valueText}"`;
}

/**
 * Merges composite entity `y` into `x` when both carry the same name.
 * On a name match, `x.type` and `x.facets` are replaced with the union of
 * the corresponding arrays from both entities; `y` is not modified.
 * @param x entity that receives the merged types and facets
 * @param y entity whose types and facets are merged in
 * @returns true if the names matched and `x` was updated, false otherwise
 */
function combineCompositeEntities(
    x: CompositeEntity,
    y: CompositeEntity,
): boolean {
    const sameName = x.name === y.name;
    if (sameName) {
        x.type = unionArrays(x.type, y.type)!;
        x.facets = unionArrays(x.facets, y.facets);
    }
    return sameName;
}
Loading

0 comments on commit c2e98ac

Please sign in to comment.