-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
chore: detach dices coefficient to be the must-use alg
- Loading branch information
1 parent
3fa4f50
commit 947425f
Showing
3 changed files
with
64 additions
and
68 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,61 +1,35 @@ | ||
export function compareTwoStrings(first: string, second: string) { | ||
first = first.replace(/\s+/g, ''); | ||
second = second.replace(/\s+/g, ''); | ||
|
||
if (first === second) return 1; // identical or empty | ||
if (first.length < 2 || second.length < 2) return 0; // if either is a 0-letter or 1-letter string | ||
|
||
const firstBigrams = new Map(); | ||
for (let i = 0; i < first.length - 1; i++) { | ||
const bigram = first.substring(i, i + 2); | ||
const count = firstBigrams.has(bigram) ? firstBigrams.get(bigram) + 1 : 1; | ||
|
||
firstBigrams.set(bigram, count); | ||
} | ||
|
||
let intersectionSize = 0; | ||
for (let i = 0; i < second.length - 1; i++) { | ||
const bigram = second.substring(i, i + 2); | ||
const count = firstBigrams.has(bigram) ? firstBigrams.get(bigram) : 0; | ||
|
||
if (count > 0) { | ||
firstBigrams.set(bigram, count - 1); | ||
intersectionSize++; | ||
} | ||
} | ||
|
||
return (2.0 * intersectionSize) / (first.length + second.length - 2); | ||
import diceCoefficient from './stringSimilarity/dice'; | ||
|
||
/**
 * Signature shared by every string-similarity comparison function:
 * takes two strings and returns a numeric similarity rating.
 */
export type StringSimilarityCompareFunc = (first: string, second: string) => number; | ||
/**
 * Result of a best-match search.
 * Each tuple is `[target string, rating]`.
 */
export type BestMatchResult = { | ||
// One `[target, rating]` tuple per candidate string.
ratings: [string, number][]; | ||
// The tuple selected as the best match.
bestMatch: [string, number]; | ||
}; | ||
/**
 * Contract for a string-similarity algorithm: a pairwise comparison
 * plus a helper that rates one value against a list of targets.
 */
export interface StringSimilarity { | ||
// Rates how similar two strings are.
compare: StringSimilarityCompareFunc; | ||
// Rates `value` against `targets` and reports the ratings and the best match.
bestMatch: (value: string, targets: string[]) => BestMatchResult; | ||
} | ||
|
||
export function findBestMatch(mainString: string, targetStrings: string[]) { | ||
if (!areArgsValid(mainString, targetStrings)) throw new Error('Bad arguments: First argument should be a string, second should be an array of strings'); | ||
|
||
const ratings = []; | ||
let bestMatchIndex = 0; | ||
|
||
for (let i = 0; i < targetStrings.length; i++) { | ||
const currentTargetString = targetStrings[i]; | ||
const currentRating = compareTwoStrings(mainString, currentTargetString); | ||
ratings.push({ target: currentTargetString, rating: currentRating }); | ||
if (currentRating > ratings[bestMatchIndex].rating) { | ||
bestMatchIndex = i; | ||
} | ||
} | ||
|
||
const bestMatch = ratings[bestMatchIndex]; | ||
|
||
return { ratings: ratings, bestMatch: bestMatch, bestMatchIndex: bestMatchIndex }; | ||
/**
 * Identifiers of the available string-similarity algorithms.
 * The string values are used as keys of the `stringSimilarityAlgs` record.
 */
export enum StringSimilarityAlgs { | ||
DiceCoefficient = 'dice_coefficient', | ||
} | ||
|
||
/**
 * Validates the arguments of a best-match lookup.
 *
 * @param mainString - The value to compare; must be a string.
 * @param targetStrings - The candidates; must be a non-empty array of strings.
 * @returns `true` only when `mainString` is a string and `targetStrings`
 *   is a non-empty array whose elements are all strings.
 */
function areArgsValid(mainString: string, targetStrings: string[]): boolean {
  if (typeof mainString !== 'string') return false;
  if (!Array.isArray(targetStrings) || targetStrings.length === 0) return false;

  // `every` yields a boolean. The previous `find`-based check returned the
  // offending element itself, so falsy non-strings (0, null, undefined,
  // false, NaN) were treated as "nothing found" and slipped through.
  return targetStrings.every((target) => typeof target === 'string');
}
/**
 * Registry of string-similarity implementations, keyed by the values of
 * `StringSimilarityAlgs`.
 *
 * For the Dice entry, `bestMatch` rates `value` against every target in
 * order and picks the highest rating; on ties the earliest target wins.
 * NOTE(review): with an empty `targets` array `bestMatch` is `undefined`
 * at runtime even though the declared type is a tuple — confirm callers
 * never pass an empty list.
 */
export const stringSimilarityAlgs: Record<StringSimilarityAlgs, StringSimilarity> = {
  dice_coefficient: {
    compare: diceCoefficient,
    bestMatch(value, targets) {
      const ratings: [string, number][] = [];
      let bestMatchIndex = 0;

      for (const [index, target] of targets.entries()) {
        const rating = diceCoefficient(value, target);
        ratings.push([target, rating]);
        // Strictly greater, so the first of several equal ratings is kept.
        if (rating > ratings[bestMatchIndex][1]) {
          bestMatchIndex = index;
        }
      }

      return { ratings, bestMatch: ratings[bestMatchIndex] };
    },
  },
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
import { StringSimilarityCompareFunc } from '../stringCorrection'; | ||
|
||
/**
 * Dice's coefficient over character bigrams.
 *
 * All whitespace is removed from both inputs before comparing. The result is
 * `2 * |shared bigrams| / (|bigrams of first| + |bigrams of second|)`, where
 * repeated bigrams are counted with multiplicity.
 *
 * @param first - First string to compare.
 * @param second - Second string to compare.
 * @returns `1` when the whitespace-stripped strings are identical (including
 *   both empty), `0` when either has fewer than two characters, otherwise a
 *   value between 0 and 1.
 */
const dicesCoefficient: StringSimilarityCompareFunc = (first: string, second: string): number => {
  const left = first.replace(/\s+/g, '');
  const right = second.replace(/\s+/g, '');

  if (left === right) return 1; // identical or empty
  if (left.length < 2 || right.length < 2) return 0; // no bigrams to compare

  // Multiset of the bigrams in `left`: bigram -> remaining occurrences.
  const bigramCounts = new Map<string, number>();
  for (let i = 0; i < left.length - 1; i++) {
    const bigram = left.substring(i, i + 2);
    bigramCounts.set(bigram, (bigramCounts.get(bigram) ?? 0) + 1);
  }

  let intersectionSize = 0;
  for (let i = 0; i < right.length - 1; i++) {
    const bigram = right.substring(i, i + 2);
    const remaining = bigramCounts.get(bigram) ?? 0;
    if (remaining > 0) {
      // Consume one occurrence so each bigram of `left` matches at most once.
      bigramCounts.set(bigram, remaining - 1);
      intersectionSize++;
    }
  }

  // A string of length n has n - 1 bigrams, hence the "- 2" in the denominator.
  return (2 * intersectionSize) / (left.length + right.length - 2);
};
|
||
export default dicesCoefficient; |