diff --git a/.yarn/cache/@types-node-npm-20.10.5-9464a4540b-e216b679f5.zip b/.yarn/cache/@types-node-npm-20.10.5-9464a4540b-e216b679f5.zip new file mode 100644 index 0000000000..f3b3f2e8dd Binary files /dev/null and b/.yarn/cache/@types-node-npm-20.10.5-9464a4540b-e216b679f5.zip differ diff --git a/.yarn/cache/flatbuffers-npm-1.12.0-51ee5c20bf-8a6461ec80.zip b/.yarn/cache/flatbuffers-npm-1.12.0-51ee5c20bf-8a6461ec80.zip new file mode 100644 index 0000000000..e86a64a9f1 Binary files /dev/null and b/.yarn/cache/flatbuffers-npm-1.12.0-51ee5c20bf-8a6461ec80.zip differ diff --git a/.yarn/cache/guid-typescript-npm-1.0.9-01fb89a1bf-829dd87866.zip b/.yarn/cache/guid-typescript-npm-1.0.9-01fb89a1bf-829dd87866.zip new file mode 100644 index 0000000000..d7674ec605 Binary files /dev/null and b/.yarn/cache/guid-typescript-npm-1.0.9-01fb89a1bf-829dd87866.zip differ diff --git a/.yarn/cache/long-npm-5.2.3-61dddb7586-885ede7c3d.zip b/.yarn/cache/long-npm-5.2.3-61dddb7586-885ede7c3d.zip new file mode 100644 index 0000000000..b6e2d5d0d4 Binary files /dev/null and b/.yarn/cache/long-npm-5.2.3-61dddb7586-885ede7c3d.zip differ diff --git a/.yarn/cache/onnxruntime-common-npm-1.16.3-a42fa88d62-b8b83fd318.zip b/.yarn/cache/onnxruntime-common-npm-1.16.3-a42fa88d62-b8b83fd318.zip new file mode 100644 index 0000000000..7272cc0c5b Binary files /dev/null and b/.yarn/cache/onnxruntime-common-npm-1.16.3-a42fa88d62-b8b83fd318.zip differ diff --git a/.yarn/cache/onnxruntime-web-npm-1.16.3-3de219cbcb-e8b71733f9.zip b/.yarn/cache/onnxruntime-web-npm-1.16.3-3de219cbcb-e8b71733f9.zip new file mode 100644 index 0000000000..3499ad59d7 Binary files /dev/null and b/.yarn/cache/onnxruntime-web-npm-1.16.3-3de219cbcb-e8b71733f9.zip differ diff --git a/.yarn/cache/platform-npm-1.3.6-8c3cef9352-6f472a09c6.zip b/.yarn/cache/platform-npm-1.3.6-8c3cef9352-6f472a09c6.zip new file mode 100644 index 0000000000..978ae74e67 Binary files /dev/null and b/.yarn/cache/platform-npm-1.3.6-8c3cef9352-6f472a09c6.zip differ diff --git a/.yarn/cache/protobufjs-npm-7.2.5-3439c554a5-3770a07211.zip b/.yarn/cache/protobufjs-npm-7.2.5-3439c554a5-3770a07211.zip new file mode 100644 index 0000000000..c9e1ac65f0 Binary files /dev/null and b/.yarn/cache/protobufjs-npm-7.2.5-3439c554a5-3770a07211.zip differ diff --git a/.yarn/cache/undici-types-npm-5.26.5-de4f7c7bb9-3192ef6f3f.zip b/.yarn/cache/undici-types-npm-5.26.5-de4f7c7bb9-3192ef6f3f.zip new file mode 100644 index 0000000000..6ab5ae67e3 Binary files /dev/null and b/.yarn/cache/undici-types-npm-5.26.5-de4f7c7bb9-3192ef6f3f.zip differ diff --git a/apps/frontend/src/app/PageArtifact/index.tsx b/apps/frontend/src/app/PageArtifact/index.tsx index a945780a6a..f689a2c1c6 100644 --- a/apps/frontend/src/app/PageArtifact/index.tsx +++ b/apps/frontend/src/app/PageArtifact/index.tsx @@ -61,7 +61,7 @@ export default function PageArtifact() { const { database } = useContext(DatabaseContext) const artifactDisplayState = useDisplayArtifact() - const [showEditor, onShowEditor, onHideEditor] = useBoolState(false) + const [showEditor, onShowEditor, onHideEditor] = useBoolState(true) const [showDup, onShowDup, onHideDup] = useBoolState(false) diff --git a/apps/frontend/src/assets/simplenet.onnx b/apps/frontend/src/assets/simplenet.onnx new file mode 100644 index 0000000000..615fff2989 Binary files /dev/null and b/apps/frontend/src/assets/simplenet.onnx differ diff --git a/libs/gi-art-scanner/src/lib/processImg.ts b/libs/gi-art-scanner/src/lib/processImg.ts index f4ec2f9c03..37c945e280 100644 --- a/libs/gi-art-scanner/src/lib/processImg.ts +++ b/libs/gi-art-scanner/src/lib/processImg.ts @@ -43,6 +43,8 @@ import { parseSubstats, } from './parse' +import { processEntryML } from './processImgML' + export type Processed = { fileName: string imageURL: string @@ -68,6 +70,21 @@ export async function processEntry( const imageData = await urlToImageData(imageURL) const debugImgs = debug ? ({} as Record) : undefined + if (true) { + const { artifact, texts, imageURL } = await processEntryML( + imageData, + textsFromImage, + debugImgs + ) + return { + fileName: fName, + imageURL, + artifact, + texts, + debugImgs, + } + } + const artifactCardImageData = verticallyCropArtifactCard(imageData, debugImgs) const artifactCardCanvas = imageDataToCanvas(artifactCardImageData) @@ -335,7 +352,7 @@ function verticallyCropArtifactCard( return cropped } -function parseRarity( +export function parseRarity( headerData: ImageData, debugImgs?: Record ) { diff --git a/libs/gi-art-scanner/src/lib/processImgML.ts b/libs/gi-art-scanner/src/lib/processImgML.ts new file mode 100644 index 0000000000..de3d99cf6b --- /dev/null +++ b/libs/gi-art-scanner/src/lib/processImgML.ts @@ -0,0 +1,241 @@ +import * as ort from 'onnxruntime-web' +import { lockColor } from './consts' +import { + crop, + imageDataToCanvas, + resize, + drawBox, + invert, + histogramAnalysis, + darkerColor, + lighterColor, +} from '@genshin-optimizer/img-util' +import { PSM } from 'tesseract.js' +import { parseRarity } from './processImg' +import { + parseMainStatKeys, + parseMainStatValues, + parseSetKeys, + parseSlotKeys, + parseSubstats, +} from './parse' +import { findBestArtifact } from './findBestArtifact' + +type Box = { + x: number + y: number + w: number + h: number +} +type MLBoxes = { + title: Box + slot: Box + mainstat: Box + level: Box + rarity: Box + substats: Box + set: Box + lock: Box + bbox: Box +} + +function getBox( + result: ort.TypedTensor<'float32'>, + height: number, + width: number, + i: number, + offset?: { x1?: number; y1?: number } +): Box { + const x1 = result.data[4 * i] * width, + y1 = result.data[4 * i + 1] * height, + x2 = result.data[4 * i + 2] * width, + y2 = result.data[4 * i + 3] * height + + const w = x2 - x1, + h = y2 - y1 + return { x: x1 + (offset?.x1 ?? 0), y: y1 + (offset?.y1 ?? 0), w, h } +} +function padBox(box: Box, pad: number): Box { + return { + x: Math.max(box.x - (pad * box.w) / 2, 0), + y: Math.max(box.y - (pad * box.h) / 2, 0), + w: box.w * (1 + pad), + h: box.h * (1 + pad), + } +} +function box2CropOption(box: Box, pad?: number) { + if (pad) box = padBox(box, pad) + return { + x1: box.x, + y1: box.y, + x2: box.x + box.w, + y2: box.y + box.h, + } +} + +function prepareForOnnx(imageData: ImageData): Float32Array { + // Expects (200, 200, 3) image. Re-order + scale data to network's expected domain. + + const imageBuffer = new Float32Array(200 * 200 * 3) + imageBuffer.fill(0) + const normalization = [ + { mu: 0.485, std: 0.229 }, + { mu: 0.456, std: 0.224 }, + { mu: 0.406, std: 0.225 }, + ] + const _i = 1 + const _j = 4 * imageData.width // 4 * 200 + const _k = 4 + + for (let i = 0; i < 3; i++) { + const { mu, std } = normalization[i] + for (let j = 0; j < 200; j++) { + for (let k = 0; k < 200; k++) { + const v = imageData.data[_i * i + _j * j + _k * k] / 255.0 + imageBuffer[i * 200 * 200 + j * 200 + k] = (v - mu) / std + } + } + } + return imageBuffer +} + +async function doInference( + imageData: ImageData, + session: ort.InferenceSession, + cropOptions: { + x1?: number + x2?: number + y1?: number + y2?: number + }, + debugImgs?: Record +): Promise { + const imageCropped = crop(imageDataToCanvas(imageData), cropOptions) + const imageSized = resize(imageCropped, { width: 200, height: 200 }) + const imageBuffer = prepareForOnnx(imageSized) + + if (debugImgs) + debugImgs['MLInput'] = imageDataToCanvas(imageSized).toDataURL() + + const feeds = { + input1: new ort.Tensor('float32', imageBuffer, [1, 3, 200, 200]), + } + const results = await session.run(feeds) + const result = results['output1'] as ort.TypedTensor<'float32'> + const h = imageCropped.height, + w = imageCropped.width + const out = { + title: getBox(result, h, w, 0, cropOptions), + slot: getBox(result, h, w, 1, cropOptions), + mainstat: getBox(result, h, w, 2, cropOptions), + level: getBox(result, h, w, 3, cropOptions), + rarity: getBox(result, h, w, 4, cropOptions), + substats: getBox(result, h, w, 5, cropOptions), + set: getBox(result, h, w, 6, cropOptions), + lock: getBox(result, h, w, 7, cropOptions), + bbox: getBox(result, h, w, 8, cropOptions), + } + // Manually fix inconsistent substat box width + out.substats.w = out.lock.x - out.substats.x + return out +} + +export async function processEntryML( + imageDataRaw: ImageData, + textsFromImage: ( + imageData: ImageData, + options?: object | undefined + ) => Promise, + debugImgs?: Record +) { + // const session = await ort.InferenceSession.create('https://github.com/tooflesswulf/genshin-scanner/raw/main/onnx/simplenet.onnx') + const session = await ort.InferenceSession.create('./assets/simplenet.onnx', { + executionProviders: ['webgl'], + }) + + const mlBoxes0 = await doInference(imageDataRaw, session, {}, debugImgs) + const mlBoxes = await doInference( + imageDataRaw, + session, + box2CropOption(mlBoxes0.bbox, 0.2), + debugImgs + ) + + const rawCanvas = imageDataToCanvas(imageDataRaw) + const titleCrop = crop(rawCanvas, box2CropOption(mlBoxes.title, 0.1)) + const titleText = textsFromImage(titleCrop) + + const slotCrop = crop(rawCanvas, box2CropOption(mlBoxes.slot, 0.1)) + const slotText = textsFromImage(slotCrop) + + const levelCrop = invert(crop(rawCanvas, box2CropOption(mlBoxes.level, 0.1))) + const levelText = textsFromImage(levelCrop) + + const mainstatCrop = invert( + crop(rawCanvas, box2CropOption(mlBoxes.mainstat, 0.1)) + ) + const mainstatText = textsFromImage(mainstatCrop, { + tessedit_pageseg_mode: PSM.SPARSE_TEXT, + }) + + const substatCrop = crop(rawCanvas, box2CropOption(mlBoxes.substats, 0.1)) + const substatText = textsFromImage(substatCrop) + + const setCrop = crop(rawCanvas, box2CropOption(mlBoxes.set, 0.1)) + const setText = textsFromImage(setCrop) + + const lockCrop = crop(rawCanvas, box2CropOption(mlBoxes.lock, 0.1)) + const lockHisto = histogramAnalysis( + lockCrop, + darkerColor(lockColor), + lighterColor(lockColor) + ) + const locked = lockHisto.filter((v) => v > 5).length > 5 + + const rarityCrop = crop(rawCanvas, box2CropOption(mlBoxes.rarity, 0.1)) + const rarity = parseRarity(rarityCrop, debugImgs) + + const [artifact, texts] = findBestArtifact( + new Set([rarity]), + parseSetKeys(await setText), + parseSlotKeys(await slotText), + parseSubstats(await substatText), + parseMainStatKeys(await mainstatText), + parseMainStatValues(await mainstatText), + '', + locked + ) + + const canvasRaw = imageDataToCanvas(imageDataRaw) + drawBox(canvasRaw, mlBoxes.title, { r: 31, g: 119, b: 180, a: 80 }) + drawBox(canvasRaw, mlBoxes.slot, { r: 255, g: 127, b: 14, a: 80 }) + drawBox(canvasRaw, mlBoxes.mainstat, { r: 44, g: 160, b: 44, a: 80 }) + drawBox(canvasRaw, mlBoxes.level, { r: 214, g: 39, b: 40, a: 80 }) + drawBox(canvasRaw, mlBoxes.rarity, { r: 128, g: 103, b: 189, a: 80 }) + drawBox(canvasRaw, mlBoxes.substats, { r: 140, g: 86, b: 75, a: 80 }) + drawBox(canvasRaw, mlBoxes.set, { r: 227, g: 119, b: 194, a: 80 }) + drawBox(canvasRaw, mlBoxes.lock, { r: 188, g: 189, b: 34, a: 80 }) + drawBox(canvasRaw, mlBoxes.bbox, { r: 127, g: 127, b: 127, a: 60 }) + if (debugImgs) { + debugImgs['MLBoxesFull'] = canvasRaw.toDataURL() + debugImgs['slotCrop'] = imageDataToCanvas(slotCrop).toDataURL() + debugImgs['levelCrop'] = imageDataToCanvas(levelCrop).toDataURL() + debugImgs['mainstatCrop'] = imageDataToCanvas(mainstatCrop).toDataURL() + debugImgs['substatCrop'] = imageDataToCanvas(substatCrop).toDataURL() + debugImgs['setCrop'] = imageDataToCanvas(setCrop).toDataURL() + debugImgs['lockCrop'] = imageDataToCanvas(lockCrop).toDataURL() + debugImgs['rarityCrop'] = imageDataToCanvas(rarityCrop).toDataURL() + } + + const cropOp = box2CropOption(mlBoxes0.bbox, 0.2) + const canvas = imageDataToCanvas(crop(canvasRaw, cropOp)) + console.log('DETECTION: ', { artifact, texts }) + console.log('TEXT:', { + slotText, + levelText, + mainstatText, + substatText, + setText, + }) + return { artifact, texts, imageURL: canvas.toDataURL() } +} diff --git a/libs/img-util/src/canvas.ts b/libs/img-util/src/canvas.ts index f56585f9fc..7ff1eddf0d 100644 --- a/libs/img-util/src/canvas.ts +++ b/libs/img-util/src/canvas.ts @@ -17,6 +17,20 @@ export function drawline( return canvas } +export function drawBox( + canvas: HTMLCanvasElement, + { x, y, w, h }: { x: number; y: number; w: number; h: number }, + color: Color +) { + const ctx = canvas.getContext('2d')! + ctx.fillStyle = `rgba(${color.r},${color.g},${color.b},${ + color.a ? color.a / 255 : 1 + })` + ctx.fillRect(x, y, w, h) + + return canvas +} + export function drawHistogram( canvas: HTMLCanvasElement, histogram: number[], diff --git a/libs/img-util/src/imageData.ts b/libs/img-util/src/imageData.ts index 8a78a774ae..92a55a8712 100644 --- a/libs/img-util/src/imageData.ts +++ b/libs/img-util/src/imageData.ts @@ -47,6 +47,67 @@ export function crop(srcCanvas: HTMLCanvasElement, options: CropOptions) { return ctx.getImageData(x1, y1, x2 - x1, y2 - y1) } +function interpolate_bilinear( + image: ImageData, + x: number, + y: number, + i: number +) { + const x1 = x === image.width ? x - 1 : Math.floor(x), + x2 = x1 + 1 + const y1 = y === image.height ? y - 1 : Math.floor(y), + y2 = y1 + 1 + const ch = 4 + const _x = ch, + _y = image.width * ch + + const q11 = (x2 - x) * (y2 - y) * image.data[i + _x * x1 + _y * y1] + const q21 = (x - x1) * (y2 - y) * image.data[i + _x * x2 + _y * y1] + const q12 = (x2 - x) * (y - y1) * image.data[i + _x * x1 + _y * y2] + const q22 = (x - x1) * (y - y1) * image.data[i + _x * x2 + _y * y2] + return q11 + q21 + q12 + q22 +} +export function resize( + imageData: ImageData, + options: { width?: number; height?: number } +): ImageData { + const { width = imageData.width, height = imageData.height } = options + + const dataBuffer = new Uint8ClampedArray(width * height * 4) + const sx = (width - 1) / (imageData.width - 1) + const sy = (height - 1) / (imageData.height - 1) + for (let x = 0; x < width; x++) { + for (let y = 0; y < height; y++) { + for (let i = 0; i < 4; i++) { + dataBuffer[x * 4 + y * width * 4 + i] = interpolate_bilinear( + imageData, + x / sx, + y / sy, + i + ) + } + } + } + + const resized = new ImageData(dataBuffer, width, height) + return resized +} +export function invert(imageData: ImageData) { + const width = imageData.width, + height = imageData.height + + const invDataBuffer = new Uint8ClampedArray(width * height * 4) + for (let i = 0; i < width * height * 4; i++) { + if (i % 4 == 3) { + invDataBuffer[i] = imageData.data[i] + continue + } + invDataBuffer[i] = 255 - imageData.data[i] + } + + return new ImageData(invDataBuffer, width, height) +} + export const fileToURL = (file: File): Promise => new Promise((resolve) => { const reader = new FileReader() diff --git a/package.json b/package.json index 511547ac55..9cc97ac570 100644 --- a/package.json +++ b/package.json @@ -116,6 +116,7 @@ "jsonwebtoken": "^9.0.2", "next": "14.0.3", "next-auth": "^4.23.2", + "onnxruntime-web": "^1.16.3", "passport": "^0.6.0", "passport-jwt": "^4.0.1", "react": "18.2.0", diff --git a/yarn.lock b/yarn.lock index 60ac673517..70e10627b8 100644 --- a/yarn.lock +++ b/yarn.lock @@ -5965,6 +5965,15 @@ __metadata: languageName: node linkType: hard +"@types/node@npm:>=13.7.0": + version: 20.10.5 + resolution: "@types/node@npm:20.10.5" + dependencies: + undici-types: ~5.26.4 + checksum: e216b679f545a8356960ce985a0e53c3a58fff0eacd855e180b9e223b8db2b5bd07b744a002b8c1f0c37f9194648ab4578533b5c12df2ec10cc02f61d20948d2 + languageName: node + linkType: hard + "@types/parse-json@npm:^4.0.0": version: 4.0.0 resolution: "@types/parse-json@npm:4.0.0" @@ -11278,6 +11287,13 @@ __metadata: languageName: node linkType: hard +"flatbuffers@npm:^1.12.0": + version: 1.12.0 + resolution: "flatbuffers@npm:1.12.0" + checksum: 8a6461ec80a8f850c623439fbc3d031bac52dfd7dee27fbadf1d850e96fd92cbd782c28bf2a08f2d852a3ac329cc31e2ad21e133ab68993cf0df69d3dd32fd12 + languageName: node + linkType: hard + "flatted@npm:^3.1.0, flatted@npm:^3.2.7": version: 3.2.7 resolution: "flatted@npm:3.2.7" @@ -11633,6 +11649,7 @@ __metadata: next: 14.0.3 next-auth: ^4.23.2 nx: 16.10.0 + onnxruntime-web: ^1.16.3 passport: ^0.6.0 passport-jwt: ^4.0.1 prettier: ^2.8.4 @@ -12067,6 +12084,13 @@ __metadata: languageName: node linkType: hard +"guid-typescript@npm:^1.0.9": + version: 1.0.9 + resolution: "guid-typescript@npm:1.0.9" + checksum: 829dd87866800a5138aafa0873994028bbc446eb20ff4cae6452d471a2a3d26f7025bed3eb980692c0f022fd22f95ea7396122b46b45a4b5084958505a4fc50c + languageName: node + linkType: hard + "handle-thing@npm:^2.0.0": version: 2.0.1 resolution: "handle-thing@npm:2.0.1" @@ -14575,6 +14599,13 @@ __metadata: languageName: node linkType: hard +"long@npm:^5.0.0, long@npm:^5.2.3": + version: 5.2.3 + resolution: "long@npm:5.2.3" + checksum: 885ede7c3de4facccbd2cacc6168bae3a02c3e836159ea4252c87b6e34d40af819824b2d4edce330bfb5c4d6e8ce3ec5864bdcf9473fa1f53a4f8225860e5897 + languageName: node + linkType: hard + "longest-streak@npm:^3.0.0": version: 3.1.0 resolution: "longest-streak@npm:3.1.0" @@ -16259,6 +16290,27 @@ __metadata: languageName: node linkType: hard +"onnxruntime-common@npm:~1.16.3": + version: 1.16.3 + resolution: "onnxruntime-common@npm:1.16.3" + checksum: b8b83fd318576ef39d86f264ef0edbda2a837f2830a9a3d4f82b2d969328fe7b813f3a8ab24c4dc767f01355294790268bd0d327b9c8e2765d99b2ccedb53245 + languageName: node + linkType: hard + +"onnxruntime-web@npm:^1.16.3": + version: 1.16.3 + resolution: "onnxruntime-web@npm:1.16.3" + dependencies: + flatbuffers: ^1.12.0 + guid-typescript: ^1.0.9 + long: ^5.2.3 + onnxruntime-common: ~1.16.3 + platform: ^1.3.6 + protobufjs: ^7.2.4 + checksum: e8b71733f907928f4907c481a70e809499b930c072df67f0157c76bce14ef06c7c790843c31116e6aed11f1659a13d815375b1fe9e632358ee754c1e8b7d1a08 + languageName: node + linkType: hard + "open@npm:^8.0.9, open@npm:^8.4.0": version: 8.4.0 resolution: "open@npm:8.4.0" @@ -16792,6 +16844,13 @@ __metadata: languageName: node linkType: hard +"platform@npm:^1.3.6": + version: 1.3.6 + resolution: "platform@npm:1.3.6" + checksum: 6f472a09c61d418c7e26c1c16d0bdc029549d512dbec6526216a1e59ec68100d07007d0097dcba69dddad883d6f2a83361b4bdfe0094a3d9a2af24158643d85e + languageName: node + linkType: hard + "pluralize@npm:8.0.0": version: 8.0.0 resolution: "pluralize@npm:8.0.0" @@ -17401,6 +17460,26 @@ __metadata: languageName: node linkType: hard +"protobufjs@npm:^7.2.4": + version: 7.2.5 + resolution: "protobufjs@npm:7.2.5" + dependencies: + "@protobufjs/aspromise": ^1.1.2 + "@protobufjs/base64": ^1.1.2 + "@protobufjs/codegen": ^2.0.4 + "@protobufjs/eventemitter": ^1.1.0 + "@protobufjs/fetch": ^1.1.0 + "@protobufjs/float": ^1.0.2 + "@protobufjs/inquire": ^1.1.0 + "@protobufjs/path": ^1.1.2 + "@protobufjs/pool": ^1.1.0 + "@protobufjs/utf8": ^1.1.0 + "@types/node": ">=13.7.0" + long: ^5.0.0 + checksum: 3770a072114061faebbb17cfd135bc4e187b66bc6f40cd8bac624368b0270871ec0cfb43a02b9fb4f029c8335808a840f1afba3c2e7ede7063b98ae6b98a703f + languageName: node + linkType: hard + "proxy-addr@npm:~2.0.7": version: 2.0.7 resolution: "proxy-addr@npm:2.0.7" @@ -20223,6 +20302,13 @@ __metadata: languageName: node linkType: hard +"undici-types@npm:~5.26.4": + version: 5.26.5 + resolution: "undici-types@npm:5.26.5" + checksum: 3192ef6f3fd5df652f2dc1cd782b49d6ff14dc98e5dced492aa8a8c65425227da5da6aafe22523c67f035a272c599bb89cfe803c1db6311e44bed3042fc25487 + languageName: node + linkType: hard + "unicode-canonical-property-names-ecmascript@npm:^2.0.0": version: 2.0.0 resolution: "unicode-canonical-property-names-ecmascript@npm:2.0.0"