// Listing metadata: 600 lines, 19 KiB, TypeScript.
import { createWorker } from 'tesseract.js';
/**
 * Values read from an Opus Magnum solution screenshot by OCR.
 *
 * Every extracted value is kept as a string; numeric fields contain digits
 * only (or are empty when nothing could be read).
 */
export interface OpusMagnumData {
  /** Puzzle name, matched against the known puzzle names when available. */
  puzzle: string;
  /** Solution cost, digits only. */
  cost: string;
  /** Solution cycle count, digits only. */
  cycles: string;
  /** Solution area, digits only. */
  area: string;
  /** OCR confidence per field, normalized to the 0-1 range. */
  confidence: {
    puzzle: number;
    cost: number;
    cycles: number;
    area: number;
    /** Arithmetic mean of the per-field confidences. */
    overall: number;
  };
}

/** Axis-aligned rectangle, in image pixels, that is cropped out and passed to OCR. */
export interface OCRRegion {
  /** Left edge of the rectangle. */
  x: number;
  /** Top edge of the rectangle. */
  y: number;
  /** Rectangle width. */
  width: number;
  /** Rectangle height. */
  height: number;
}

/**
 * OCR service that reads the puzzle name, cost, cycles and area from fixed
 * regions of an Opus Magnum screenshot using a tesseract.js worker.
 *
 * Typical use: optionally call {@link setAvailablePuzzleNames}, then
 * {@link extractOpusMagnumData} for each image, and {@link terminate} when the
 * worker is no longer needed.
 */
export class OpusMagnumOCRService {
  /** Regions based on main.py coordinates (adjusted for web usage). */
  private static readonly REGIONS: Record<string, OCRRegion> = {
    puzzle: { x: 15, y: 600, width: 330, height: 28 },
    cost: { x: 412, y: 603, width: 65, height: 22 },
    cycles: { x: 577, y: 603, width: 65, height: 22 },
    area: { x: 739, y: 603, width: 65, height: 22 }
  };

  private worker: Tesseract.Worker | null = null;
  private availablePuzzleNames: string[] = [];
  private readonly regions: Record<string, OCRRegion> = OpusMagnumOCRService.REGIONS;

  /**
   * Create and configure the Tesseract worker. Does nothing when a worker
   * already exists.
   */
  async initialize(): Promise<void> {
    if (this.worker) return;

    this.worker = await createWorker('eng');
    // NOTE(review): tesseract.js documents the engine mode as an init-only
    // option; confirm that setting it through setParameters has any effect.
    await this.worker.setParameters({
      tessedit_ocr_engine_mode: '3',
      tessedit_pageseg_mode: 7 as any
    });
  }

  /**
   * Set the list of available puzzle names for better OCR matching.
   *
   * @param puzzleNames Known puzzle names; the list is copied so later
   *   mutations of the caller's array do not affect matching.
   */
  setAvailablePuzzleNames(puzzleNames: string[]): void {
    this.availablePuzzleNames = [...puzzleNames];
    console.log('OCR service updated with puzzle names:', puzzleNames);
  }

  /**
   * Configure OCR specifically for puzzle name recognition.
   * Uses aggressive character whitelisting and dictionary constraints.
   *
   * NOTE(review): tesseract.js documents the `load_*_dawg` family as init-only
   * parameters; confirm they take effect when passed to setParameters.
   * NOTE(review): everything set here except the whitelist stays active for
   * the fields recognized afterwards, which only replace the whitelist.
   */
  private async configurePuzzleOCR(): Promise<void> {
    if (!this.worker) return;

    const characterSet = this.getPuzzleCharacterSet();

    await this.worker.setParameters({
      // Disable all system dictionaries to prevent interference
      load_system_dawg: '0',
      load_freq_dawg: '0',
      load_punc_dawg: '0',
      load_number_dawg: '0',
      load_unambig_dawg: '0',
      load_bigram_dawg: '0',
      load_fixed_length_dawgs: '0',

      // Use only characters from our puzzle names
      tessedit_char_whitelist: characterSet,

      // Optimize for single words/short phrases
      tessedit_pageseg_mode: 8 as any, // Single word

      // Increase penalties for non-dictionary words
      segment_penalty_dict_nonword: '2.0',
      segment_penalty_dict_frequent_word: '0.001',
      segment_penalty_dict_case_ok: '0.001',
      segment_penalty_dict_case_bad: '0.1',

      // Make OCR more conservative about character recognition
      classify_enable_learning: '0',
      classify_enable_adaptive_matcher: '1',

      // Preserve word boundaries
      preserve_interword_spaces: '1'
    });

    console.log('OCR configured for puzzle names with character set:', characterSet);
  }

  /**
   * Build the character whitelist for puzzle-name OCR.
   *
   * @returns Every distinct character used by the known puzzle names, in
   *   first-seen order, or a generic alphanumeric set when no names are known.
   */
  private getPuzzleCharacterSet(): string {
    if (this.availablePuzzleNames.length === 0) {
      // Fallback to common characters
      return 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 -';
    }

    // A Set built from the concatenated names keeps one copy of each character.
    return [...new Set<string>(this.availablePuzzleNames.join(''))].join('');
  }

  /**
   * Run OCR over the fixed regions of a screenshot.
   *
   * @param imageFile Screenshot to read.
   * @returns The cleaned text of each field plus per-field and overall
   *   confidence (0-1).
   * @throws Error when the image cannot be loaded, the worker is unavailable,
   *   or a 2D canvas context cannot be created; worker errors propagate as-is.
   */
  async extractOpusMagnumData(imageFile: File): Promise<OpusMagnumData> {
    if (!this.worker) {
      await this.initialize();
    }
    const worker = this.requireWorker();

    const imageUrl = URL.createObjectURL(imageFile);
    try {
      const img = await OpusMagnumOCRService.loadImage(imageUrl, 'Failed to load image');
      const { canvas, ctx } = OpusMagnumOCRService.createCanvas(img.width, img.height);
      ctx.drawImage(img, 0, 0);

      const texts: Record<string, string> = {};
      const confidenceScores: Record<string, number> = {};

      // Fields are processed one at a time because they share a single worker
      // whose parameters are changed per field.
      for (const [key, region] of Object.entries(this.regions)) {
        const regionCanvas = this.extractRegion(canvas, region);
        await this.configureWorkerForField(key);

        const { data: { text, confidence } } = await worker.recognize(regionCanvas);

        confidenceScores[key] = confidence / 100; // Tesseract returns 0-100, we want 0-1
        texts[key] = this.cleanFieldText(key, text);
      }

      // Overall confidence is the average of all field confidences.
      const confidenceValues = Object.values(confidenceScores);
      const overallConfidence = confidenceValues.length > 0
        ? confidenceValues.reduce((sum, conf) => sum + conf, 0) / confidenceValues.length
        : 0;

      return {
        puzzle: texts.puzzle ?? '',
        cost: texts.cost ?? '',
        cycles: texts.cycles ?? '',
        area: texts.area ?? '',
        confidence: {
          // `||` (not `??`) so that a NaN confidence is also reported as 0.
          puzzle: confidenceScores.puzzle || 0,
          cost: confidenceScores.cost || 0,
          cycles: confidenceScores.cycles || 0,
          area: confidenceScores.area || 0,
          overall: overallConfidence
        }
      };
    } finally {
      // Release the object URL on success and on every failure path.
      URL.revokeObjectURL(imageUrl);
    }
  }

  /** Return the current worker or throw when none has been created. */
  private requireWorker(): Tesseract.Worker {
    if (!this.worker) {
      throw new Error('OCR worker is not initialized');
    }
    return this.worker;
  }

  /** Load an image from a URL, rejecting with `errorMessage` on failure. */
  private static loadImage(url: string, errorMessage: string): Promise<HTMLImageElement> {
    return new Promise((resolve, reject) => {
      const img = new Image();
      img.onload = () => resolve(img);
      img.onerror = () => reject(new Error(errorMessage));
      img.src = url;
    });
  }

  /** Create a canvas of the given size together with its 2D context. */
  private static createCanvas(
    width: number,
    height: number
  ): { canvas: HTMLCanvasElement; ctx: CanvasRenderingContext2D } {
    const canvas = document.createElement('canvas');
    canvas.width = width;
    canvas.height = height;

    const ctx = canvas.getContext('2d');
    if (!ctx) {
      throw new Error('2D canvas context is not available');
    }
    return { canvas, ctx };
  }

  /** Crop `region` out of `source` and preprocess it for OCR. */
  private extractRegion(source: HTMLCanvasElement, region: OCRRegion): HTMLCanvasElement {
    const { canvas, ctx } = OpusMagnumOCRService.createCanvas(region.width, region.height);

    ctx.drawImage(
      source,
      region.x, region.y, region.width, region.height,
      0, 0, region.width, region.height
    );

    // Convert to grayscale and invert (similar to main.py processing)
    const imageData = ctx.getImageData(0, 0, region.width, region.height);
    this.preprocessImage(imageData);
    ctx.putImageData(imageData, 0, 0);

    return canvas;
  }

  /** Apply the OCR parameters appropriate for the field identified by `key`. */
  private async configureWorkerForField(key: string): Promise<void> {
    if (key === 'puzzle') {
      await this.configurePuzzleOCR();
      return;
    }

    let whitelist: string;
    if (key === 'cost') {
      // Cost field has digits + 'G' for gold
      whitelist = '0123456789G';
    } else if (key === 'cycles' || key === 'area') {
      // Pure digits
      whitelist = '0123456789';
    } else {
      // Default - allow all characters
      whitelist = '';
    }

    await this.requireWorker().setParameters({ tessedit_char_whitelist: whitelist });
  }

  /**
   * Post-process raw OCR text for a field.
   *
   * @param key Field name (`puzzle`, `cost`, `cycles`, `area`, or other).
   * @param rawText Text exactly as returned by the OCR engine.
   * @returns Digits only for numeric fields, a matched puzzle name for
   *   `puzzle`, and the trimmed text for any other key.
   */
  private cleanFieldText(key: string, rawText: string): string {
    const trimmed = rawText.trim();

    switch (key) {
      case 'cost':
        return this.cleanCostText(trimmed);
      case 'cycles':
      case 'area':
        return trimmed.replace(/[^0-9]/g, '');
      case 'puzzle':
        return this.resolvePuzzleName(trimmed);
      default:
        return trimmed;
    }
  }

  /**
   * Reduce a cost reading such as "125G" to its digits.
   *
   * The trailing "G" is commonly misread as "6"; when the text is all digits
   * and ends in 6, that last character is treated as the misread G and dropped.
   */
  private cleanCostText(text: string): string {
    let value = text;
    if (value.length > 1 && value.endsWith('6')) {
      const withoutLast = value.slice(0, -1);
      if (/^\d+$/.test(withoutLast)) {
        value = withoutLast;
      }
    }
    // Removing every non-digit also removes any trailing G characters.
    return value.replace(/[^0-9]/g, '');
  }

  /**
   * Map OCR text to a known puzzle name, forcing the closest known name when
   * the regular matching strategies do not produce one.
   */
  private resolvePuzzleName(trimmedText: string): string {
    let name = this.findBestPuzzleMatch(trimmedText);

    if (this.availablePuzzleNames.length > 0 && !this.availablePuzzleNames.includes(name)) {
      const forcedMatch = this.findBestPuzzleMatchForced(name);
      if (forcedMatch) {
        name = forcedMatch;
        console.log(`Forced OCR match: "${trimmedText}" -> "${name}"`);
      }
    }

    return name;
  }

  /**
   * Convert the pixels to inverted grayscale in place
   * (similar to cv2.bitwise_not in main.py). Alpha is left unchanged.
   */
  private preprocessImage(imageData: ImageData): void {
    const data = imageData.data;

    for (let i = 0; i < data.length; i += 4) {
      const gray = Math.round(0.299 * data[i] + 0.587 * data[i + 1] + 0.114 * data[i + 2]);
      const inverted = 255 - gray;

      data[i] = inverted; // Red
      data[i + 1] = inverted; // Green
      data[i + 2] = inverted; // Blue
    }
  }

  /**
   * Calculate the Levenshtein (edit) distance between two strings.
   * Only two rows of the dynamic-programming table are kept at a time.
   */
  private levenshteinDistance(str1: string, str2: string): number {
    // previous[i] = distance between the first i chars of str1 and the
    // already-processed prefix of str2.
    let previous: number[] = Array.from({ length: str1.length + 1 }, (_, i) => i);

    for (let j = 1; j <= str2.length; j++) {
      const current: number[] = [j];
      for (let i = 1; i <= str1.length; i++) {
        const substitutionCost = str1[i - 1] === str2[j - 1] ? 0 : 1;
        current.push(Math.min(
          current[i - 1] + 1, // deletion
          previous[i] + 1, // insertion
          previous[i - 1] + substitutionCost // substitution
        ));
      }
      previous = current;
    }

    return previous[str1.length];
  }

  /**
   * Find the best matching puzzle name from available options using multiple
   * strategies: exact match, substring match, fuzzy scores, then character
   * frequency.
   *
   * @param ocrText Raw or trimmed OCR text.
   * @returns A known puzzle name when one matches; otherwise the trimmed OCR
   *   text (or an empty string when the text is blank).
   */
  private findBestPuzzleMatch(ocrText: string): string {
    const cleanedOcr = ocrText.trim();
    if (!this.availablePuzzleNames.length) {
      return cleanedOcr;
    }
    if (!cleanedOcr) return '';

    const needle = cleanedOcr.toLowerCase();

    // Strategy 1: Exact match (case insensitive)
    const exactMatch = this.availablePuzzleNames.find(name => name.toLowerCase() === needle);
    if (exactMatch) return exactMatch;

    // Strategy 2: Substring match (either direction). Among all candidates the
    // closest one wins, so a short fragment does not simply pick whichever
    // containing name happens to come first. Empty names are ignored because
    // every string contains the empty string.
    let substringMatch: string | null = null;
    let substringScore = -1;
    for (const name of this.availablePuzzleNames) {
      const candidate = name.toLowerCase();
      if (!candidate) continue;
      if (!candidate.includes(needle) && !needle.includes(candidate)) continue;

      const score = this.calculateLevenshteinSimilarity(needle, candidate);
      if (score > substringScore) {
        substringScore = score;
        substringMatch = name;
      }
    }
    if (substringMatch !== null) return substringMatch;

    // Strategy 3: Multiple fuzzy matching approaches
    let bestMatch = cleanedOcr;
    let bestScore = 0;

    for (const puzzleName of this.availablePuzzleNames) {
      // Use the maximum score from all algorithms
      const maxScore = Math.max(
        this.calculateLevenshteinSimilarity(cleanedOcr, puzzleName),
        this.calculateJaroWinklerSimilarity(cleanedOcr, puzzleName),
        this.calculateNGramSimilarity(cleanedOcr, puzzleName, 2)
      );

      // Lower threshold for better matching - force selection even with moderate confidence
      if (maxScore > bestScore && maxScore > 0.4) {
        bestScore = maxScore;
        bestMatch = puzzleName;
      }
    }

    // Strategy 4: If no good match found, try character-based matching
    if (bestScore < 0.6) {
      const charMatch = this.findBestCharacterMatch(cleanedOcr);
      if (charMatch) {
        bestMatch = charMatch;
      }
    }

    return bestMatch;
  }

  /**
   * Calculate Levenshtein similarity (case-insensitive), normalized to 0-1
   * by the length of the longer string.
   */
  private calculateLevenshteinSimilarity(str1: string, str2: string): number {
    const distance = this.levenshteinDistance(str1.toLowerCase(), str2.toLowerCase());
    const maxLength = Math.max(str1.length, str2.length);
    return maxLength === 0 ? 1 : 1 - (distance / maxLength);
  }

  /**
   * Calculate Jaro-Winkler similarity (case-insensitive) in the 0-1 range.
   */
  private calculateJaroWinklerSimilarity(str1: string, str2: string): number {
    const s1 = str1.toLowerCase();
    const s2 = str2.toLowerCase();

    if (s1 === s2) return 1;

    const matchWindow = Math.floor(Math.max(s1.length, s2.length) / 2) - 1;
    // Negative only when the longer string has at most one character; the
    // strings differ at this point, so they share nothing.
    if (matchWindow < 0) return 0;

    const s1Matches: boolean[] = new Array(s1.length).fill(false);
    const s2Matches: boolean[] = new Array(s2.length).fill(false);

    let matches = 0;

    // Find matches
    for (let i = 0; i < s1.length; i++) {
      const start = Math.max(0, i - matchWindow);
      const end = Math.min(i + matchWindow + 1, s2.length);

      for (let j = start; j < end; j++) {
        if (s2Matches[j] || s1[i] !== s2[j]) continue;
        s1Matches[i] = true;
        s2Matches[j] = true;
        matches++;
        break;
      }
    }

    if (matches === 0) return 0;

    // Count transpositions: matched characters that appear in a different order.
    let transpositions = 0;
    let k = 0;
    for (let i = 0; i < s1.length; i++) {
      if (!s1Matches[i]) continue;
      while (!s2Matches[k]) k++;
      if (s1[i] !== s2[k]) transpositions++;
      k++;
    }

    const jaro = (
      matches / s1.length +
      matches / s2.length +
      (matches - transpositions / 2) / matches
    ) / 3;

    // Jaro-Winkler bonus for common prefix (at most 4 characters)
    let prefix = 0;
    const prefixLimit = Math.min(s1.length, s2.length, 4);
    while (prefix < prefixLimit && s1[prefix] === s2[prefix]) {
      prefix++;
    }

    return jaro + (0.1 * prefix * (1 - jaro));
  }

  /**
   * Calculate N-gram similarity (case-insensitive): the Jaccard index of the
   * two strings' sets of length-`n` substrings.
   *
   * @returns 1 for equal strings, 0 when either string is shorter than `n`.
   */
  private calculateNGramSimilarity(str1: string, str2: string, n: number): number {
    const s1 = str1.toLowerCase();
    const s2 = str2.toLowerCase();

    if (s1 === s2) return 1;
    if (s1.length < n || s2.length < n) return 0;

    const collect = (text: string): Set<string> => {
      const grams = new Set<string>();
      for (let i = 0; i <= text.length - n; i++) {
        grams.add(text.slice(i, i + n));
      }
      return grams;
    };

    const ngrams1 = collect(s1);
    const ngrams2 = collect(s2);

    let shared = 0;
    for (const gram of ngrams1) {
      if (ngrams2.has(gram)) shared++;
    }
    const unionSize = new Set([...ngrams1, ...ngrams2]).size;

    return shared / unionSize;
  }

  /**
   * Find best match based on character frequency.
   *
   * @returns The known name with the highest score above 0.3, or null.
   */
  private findBestCharacterMatch(ocrText: string): string | null {
    const needle = ocrText.toLowerCase();
    let bestMatch: string | null = null;
    let bestScore = 0;

    for (const puzzleName of this.availablePuzzleNames) {
      const score = this.calculateCharacterFrequencyScore(needle, puzzleName.toLowerCase());
      if (score > bestScore && score > 0.3) {
        bestScore = score;
        bestMatch = puzzleName;
      }
    }

    return bestMatch;
  }

  /**
   * Calculate character frequency similarity: the sum of per-character
   * minimum counts divided by the sum of per-character maximum counts.
   * Case-sensitive; callers lower-case the inputs.
   */
  private calculateCharacterFrequencyScore(str1: string, str2: string): number {
    const countChars = (text: string): Map<string, number> => {
      const counts = new Map<string, number>();
      for (const char of text) {
        counts.set(char, (counts.get(char) || 0) + 1);
      }
      return counts;
    };

    const freq1 = countChars(str1);
    const freq2 = countChars(str2);

    let similarity = 0;
    let totalChars = 0;

    for (const char of new Set([...freq1.keys(), ...freq2.keys()])) {
      const count1 = freq1.get(char) || 0;
      const count2 = freq2.get(char) || 0;
      similarity += Math.min(count1, count2);
      totalChars += Math.max(count1, count2);
    }

    return totalChars === 0 ? 0 : similarity / totalChars;
  }

  /**
   * Force a match to available puzzle names.
   * This is used as a last resort to ensure OCR selects from available puzzles.
   *
   * @returns The name with the highest weighted score (the first name when
   *   every score is 0), or null when there are no names or the text is blank.
   */
  private findBestPuzzleMatchForced(ocrText: string): string | null {
    if (!this.availablePuzzleNames.length || !ocrText.trim()) {
      return null;
    }

    const cleanedOcr = ocrText.trim().toLowerCase();
    let bestMatch = this.availablePuzzleNames[0]; // Default to first puzzle
    let bestScore = 0;

    for (const puzzleName of this.availablePuzzleNames) {
      const loweredName = puzzleName.toLowerCase();

      // Weighted blend of all similarity measures.
      const weightedScore = (
        this.calculateLevenshteinSimilarity(cleanedOcr, puzzleName) * 0.25 +
        this.calculateJaroWinklerSimilarity(cleanedOcr, puzzleName) * 0.25 +
        this.calculateNGramSimilarity(cleanedOcr, puzzleName, 2) * 0.2 +
        this.calculateCharacterFrequencyScore(cleanedOcr, loweredName) * 0.2 +
        this.calculateLengthSimilarity(cleanedOcr, loweredName) * 0.1
      );

      if (weightedScore > bestScore) {
        bestScore = weightedScore;
        bestMatch = puzzleName;
      }
    }

    console.log(`Forced match for "${ocrText}": "${bestMatch}" (score: ${bestScore.toFixed(3)})`);
    return bestMatch;
  }

  /**
   * Calculate similarity based on string length: shorter length divided by
   * longer length, or 1 when both strings are empty.
   */
  private calculateLengthSimilarity(str1: string, str2: string): number {
    const maxLen = Math.max(str1.length, str2.length);
    const minLen = Math.min(str1.length, str2.length);

    return maxLen === 0 ? 1 : minLen / maxLen;
  }

  /** Terminate the worker, if any, and forget it. */
  async terminate(): Promise<void> {
    if (this.worker) {
      await this.worker.terminate();
      this.worker = null;
    }
  }

  /**
   * Utility method to validate if a file could be an Opus Magnum screenshot.
   * Only the MIME type is checked; the image content is not inspected.
   */
  static isValidOpusMagnumImage(file: File): boolean {
    const validTypes = ['image/jpeg', 'image/jpg', 'image/png', 'image/gif'];
    return validTypes.includes(file.type);
  }

  /**
   * Debug method to visualize OCR regions (similar to main.py debug rectangles).
   *
   * @returns A data URL of the image with every OCR region outlined in green.
   * @throws Error when the image cannot be loaded or no 2D context is available.
   */
  static async drawDebugRegions(imageFile: File): Promise<string> {
    const imageUrl = URL.createObjectURL(imageFile);
    try {
      const img = await OpusMagnumOCRService.loadImage(imageUrl, 'Failed to load image for debug');
      const { canvas, ctx } = OpusMagnumOCRService.createCanvas(img.width, img.height);
      ctx.drawImage(img, 0, 0);

      // Draw debug rectangles
      ctx.strokeStyle = '#00ff00';
      ctx.lineWidth = 2;
      for (const region of Object.values(OpusMagnumOCRService.REGIONS)) {
        ctx.strokeRect(region.x, region.y, region.width, region.height);
      }

      return canvas.toDataURL();
    } finally {
      URL.revokeObjectURL(imageUrl);
    }
  }
}

/** Singleton instance for the application. */
export const ocrService = new OpusMagnumOCRService();