// opus-submitter/opus_submitter/src/services/ocrService.ts
//
// 600 lines
// 19 KiB
// TypeScript

import { createWorker } from 'tesseract.js';
/**
 * Values read from one Opus Magnum result screenshot, plus the OCR
 * confidence for each of them.
 *
 * All four values are strings; a field is the empty string when nothing
 * usable was recognized for it.
 */
export interface OpusMagnumData {
// Puzzle name after it has been matched against the configured puzzle
// names (the trimmed OCR text when no names are configured).
puzzle: string;
// Cost with every non-digit character removed (the "G" suffix is dropped).
cost: string;
// Cycle count, digits only.
cycles: string;
// Area, digits only.
area: string;
// Recognition confidence per field, scaled from Tesseract's 0-100 range
// down to 0-1.
confidence: {
puzzle: number;
cost: number;
cycles: number;
area: number;
// Arithmetic mean of the per-field confidences above.
overall: number;
};
}
/**
 * Axis-aligned rectangle inside the screenshot that holds one field.
 * The values are used directly as canvas source coordinates when the region
 * is cropped out of the full image.
 */
export interface OCRRegion {
// Left edge of the rectangle.
x: number;
// Top edge of the rectangle.
y: number;
// Horizontal extent of the rectangle.
width: number;
// Vertical extent of the rectangle.
height: number;
}
/**
 * Reads the puzzle name, cost, cycles and area from fixed regions of an
 * Opus Magnum result screenshot with a tesseract.js worker.
 *
 * One worker is shared by every call and is re-configured (character
 * whitelist, segmentation mode, ...) before each field is recognized, so
 * calls to `extractOpusMagnumData` on the same instance should not overlap:
 * interleaved calls would recognize fields with each other's settings.
 */
export class OpusMagnumOCRService {
  /**
   * Field regions, keyed by field name.
   * Regions based on main.py coordinates (adjusted for web usage).
   * Insertion order is the order in which fields are recognized.
   */
  private static readonly REGIONS: Record<string, OCRRegion> = {
    puzzle: { x: 15, y: 600, width: 330, height: 28 },
    cost: { x: 412, y: 603, width: 65, height: 22 },
    cycles: { x: 577, y: 603, width: 65, height: 22 },
    area: { x: 739, y: 603, width: 65, height: 22 }
  };

  private worker: Tesseract.Worker | null = null;
  /** Initialization currently in flight; shared by overlapping `initialize()` calls. */
  private initPromise: Promise<void> | null = null;
  private availablePuzzleNames: string[] = [];
  private readonly regions: Record<string, OCRRegion> = OpusMagnumOCRService.REGIONS;

  /**
   * Create and configure the Tesseract worker if that has not happened yet.
   *
   * Safe to call repeatedly and concurrently: overlapping callers share one
   * in-flight initialization instead of each creating (and leaking) a worker.
   * After a failed attempt the next call starts over.
   *
   * @throws whatever worker creation or configuration rejects with.
   */
  async initialize(): Promise<void> {
    if (this.worker) return;

    let pending = this.initPromise;
    if (!pending) {
      pending = this.createConfiguredWorker().then(worker => {
        this.worker = worker;
      });
      this.initPromise = pending;
    }

    try {
      await pending;
    } finally {
      // Only clear our own attempt; a retry may already have replaced it.
      if (this.initPromise === pending) {
        this.initPromise = null;
      }
    }
  }

  /**
   * Create an English worker and apply the default engine / segmentation
   * settings. The worker is terminated again if configuring it fails so that
   * a half-configured worker is never installed.
   */
  private async createConfiguredWorker(): Promise<Tesseract.Worker> {
    const worker = await createWorker('eng');
    try {
      // NOTE(review): tesseract.js documents the engine mode as an
      // initialization-only setting; confirm it has any effect when applied
      // through setParameters.
      await worker.setParameters({
        tessedit_ocr_engine_mode: '3',
        tessedit_pageseg_mode: 7 as any
      });
    } catch (error) {
      await worker.terminate();
      throw error;
    }
    return worker;
  }

  /**
   * Set the list of available puzzle names for better OCR matching.
   * The list is copied, so later changes to the caller's array have no
   * effect until this method is called again.
   */
  setAvailablePuzzleNames(puzzleNames: string[]): void {
    this.availablePuzzleNames = [...puzzleNames];
    console.log('OCR service updated with puzzle names:', puzzleNames);
  }

  /**
   * Configure the worker for puzzle-name recognition: restrict the character
   * set to the known names and adjust the dictionary / segmentation settings.
   * Does nothing when no worker exists.
   */
  private async configurePuzzleOCR(): Promise<void> {
    if (!this.worker) return;

    const whitelist = this.getPuzzleCharacterSet();
    // NOTE(review): tesseract.js documents the load_*_dawg parameters as
    // initialization-only (to be passed when the worker is created); verify
    // against the installed version whether they do anything here.
    // NOTE(review): nothing resets these settings afterwards, so the numeric
    // fields, which are recognized after `puzzle`, presumably also run with
    // segmentation mode 8 - confirm that is intended.
    await this.worker.setParameters({
      // Disable all system dictionaries to prevent interference
      load_system_dawg: '0',
      load_freq_dawg: '0',
      load_punc_dawg: '0',
      load_number_dawg: '0',
      load_unambig_dawg: '0',
      load_bigram_dawg: '0',
      load_fixed_length_dawgs: '0',
      // Use only characters from our puzzle names
      tessedit_char_whitelist: whitelist,
      // Optimize for single words/short phrases
      tessedit_pageseg_mode: 8 as any, // Single word
      // Increase penalties for non-dictionary words
      segment_penalty_dict_nonword: '2.0',
      segment_penalty_dict_frequent_word: '0.001',
      segment_penalty_dict_case_ok: '0.001',
      segment_penalty_dict_case_bad: '0.1',
      // Make OCR more conservative about character recognition
      classify_enable_learning: '0',
      classify_enable_adaptive_matcher: '1',
      // Preserve word boundaries
      preserve_interword_spaces: '1'
    });
    console.log('OCR configured for puzzle names with character set:', whitelist);
  }

  /**
   * Build the character whitelist for puzzle-name recognition.
   *
   * @returns every distinct character of the configured puzzle names in
   * first-seen order, or a generic alphanumeric set (plus space and hyphen)
   * when no names are configured.
   */
  private getPuzzleCharacterSet(): string {
    if (this.availablePuzzleNames.length === 0) {
      // Fallback to common characters
      return 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 -';
    }
    return Array.from(new Set(this.availablePuzzleNames.join(''))).join('');
  }

  /**
   * Run OCR on every field region of a screenshot.
   *
   * Initializes the worker on demand, crops and preprocesses each region,
   * configures the worker for the field, recognizes it and cleans the text.
   *
   * @param imageFile screenshot to read.
   * @returns the cleaned field values with per-field and mean confidence (0-1).
   * @throws Error('Failed to load image') when the file cannot be decoded, or
   * whatever canvas access / the worker fails with.
   */
  async extractOpusMagnumData(imageFile: File): Promise<OpusMagnumData> {
    if (!this.worker) {
      await this.initialize();
    }

    const img = await OpusMagnumOCRService.loadImage(imageFile, 'Failed to load image');
    const { canvas: source, ctx: sourceCtx } = OpusMagnumOCRService.createCanvas(img.width, img.height);
    sourceCtx.drawImage(img, 0, 0);

    const texts: Record<string, string> = {};
    const confidenceScores: Record<string, number> = {};
    for (const [key, region] of Object.entries(this.regions)) {
      const regionCanvas = this.cropRegion(source, region);
      await this.configureWorkerForField(key);
      const { data: { text, confidence } } = await this.requireWorker().recognize(regionCanvas);
      confidenceScores[key] = confidence / 100; // Tesseract returns 0-100, we want 0-1
      texts[key] = this.postProcessField(key, text);
    }

    // Overall confidence is the average of all field confidences
    const confidenceValues = Object.values(confidenceScores);
    const overallConfidence = confidenceValues.length > 0
      ? confidenceValues.reduce((sum, conf) => sum + conf, 0) / confidenceValues.length
      : 0;

    return {
      puzzle: texts.puzzle || '',
      cost: texts.cost || '',
      cycles: texts.cycles || '',
      area: texts.area || '',
      confidence: {
        puzzle: confidenceScores.puzzle || 0,
        cost: confidenceScores.cost || 0,
        cycles: confidenceScores.cycles || 0,
        area: confidenceScores.area || 0,
        overall: overallConfidence
      }
    };
  }

  /** Return the worker or fail with a descriptive error when there is none. */
  private requireWorker(): Tesseract.Worker {
    if (!this.worker) {
      throw new Error('OCR worker is not initialized');
    }
    return this.worker;
  }

  /**
   * Decode a file into an image element. The temporary object URL is released
   * as soon as loading has succeeded or failed.
   */
  private static loadImage(imageFile: File, failureMessage: string): Promise<HTMLImageElement> {
    return new Promise((resolve, reject) => {
      const imageUrl = URL.createObjectURL(imageFile);
      const img = new Image();
      img.onload = () => {
        URL.revokeObjectURL(imageUrl);
        resolve(img);
      };
      img.onerror = () => {
        URL.revokeObjectURL(imageUrl);
        reject(new Error(failureMessage));
      };
      img.src = imageUrl;
    });
  }

  /** Create a canvas of the given size together with its 2D context. */
  private static createCanvas(
    width: number,
    height: number
  ): { canvas: HTMLCanvasElement; ctx: CanvasRenderingContext2D } {
    const canvas = document.createElement('canvas');
    canvas.width = width;
    canvas.height = height;
    const ctx = canvas.getContext('2d');
    if (!ctx) {
      throw new Error('2D canvas context is not available');
    }
    return { canvas, ctx };
  }

  /** Copy one region out of the full screenshot and preprocess it for OCR. */
  private cropRegion(source: HTMLCanvasElement, region: OCRRegion): HTMLCanvasElement {
    const { canvas, ctx } = OpusMagnumOCRService.createCanvas(region.width, region.height);
    ctx.drawImage(
      source,
      region.x, region.y, region.width, region.height,
      0, 0, region.width, region.height
    );
    // Convert to grayscale and invert (similar to main.py processing)
    const imageData = ctx.getImageData(0, 0, region.width, region.height);
    this.preprocessImage(imageData);
    ctx.putImageData(imageData, 0, 0);
    return canvas;
  }

  /** Apply the recognition settings that fit the content of the given field. */
  private async configureWorkerForField(key: string): Promise<void> {
    switch (key) {
      case 'puzzle':
        await this.configurePuzzleOCR();
        return;
      case 'cost':
        // Cost field has digits + 'G' for gold
        await this.requireWorker().setParameters({ tessedit_char_whitelist: '0123456789G' });
        return;
      case 'cycles':
      case 'area':
        // Pure digits
        await this.requireWorker().setParameters({ tessedit_char_whitelist: '0123456789' });
        return;
      default:
        // Default - allow all characters
        await this.requireWorker().setParameters({ tessedit_char_whitelist: '' });
    }
  }

  /**
   * Turn raw OCR text into the final value of a field: digits only for the
   * numeric fields, a known puzzle name for `puzzle`, trimmed text otherwise.
   */
  private postProcessField(key: string, rawText: string): string {
    const text = rawText.trim();
    switch (key) {
      case 'cost':
        return this.cleanCostText(text);
      case 'cycles':
      case 'area':
        return text.replace(/[^0-9]/g, '');
      case 'puzzle':
        return this.resolvePuzzleName(text);
      default:
        return text;
    }
  }

  /**
   * Reduce recognized cost text to its digits.
   *
   * The trailing 'G' is commonly misread as '6', so a final '6' after an
   * otherwise all-digit value is treated as that 'G' and dropped.
   * NOTE(review): a cost that really ends in 6 and whose 'G' was not
   * recognized at all would lose its last digit the same way.
   */
  private cleanCostText(text: string): string {
    let digits = text;
    if (digits.length > 1 && digits.endsWith('6')) {
      const withoutLast = digits.slice(0, -1);
      if (/^\d+$/.test(withoutLast)) {
        digits = withoutLast;
      }
    }
    // Removes the 'G' suffix along with anything else that is not a digit.
    return digits.replace(/[^0-9]/g, '');
  }

  /**
   * Map recognized text onto a configured puzzle name, falling back to the
   * forced match when the regular strategies do not yield a known name.
   */
  private resolvePuzzleName(text: string): string {
    let name = this.findBestPuzzleMatch(text);
    if (this.availablePuzzleNames.length > 0 && !this.availablePuzzleNames.includes(name)) {
      const forcedMatch = this.findBestPuzzleMatchForced(name);
      if (forcedMatch) {
        name = forcedMatch;
        console.log(`Forced OCR match: "${text.trim()}" -> "${name}"`);
      }
    }
    return name;
  }

  /**
   * Convert RGBA pixel data to inverted grayscale in place (similar to
   * cv2.bitwise_not in main.py). The alpha channel is left untouched.
   */
  private preprocessImage(imageData: ImageData): void {
    const pixels = imageData.data;
    for (let offset = 0; offset < pixels.length; offset += 4) {
      const gray = Math.round(
        0.299 * pixels[offset] + 0.587 * pixels[offset + 1] + 0.114 * pixels[offset + 2]
      );
      const inverted = 255 - gray;
      pixels[offset] = inverted;
      pixels[offset + 1] = inverted;
      pixels[offset + 2] = inverted;
    }
  }

  /**
   * Calculate the Levenshtein distance between two strings (unit cost for
   * insertion, deletion and substitution), keeping only two rows in memory.
   */
  private levenshteinDistance(str1: string, str2: string): number {
    // previous[i]: distance between the first i characters of str1 and the
    // prefix of str2 handled so far.
    let previous = Array.from({ length: str1.length + 1 }, (_, i) => i);
    for (let j = 1; j <= str2.length; j++) {
      const current = [j];
      for (let i = 1; i <= str1.length; i++) {
        const substitutionCost = str1[i - 1] === str2[j - 1] ? 0 : 1;
        current.push(Math.min(
          current[i - 1] + 1, // deletion
          previous[i] + 1, // insertion
          previous[i - 1] + substitutionCost // substitution
        ));
      }
      previous = current;
    }
    return previous[str1.length];
  }

  /**
   * Find the best matching puzzle name for recognized text.
   *
   * Strategies, in order: case-insensitive exact match; substring match in
   * either direction (closest length wins); best of Levenshtein /
   * Jaro-Winkler / bigram similarity above 0.4; and, when that best score is
   * below 0.6, a character-frequency match which then takes precedence.
   *
   * @returns a configured puzzle name, or the trimmed input when nothing
   * matched or no names are configured ('' for blank input).
   */
  private findBestPuzzleMatch(ocrText: string): string {
    const cleanedOcr = ocrText.trim();
    if (!this.availablePuzzleNames.length) {
      return cleanedOcr;
    }
    if (!cleanedOcr) return '';
    const needle = cleanedOcr.toLowerCase();

    // Strategy 1: Exact match (case insensitive)
    const exactMatch = this.availablePuzzleNames.find(name => name.toLowerCase() === needle);
    if (exactMatch) return exactMatch;

    // Strategy 2: Substring match (either direction). Several names can
    // qualify, so prefer the one whose length is closest to the OCR text.
    let substringMatch: string | null = null;
    let substringFit = -1;
    for (const name of this.availablePuzzleNames) {
      const candidate = name.toLowerCase();
      // A blank name is a substring of almost anything; never let it win.
      if (!candidate.trim()) continue;
      if (!candidate.includes(needle) && !needle.includes(candidate)) continue;
      const fit = this.calculateLengthSimilarity(candidate, needle);
      if (fit > substringFit) {
        substringFit = fit;
        substringMatch = name;
      }
    }
    if (substringMatch !== null) return substringMatch;

    // Strategy 3: Multiple fuzzy matching approaches
    let bestMatch = cleanedOcr;
    let bestScore = 0;
    for (const puzzleName of this.availablePuzzleNames) {
      // Use the maximum score from all algorithms
      const maxScore = Math.max(
        this.calculateLevenshteinSimilarity(cleanedOcr, puzzleName),
        this.calculateJaroWinklerSimilarity(cleanedOcr, puzzleName),
        this.calculateNGramSimilarity(cleanedOcr, puzzleName, 2)
      );
      // Lower threshold for better matching - force selection even with moderate confidence
      if (maxScore > bestScore && maxScore > 0.4) {
        bestScore = maxScore;
        bestMatch = puzzleName;
      }
    }

    // Strategy 4: If no good match found, try character-based matching
    if (bestScore < 0.6) {
      const charMatch = this.findBestCharacterMatch(cleanedOcr);
      if (charMatch) {
        bestMatch = charMatch;
      }
    }
    return bestMatch;
  }

  /**
   * Case-insensitive Levenshtein similarity: 1 minus the distance divided by
   * the longer length (1 for two empty strings).
   */
  private calculateLevenshteinSimilarity(str1: string, str2: string): number {
    const distance = this.levenshteinDistance(str1.toLowerCase(), str2.toLowerCase());
    const maxLength = Math.max(str1.length, str2.length);
    return maxLength === 0 ? 1 : 1 - (distance / maxLength);
  }

  /**
   * Case-insensitive Jaro-Winkler similarity with a prefix bonus of 0.1 per
   * shared leading character (at most four).
   */
  private calculateJaroWinklerSimilarity(str1: string, str2: string): number {
    const s1 = str1.toLowerCase();
    const s2 = str2.toLowerCase();
    if (s1 === s2) return 1;

    const matchWindow = Math.floor(Math.max(s1.length, s2.length) / 2) - 1;
    // Only reachable for different strings of length <= 1: nothing can match.
    if (matchWindow < 0) return 0;

    const s1Matched: boolean[] = new Array(s1.length).fill(false);
    const s2Matched: boolean[] = new Array(s2.length).fill(false);
    let matches = 0;

    // Find matches: equal characters no further apart than the window
    for (let i = 0; i < s1.length; i++) {
      const start = Math.max(0, i - matchWindow);
      const end = Math.min(i + matchWindow + 1, s2.length);
      for (let j = start; j < end; j++) {
        if (s2Matched[j] || s1[i] !== s2[j]) continue;
        s1Matched[i] = true;
        s2Matched[j] = true;
        matches++;
        break;
      }
    }
    if (matches === 0) return 0;

    // Count transpositions: matched characters that appear in a different order
    let transpositions = 0;
    let k = 0;
    for (let i = 0; i < s1.length; i++) {
      if (!s1Matched[i]) continue;
      while (!s2Matched[k]) k++;
      if (s1[i] !== s2[k]) transpositions++;
      k++;
    }
    const jaro = (matches / s1.length + matches / s2.length + (matches - transpositions / 2) / matches) / 3;

    // Jaro-Winkler bonus for common prefix
    const maxPrefix = Math.min(s1.length, s2.length, 4);
    let prefix = 0;
    while (prefix < maxPrefix && s1[prefix] === s2[prefix]) prefix++;
    return jaro + (0.1 * prefix * (1 - jaro));
  }

  /**
   * Case-insensitive n-gram similarity: size of the intersection of the two
   * n-gram sets divided by the size of their union. Returns 1 for equal
   * strings and 0 when either string is shorter than `n`.
   */
  private calculateNGramSimilarity(str1: string, str2: string, n: number): number {
    const s1 = str1.toLowerCase();
    const s2 = str2.toLowerCase();
    if (s1 === s2) return 1;
    if (s1.length < n || s2.length < n) return 0;

    const collectNGrams = (text: string): Set<string> => {
      const grams = new Set<string>();
      for (let i = 0; i + n <= text.length; i++) {
        grams.add(text.slice(i, i + n));
      }
      return grams;
    };
    const ngrams1 = collectNGrams(s1);
    const ngrams2 = collectNGrams(s2);

    let shared = 0;
    for (const gram of ngrams1) {
      if (ngrams2.has(gram)) shared++;
    }
    const unionSize = ngrams1.size + ngrams2.size - shared;
    return shared / unionSize;
  }

  /**
   * Find the puzzle name whose character frequencies are most similar to the
   * recognized text.
   *
   * @returns the best name scoring above 0.3, or null when there is none.
   */
  private findBestCharacterMatch(ocrText: string): string | null {
    const needle = ocrText.toLowerCase();
    let bestMatch: string | null = null;
    let bestScore = 0;
    for (const puzzleName of this.availablePuzzleNames) {
      const score = this.calculateCharacterFrequencyScore(needle, puzzleName.toLowerCase());
      if (score > bestScore && score > 0.3) {
        bestScore = score;
        bestMatch = puzzleName;
      }
    }
    return bestMatch;
  }

  /**
   * Character-frequency similarity: per character, the smaller of the two
   * counts summed up, divided by the sum of the larger counts. Order of
   * characters is ignored; case is not normalized here. 0 for two empty strings.
   */
  private calculateCharacterFrequencyScore(str1: string, str2: string): number {
    const countCharacters = (text: string): Map<string, number> => {
      const counts = new Map<string, number>();
      for (const char of text) {
        counts.set(char, (counts.get(char) || 0) + 1);
      }
      return counts;
    };
    const freq1 = countCharacters(str1);
    const freq2 = countCharacters(str2);

    let similarity = 0;
    let totalChars = 0;
    for (const char of new Set([...freq1.keys(), ...freq2.keys()])) {
      const count1 = freq1.get(char) || 0;
      const count2 = freq2.get(char) || 0;
      similarity += Math.min(count1, count2);
      totalChars += Math.max(count1, count2);
    }
    return totalChars === 0 ? 0 : similarity / totalChars;
  }

  /**
   * Last-resort match that picks a configured puzzle name no matter how weak
   * the similarity is, using a weighted mix of all similarity measures.
   *
   * @returns the best-scoring name (the first name when every score is 0),
   * or null when no names are configured or the text is blank.
   */
  private findBestPuzzleMatchForced(ocrText: string): string | null {
    if (!this.availablePuzzleNames.length || !ocrText.trim()) {
      return null;
    }
    const cleanedOcr = ocrText.trim().toLowerCase();
    let bestMatch = this.availablePuzzleNames[0]; // Default to first puzzle
    let bestScore = 0;

    for (const puzzleName of this.availablePuzzleNames) {
      const lowerName = puzzleName.toLowerCase();
      const levenshtein = this.calculateLevenshteinSimilarity(cleanedOcr, puzzleName);
      const jaroWinkler = this.calculateJaroWinklerSimilarity(cleanedOcr, puzzleName);
      const nGram = this.calculateNGramSimilarity(cleanedOcr, puzzleName, 2);
      const charFrequency = this.calculateCharacterFrequencyScore(cleanedOcr, lowerName);
      const length = this.calculateLengthSimilarity(cleanedOcr, lowerName);

      const weightedScore = (
        levenshtein * 0.25 +
        jaroWinkler * 0.25 +
        nGram * 0.2 +
        charFrequency * 0.2 +
        length * 0.1
      );
      if (weightedScore > bestScore) {
        bestScore = weightedScore;
        bestMatch = puzzleName;
      }
    }
    console.log(`Forced match for "${ocrText}": "${bestMatch}" (score: ${bestScore.toFixed(3)})`);
    return bestMatch;
  }

  /**
   * Similarity based on string length alone: shorter length divided by
   * longer length (1 for two empty strings).
   */
  private calculateLengthSimilarity(str1: string, str2: string): number {
    const longer = Math.max(str1.length, str2.length);
    const shorter = Math.min(str1.length, str2.length);
    return longer === 0 ? 1 : shorter / longer;
  }

  /**
   * Shut the worker down and forget it; `initialize()` creates a new one.
   * Waits for an initialization that is still in flight so that its worker
   * is not left running.
   */
  async terminate(): Promise<void> {
    if (this.initPromise) {
      try {
        await this.initPromise;
      } catch {
        // The failure was already delivered to the initialize() caller.
      }
    }
    const worker = this.worker;
    if (worker) {
      // Detach first so nothing picks up a worker that is shutting down.
      this.worker = null;
      await worker.terminate();
    }
  }

  /**
   * Basic validation: accepts a file when its MIME type is JPEG, PNG or GIF.
   * The image content itself is not inspected.
   */
  static isValidOpusMagnumImage(file: File): boolean {
    const validTypes = ['image/jpeg', 'image/jpg', 'image/png', 'image/gif'];
    return validTypes.includes(file.type);
  }

  /**
   * Debug helper: draw the OCR regions as green rectangles on top of the
   * screenshot (similar to main.py debug rectangles).
   *
   * @returns the annotated image as a data URL.
   * @throws Error('Failed to load image for debug') when the file cannot be
   * decoded; canvas failures reject the promise as well.
   */
  static async drawDebugRegions(imageFile: File): Promise<string> {
    const img = await OpusMagnumOCRService.loadImage(imageFile, 'Failed to load image for debug');
    const { canvas, ctx } = OpusMagnumOCRService.createCanvas(img.width, img.height);
    ctx.drawImage(img, 0, 0);

    ctx.strokeStyle = '#00ff00';
    ctx.lineWidth = 2;
    for (const region of Object.values(OpusMagnumOCRService.REGIONS)) {
      ctx.strokeRect(region.x, region.y, region.width, region.height);
    }
    return canvas.toDataURL();
  }
}
// Singleton instance for the application.
// Constructing it does not start a Tesseract worker; the worker is created
// by initialize(), which extractOpusMagnumData() calls on demand.
export const ocrService = new OpusMagnumOCRService();