# HOW TO USE
# 1. Install Tesseract (https://github.com/tesseract-ocr/tesseract/releases/)
# 2. Install the required libraries (see the imports below)
# 3. Put all your Opus Magnum-generated GIFs in a folder
# 4. Optional: if you are importing your GIFs automatically from Discord using
#    DiscordChatExporter, also save the CSV data file
# 5. Optional: if you want an automatic correspondence between the usernames
#    and the desired display names, create a usernames.csv file with 2
#    columns: username and name
# 6. Change the settings in the code below according to your needs
# 7. If everything goes well, the results are printed on the console and
#    written to a CSV file.

# Import required packages
import os

import cv2
import pandas as pd
import pytesseract
from PIL import Image

### SETTINGS

# Mention the installed location of Tesseract-OCR in your system
# pytesseract.pytesseract.tesseract_cmd = 'C:/Program Files/Tesseract-OCR/tesseract.exe'
pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'

# Insert the path of the folder containing the GIFs here
gifs_path = "gifs_opus_magnum"

# Do you want to include the Discord usernames in the output (requires Discord
# messages data file)? If False, will use name of file instead
use_discord_data = False

# Insert the path of the CSV file containing Discord messages data
discord_data_path = "EvLan - EvLan 2 - solutions-opus-magnum [1260715259514327070].csv"

# Do you want to include the real names of the participants (requires username
# correspondence file)?
use_real_names = False

# Insert the path of the CSV file containing username -> name correspondence
usernames_data_path = "usernames.csv"

# Insert here the desired path for the output CSV file
output_path = "results_opus_magnum.csv"

# Optional: path of a debug image showing green rectangles around the zones
# that were sent to OCR (overwritten for each GIF). Leave as None to disable.
debug_image_path = None

### END OF SETTINGS

# Columns of the output table, in output order.
RESULT_COLUMNS = ['username', 'name', 'puzzle', 'cost', 'cycles', 'area', 'notes']

# Manually chosen crop regions based on the known GIF layout, as (x, y, w, h)
# in pixels of the first frame.
PUZZLE_REGION = (15, 600, 330, 28)
COST_REGION = (412, 603, 65, 22)
CYCLES_REGION = (577, 603, 65, 22)
AREA_REGION = (739, 603, 65, 22)

# Scratch file used to hand the first GIF frame from PIL over to OpenCV.
TEMP_FRAME_PATH = "temp.jpg"


def find_text(dims, gray, output, content):
    """Run OCR on one rectangular region of a grayscale frame.

    Args:
        dims: Region to read, as an ``(x, y, w, h)`` tuple in pixels.
        gray: Grayscale image (as returned by ``cv2.cvtColor``) to crop from.
        output: Colour image on which a green rectangle marking the region
            is drawn in place (debug aid).
        content: ``'digits'`` or ``'digits_with_6'`` to restrict recognition
            to the characters 0-9; any other value reads unrestricted text.
            ``'digits_with_6'`` additionally drops the last recognised
            character.

    Returns:
        The recognised text with surrounding whitespace stripped, or ``""``
        when the region lies entirely outside the image.
    """
    x, y, w, h = dims
    roi = gray[y:y + h, x:x + w]
    if roi.size == 0:
        # The frame is smaller than the expected layout; nothing to read.
        return ""
    roi = cv2.bitwise_not(roi)

    if content in ('digits', 'digits_with_6'):
        config = "--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789"
    else:
        config = "--oem 3 --psm 7"
    text = pytesseract.image_to_string(roi, config=config).strip()

    # Remove the extra 6 (actually the G for Gold) for cost value.
    # NOTE(review): this drops the last character unconditionally, so it
    # assumes the trailing G is always recognised as a digit -- confirm.
    if content == 'digits_with_6':
        text = text[:-1]

    cv2.rectangle(output, (x, y), (x + w, y + h), (0, 255, 0), 2)
    return text


def _attachment_basename(attachment):
    """Return the final path component of an attachment entry."""
    return attachment.strip().replace('\\', '/').rsplit('/', 1)[-1]


def _load_discord_lookup(path):
    """Map attachment file names to ``(author, message content)`` tuples.

    Rows are processed in file order, so when several messages carry an
    attachment with the same file name the last one wins.
    """
    lookup = {}
    for _, row in pd.read_csv(path).iterrows():
        attachments = row['Attachments']
        if pd.isna(attachments):
            # Message without attachments: the cell is read as NaN.
            continue
        content = row['Content']
        notes = "" if pd.isna(content) else content
        for attachment in str(attachments).split(','):
            lookup[_attachment_basename(attachment)] = (row['Author'], notes)
    return lookup


def _load_display_names(path):
    """Map usernames to display names; the first entry per username wins."""
    table = pd.read_csv(path).drop_duplicates(subset='username', keep='first')
    return dict(zip(table['username'], table['name']))


def _load_first_frame(gif_path):
    """Return the first frame of a GIF as a BGR OpenCV image (or None).

    The frame is converted through a temporary JPEG file that OpenCV then
    reads back -- presumably because ``cv2.imread`` cannot be relied upon to
    decode GIF files directly.
    """
    with Image.open(gif_path) as gif:
        gif.seek(0)
        gif.convert("RGB").save(TEMP_FRAME_PATH, "JPEG")
    return cv2.imread(TEMP_FRAME_PATH)


def main():
    """Read every GIF in ``gifs_path`` and write the OCR results as CSV."""
    # Sub-directories are ignored; sorting makes the output order stable.
    filenames = sorted(
        entry for entry in os.listdir(gifs_path)
        if os.path.isfile(os.path.join(gifs_path, entry))
    )

    # Both lookups are built once instead of rescanning the CSVs per GIF.
    discord_lookup = _load_discord_lookup(discord_data_path) if use_discord_data else {}
    display_names = _load_display_names(usernames_data_path) if use_real_names else {}

    rows = []
    try:
        for filename in filenames:
            img_path = os.path.join(gifs_path, filename)
            try:
                img = _load_first_frame(img_path)
            except OSError as err:
                # Covers files PIL cannot identify or read.
                print(f"Skipping {filename}: {err}")
                continue
            if img is None:
                print(f"Skipping {filename}: could not read the converted frame")
                continue

            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            output = img.copy()

            # "username", "name" and "notes" fields are filled in here.
            # Without a Discord match the file name stands in for the user.
            username, notes = discord_lookup.get(filename, (filename, ""))
            name = display_names.get(username, "")

            puzzle = find_text(PUZZLE_REGION, gray, output, 'letters')
            cost = find_text(COST_REGION, gray, output, 'digits_with_6')
            cycles = find_text(CYCLES_REGION, gray, output, 'digits')
            area = find_text(AREA_REGION, gray, output, 'digits')

            rows.append([username, name, puzzle, cost, cycles, area, notes])

            if debug_image_path:
                cv2.imwrite(debug_image_path, output)
    finally:
        # Clean up even on failure, and tolerate an empty GIF folder.
        if os.path.exists(TEMP_FRAME_PATH):
            os.remove(TEMP_FRAME_PATH)

    results = pd.DataFrame(rows, columns=RESULT_COLUMNS)
    print("Done.")
    print(results)
    results.to_csv(output_path)


if __name__ == "__main__":
    main()