From 2a925be5db2e7ac67db1ab4eae687e198f1b6370 Mon Sep 17 00:00:00 2001 From: Legrems Date: Tue, 28 Oct 2025 23:10:46 +0100 Subject: [PATCH] first commit --- .gitignore | 178 ++++++++++++++++++++++++++++++++++++++++++++++++ .python-version | 1 + main.py | 125 ++++++++++++++++++++++++++++++++++ pyproject.toml | 14 ++++ 4 files changed, 318 insertions(+) create mode 100644 .gitignore create mode 100644 .python-version create mode 100644 main.py create mode 100644 pyproject.toml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4a02eb7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,178 @@ +# Created by https://www.toptal.com/developers/gitignore/api/python +# Edit at https://www.toptal.com/developers/gitignore?templates=python + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +### Python Patch ### +# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration +poetry.toml + +# ruff +.ruff_cache/ + +# LSP config files +pyrightconfig.json + +# End of https://www.toptal.com/developers/gitignore/api/python + +tags diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..24ee5b1 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.13 diff --git a/main.py b/main.py new file mode 100644 index 0000000..8af6876 --- /dev/null +++ b/main.py @@ -0,0 +1,125 @@ +# HOW TO USE +# 1. Install Tesseract (https://github.com/tesseract-ocr/tesseract/releases/) +# 2. Install required libraries (see in the code below) +# 3. Put all your Opus Magnum-generated gifs in a folder +# 4. Optional: if you are importing your gifs automatically from Discord using DiscordChatExporter, also save the CSV data file +# 5. Optional: if you want to make an automatic correspondence between the usernames and desired display names, you can create a usernames.csv file with 2 columns: username and name +# 6. 
Change the settings in the code below according to your needs +# 7. If everything goes well, the results should be output on the console as well as in a csv file. + + +# Import required packages +import cv2 +import pytesseract +from PIL import Image +import os +import pandas as pd + + +### SETTINGS + +# Mention the installed location of Tesseract-OCR in your system +# pytesseract.pytesseract.tesseract_cmd = 'C:/Program Files/Tesseract-OCR/tesseract.exe' +pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract' + +# Insert the path of the folder containing the GIFs here +gifs_path = "gifs_opus_magnum" + +# Do you want to include the Discord usernames in the output (requires Discord messages data file)? If False, will use name of file instead +use_discord_data = False +# Insert the path of the CSV file containing Discord messages data +discord_data_path = "EvLan - EvLan 2 - solutions-opus-magnum [1260715259514327070].csv" + +# Do you want to include the real names of the participants (requires username correspondence file)? 
+use_real_names = False
+# Insert the path of the CSV file containing username -> name correspondence (columns: username, name)
+usernames_data_path = "usernames.csv"
+
+# Insert here the desired path for the output CSV file
+output_path = "results_opus_magnum.csv"
+
+### END OF SETTINGS
+
+
+results = pd.DataFrame(columns=['username','name','puzzle','cost','cycles','area','notes'])
+
+filenames = os.listdir(gifs_path)
+
+if use_discord_data:
+    discord_data_df = pd.read_csv(discord_data_path)
+if use_real_names:
+    usernames_data_df = pd.read_csv(usernames_data_path)
+
+for filename in filenames:
+    img_path = os.path.join(gifs_path, filename)
+
+    # Convert the first frame of the GIF to a temporary JPG that OpenCV can read
+    with Image.open(img_path) as img:
+        width, height = img.size
+        img.seek(0)
+        rgb_img = img.convert("RGB")
+        rgb_img.save("temp.jpg", "JPEG")
+
+    # Read image from which text needs to be extracted
+    img = cv2.imread("temp.jpg")
+
+    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+
+    # Optional: resize for better OCR (fx=fy=1 leaves the size unchanged; raise both to upscale)
+    gray = cv2.resize(gray, None, fx=1, fy=1, interpolation=cv2.INTER_CUBIC)
+
+    # Manually crop regions based on known layout (x, y, w, h)
+    regions = [
+        (15, 600, 330, 28),  # PUZZLE NAME
+        (412, 603, 65, 22),  # COST
+        (577, 603, 65, 22),  # CYCLES
+        (739, 603, 65, 22)  # AREA
+    ]
+
+    output = img.copy()
+
+    # "username", "name" and "notes" are filled in here; username falls back to the file name
+    username = filename
+    notes = ""
+    if use_discord_data:
+        for _, row in discord_data_df.iterrows():
+            attachments = str(row['Attachments']).split(',')  # str(): an empty CSV cell is read as float NaN
+            for attachment in attachments:
+                if filename == attachment.strip().replace('\\', '/').rsplit('/', 1)[-1]:  # basename, either separator
+                    username = row['Author']
+                    notes = row['Content']
+    if use_real_names:
+        name = next(iter(usernames_data_df.loc[usernames_data_df['username'] == username, 'name']), "")  # "" if unknown
+    else:
+        name = ""
+
+    def find_text(dims, gray, output, content):  # OCR the (x, y, w, h) box `dims` of `gray`; returns the stripped text
+        x, y, w, h = dims
+        roi = gray[y:y+h, x:x+w]
+        roi = cv2.bitwise_not(roi)
+        if content in ('digits', 'digits_with_6'):
+            config = "--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789"
+        else:
+            config = "--oem 3 --psm 7"
+        text = pytesseract.image_to_string(roi, config=config).strip()
+        # Remove the extra 6 (actually the G for Gold) for cost value
+        if content == 'digits_with_6':
+            text = text[:-1]
+        cv2.rectangle(output, (x, y), (x+w, y+h), (0, 255, 0), 2)  # mark the region on the debug image
+        return text
+
+    puzzle = find_text(regions[0], gray, output, 'letters')
+    cost = find_text(regions[1], gray, output, 'digits_with_6')
+    cycles = find_text(regions[2], gray, output, 'digits')
+    area = find_text(regions[3], gray, output, 'digits')
+
+    results.loc[len(results)] = [username, name, puzzle, cost, cycles, area, notes]
+
+    # Save image with green rectangles around the considered zones, for debug purposes
+    #cv2.imwrite("output_debug.jpg", output)
+
+    os.remove("temp.jpg")
+
+print("Done.")
+print(results)
+results.to_csv(output_path)
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..ef34626
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,14 @@
+[project]
+name = "submitter"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.13"
+dependencies = [
+    "django>=5.2.7",
+    "django-cas-ng>=5.0.1",
+    "django-shinobi>=1.4.0",
+    "opencv-python>=4.12.0.88",
+    "pandas>=2.3.3",
+    "pytesseract>=0.3.13",
+]