feature/boss-sprites-and-badges (#22)
Reviewed-on: TheFurya/nuzlocke-tracker#22
Co-authored-by: Julian Tabel <juliantabel.jt@gmail.com>
Co-committed-by: Julian Tabel <juliantabel.jt@gmail.com>
This commit was merged in pull request #22.
This commit is contained in:
146
scripts/fetch_badges.py
Normal file
146
scripts/fetch_badges.py
Normal file
@@ -0,0 +1,146 @@
|
||||
#!/usr/bin/env python3
|
||||
"""One-time script to download badge images from Bulbapedia."""
|
||||
|
||||
import json
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Directory that holds the badge images; resolved relative to the directory
# two levels above this script file.
BADGES_DIR = Path(__file__).resolve().parent.parent.joinpath(
    "frontend", "public", "badges"
)

# Directory that is scanned for the "*-bosses.json" seed files.
SEEDS_DIR = Path(__file__).resolve().parent.parent.joinpath(
    "backend", "src", "app", "seeds", "data"
)

# MediaWiki API endpoint queried to resolve image file names to URLs.
MEDIAWIKI_API = "https://archives.bulbagarden.net/w/api.php"
|
||||
|
||||
|
||||
def get_referenced_badges() -> set[str]:
    """Extract all unique non-empty ``badge_image_url`` values from seed files.

    Every ``*-bosses.json`` file directly inside ``SEEDS_DIR`` is parsed as a
    JSON array of boss objects. Entries whose ``badge_image_url`` is missing,
    ``null`` or an empty string are skipped.

    Returns:
        The set of distinct badge image paths/URLs referenced by the seeds.

    Raises:
        json.JSONDecodeError: If a seed file does not contain valid JSON.
        UnicodeDecodeError: If a seed file is not valid UTF-8.
    """
    badges: set[str] = set()
    for seed_file in SEEDS_DIR.glob("*-bosses.json"):
        # JSON interchange text is UTF-8; decode it explicitly so the result
        # does not depend on the platform's locale encoding.
        data = json.loads(seed_file.read_text(encoding="utf-8"))
        for boss in data:
            url = boss.get("badge_image_url")
            if url:
                badges.add(url)
    return badges
|
||||
|
||||
|
||||
def get_missing_badges() -> list[str]:
    """List referenced badge paths that have no file in ``BADGES_DIR``.

    Only the final path component of each referenced badge is used when
    checking for the file on disk. The result is in sorted order.
    """
    return [
        ref
        for ref in sorted(get_referenced_badges())
        if not (BADGES_DIR / Path(ref).name).exists()
    ]
|
||||
|
||||
|
||||
def badge_path_to_bulbapedia_filename(badge_path: str) -> str:
    """Translate a local badge path into a Bulbapedia-style file name.

    The file stem is split on hyphens, each piece is capitalized, the pieces
    are rejoined with underscores and ``.png`` is appended, for example
    ``/badges/coal-badge.png`` -> ``Coal_Badge.png``.
    """
    words = Path(badge_path).stem.split("-")  # "coal-badge" -> ["coal", "badge"]
    joined = "_".join(map(str.capitalize, words))
    return f"{joined}.png"
|
||||
|
||||
|
||||
def resolve_image_urls(filenames: list[str]) -> dict[str, str | None]:
|
||||
"""Use MediaWiki API to resolve image filenames to direct URLs."""
|
||||
results = {}
|
||||
# Process in batches of 50
|
||||
for i in range(0, len(filenames), 50):
|
||||
batch = filenames[i : i + 50]
|
||||
titles = "|".join(f"File:{fn}" for fn in batch)
|
||||
cmd = [
|
||||
"curl",
|
||||
"-s",
|
||||
f"{MEDIAWIKI_API}?action=query&titles={titles}"
|
||||
"&prop=imageinfo&iiprop=url&format=json",
|
||||
]
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
|
||||
data = json.loads(result.stdout)
|
||||
|
||||
# Build normalization map (API normalizes underscores to spaces)
|
||||
norm_map = {}
|
||||
for entry in data.get("query", {}).get("normalized", []):
|
||||
norm_map[entry["to"]] = entry["from"]
|
||||
|
||||
pages = data.get("query", {}).get("pages", {})
|
||||
for page in pages.values():
|
||||
title = page.get("title", "").replace("File:", "")
|
||||
# Map back to original underscore form
|
||||
original = norm_map.get(f"File:{title}", f"File:{title}").replace(
|
||||
"File:", ""
|
||||
)
|
||||
imageinfo = page.get("imageinfo", [])
|
||||
if imageinfo:
|
||||
results[original] = imageinfo[0]["url"]
|
||||
else:
|
||||
results[original] = None
|
||||
return results
|
||||
|
||||
|
||||
def download_file(url: str, dest: Path) -> bool:
    """Download ``url`` to ``dest`` using curl.

    The parent directory of ``dest`` is created if needed. curl follows
    redirects (``-L``) and is run with ``--fail`` so an HTTP error status is
    reported as a failure instead of the error page being saved as the image.

    Args:
        url: Direct URL of the file to fetch.
        dest: Path the file is written to.

    Returns:
        ``True`` when curl exited successfully and ``dest`` is a non-empty
        file; ``False`` otherwise. On failure any leftover ``dest`` file is
        removed.
    """
    dest.parent.mkdir(parents=True, exist_ok=True)
    result = subprocess.run(
        ["curl", "-sL", "--fail", "-o", str(dest), url],
        capture_output=True,
        text=True,
    )
    ok = result.returncode == 0 and dest.exists() and dest.stat().st_size > 0
    if not ok:
        # An empty or partial file would make the existence check in
        # get_missing_badges() treat this badge as already downloaded.
        dest.unlink(missing_ok=True)
    return ok
|
||||
|
||||
|
||||
def main():
    """Find missing badge images, resolve their URLs and download them.

    Prints the list of missing badges, a per-badge OK/FAILED line, and a
    final summary including every badge that could not be fetched.
    """
    missing = get_missing_badges()
    if not missing:
        print("All badge images already exist!")
        return

    print(f"Missing {len(missing)} badge images:")
    for entry in missing:
        print(f" {entry}")

    # Build mapping: badge_path -> bulbapedia_filename
    path_to_filename = {
        entry: badge_path_to_bulbapedia_filename(entry) for entry in missing
    }

    print(f"\nResolving {len(path_to_filename)} image URLs from Bulbapedia...")
    unique_names = list(set(path_to_filename.values()))
    url_map = resolve_image_urls(unique_names)

    # Download each badge, keeping track of what worked and what did not.
    success = 0
    failed = []
    for badge_path, bp_filename in sorted(path_to_filename.items()):
        url = url_map.get(bp_filename)
        if not url:
            print(f" FAILED: {badge_path} (no URL for {bp_filename})")
            failed.append((badge_path, bp_filename))
            continue

        if download_file(url, BADGES_DIR / Path(badge_path).name):
            print(f" OK: {badge_path}")
            success += 1
        else:
            print(f" FAILED: {badge_path} (download error)")
            failed.append((badge_path, bp_filename))

    print(f"\nDownloaded: {success}/{len(missing)}")
    if not failed:
        return
    print(f"Failed ({len(failed)}):")
    for badge_path, bp_filename in failed:
        print(f" {badge_path} -> {bp_filename}")
|
||||
|
||||
|
||||
# Run the downloader only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user