- Download boss battle sprites from Bulbapedia for all 21 games (313 bosses) - Download 36 missing badge images from Bulbapedia archives - Fix non-square boss sprite stretching (use h-10 w-auto instead of w-10 h-10) - Fix firered Giovanni seed data (badge was "50" instead of "Earth Badge", missing ground specialty_type) - Add one-time fetch scripts for boss sprites and badges Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
147 lines
4.6 KiB
Python
147 lines
4.6 KiB
Python
#!/usr/bin/env python3
|
|
"""One-time script to download badge images from Bulbapedia."""
|
|
|
|
import json
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Both directories are resolved relative to the directory two levels above
# this file (parents[1] is the parent of the directory containing the script).

# Where badge images are looked up and where downloads are written.
BADGES_DIR = Path(__file__).resolve().parents[1].joinpath(
    "frontend", "public", "badges"
)

# Directory scanned for "*-bosses.json" seed files.
SEEDS_DIR = Path(__file__).resolve().parents[1].joinpath(
    "backend", "src", "app", "seeds", "data"
)

# MediaWiki API endpoint queried to turn "File:" titles into direct image URLs.
MEDIAWIKI_API = "https://archives.bulbagarden.net/w/api.php"
|
|
|
|
|
|
def get_referenced_badges() -> set[str]:
    """Collect every distinct, non-empty ``badge_image_url`` from the seed files.

    Each ``*-bosses.json`` file directly inside ``SEEDS_DIR`` is parsed and
    iterated; entries are read with ``.get("badge_image_url")``, so a missing
    key is treated the same as a null/empty value and skipped.

    Returns:
        The set of truthy ``badge_image_url`` values found across all files.

    Raises:
        json.JSONDecodeError: If a seed file is not valid JSON.
    """
    badges: set[str] = set()
    for seed_file in SEEDS_DIR.glob("*-bosses.json"):
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # default encoding, which may not be UTF-8 on every system.
        bosses = json.loads(seed_file.read_text(encoding="utf-8"))
        badges.update(
            url for boss in bosses if (url := boss.get("badge_image_url"))
        )
    return badges
|
|
|
|
|
|
def get_missing_badges() -> list[str]:
    """List referenced badge paths that have no file in ``BADGES_DIR``.

    Only the final path component of each referenced value is used to build
    the on-disk location. The result follows the sorted order of the
    referenced paths.
    """
    return [
        ref
        for ref in sorted(get_referenced_badges())
        if not (BADGES_DIR / Path(ref).name).exists()
    ]
|
|
|
|
|
|
def badge_path_to_bulbapedia_filename(badge_path: str) -> str:
    """Map a local badge path to a Bulbapedia-style file name.

    The stem of the path is split on "-", each piece is capitalized
    (first character upper-cased, the rest lower-cased), and the pieces are
    joined with "_" before ".png" is appended, e.g.
    ``/badges/coal-badge.png`` -> ``Coal_Badge.png``.
    """
    words = Path(badge_path).stem.split("-")
    return "_".join(map(str.capitalize, words)) + ".png"
|
|
|
|
|
|
def resolve_image_urls(filenames: list[str]) -> dict[str, str | None]:
|
|
"""Use MediaWiki API to resolve image filenames to direct URLs."""
|
|
results = {}
|
|
# Process in batches of 50
|
|
for i in range(0, len(filenames), 50):
|
|
batch = filenames[i : i + 50]
|
|
titles = "|".join(f"File:{fn}" for fn in batch)
|
|
cmd = [
|
|
"curl",
|
|
"-s",
|
|
f"{MEDIAWIKI_API}?action=query&titles={titles}"
|
|
"&prop=imageinfo&iiprop=url&format=json",
|
|
]
|
|
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
|
|
data = json.loads(result.stdout)
|
|
|
|
# Build normalization map (API normalizes underscores to spaces)
|
|
norm_map = {}
|
|
for entry in data.get("query", {}).get("normalized", []):
|
|
norm_map[entry["to"]] = entry["from"]
|
|
|
|
pages = data.get("query", {}).get("pages", {})
|
|
for page in pages.values():
|
|
title = page.get("title", "").replace("File:", "")
|
|
# Map back to original underscore form
|
|
original = norm_map.get(f"File:{title}", f"File:{title}").replace(
|
|
"File:", ""
|
|
)
|
|
imageinfo = page.get("imageinfo", [])
|
|
if imageinfo:
|
|
results[original] = imageinfo[0]["url"]
|
|
else:
|
|
results[original] = None
|
|
return results
|
|
|
|
|
|
def download_file(url: str, dest: Path) -> bool:
    """Download ``url`` to ``dest`` using curl.

    The transfer is written to a temporary sibling file (``<dest>.part``) and
    moved onto ``dest`` only after it succeeded, so a failed or empty download
    never leaves a file at ``dest`` that a later existence check would mistake
    for a valid image. An existing ``dest`` is left untouched on failure.

    Args:
        url: Address to fetch; redirects are followed.
        dest: Final location of the file. Missing parent directories are
            created.

    Returns:
        True if curl exited successfully and produced a non-empty file that
        was moved to ``dest``; False otherwise.
    """
    dest.parent.mkdir(parents=True, exist_ok=True)
    partial = dest.with_name(dest.name + ".part")

    # -f makes curl exit non-zero on HTTP errors instead of saving the
    # server's error page as if it were the requested file.
    completed = subprocess.run(
        ["curl", "-sfL", "-o", str(partial), url],
        capture_output=True,
        text=True,
    )

    succeeded = (
        completed.returncode == 0
        and partial.exists()
        and partial.stat().st_size > 0
    )
    if succeeded:
        partial.replace(dest)
        return True

    # Remove whatever curl may have written so no stray file is left behind.
    partial.unlink(missing_ok=True)
    return False
|
|
|
|
|
|
def main():
    """Find missing badge images, resolve their URLs, and download them.

    Prints the list of missing badges, a per-badge OK/FAILED line, and a final
    summary including any badges that could not be fetched.
    """
    missing = get_missing_badges()
    if not missing:
        print("All badge images already exist!")
        return

    print(f"Missing {len(missing)} badge images:")
    for path in missing:
        print(f" {path}")

    # Local badge path -> file name expected on Bulbapedia.
    path_to_filename = {
        path: badge_path_to_bulbapedia_filename(path) for path in missing
    }

    print(f"\nResolving {len(path_to_filename)} image URLs from Bulbapedia...")
    # De-duplicate names before querying the API.
    url_map = resolve_image_urls(list(set(path_to_filename.values())))

    downloaded = 0
    failures = []
    for badge_path in sorted(path_to_filename):
        bp_filename = path_to_filename[badge_path]
        url = url_map.get(bp_filename)

        if url:
            target = BADGES_DIR / Path(badge_path).name
            if download_file(url, target):
                print(f" OK: {badge_path}")
                downloaded += 1
                continue
            print(f" FAILED: {badge_path} (download error)")
        else:
            print(f" FAILED: {badge_path} (no URL for {bp_filename})")

        failures.append((badge_path, bp_filename))

    print(f"\nDownloaded: {downloaded}/{len(missing)}")
    if not failures:
        return

    print(f"Failed ({len(failures)}):")
    for failed_path, failed_name in failures:
        print(f" {failed_path} -> {failed_name}")
|
|
|
|
|
|
# Run the download only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|