Add reference data mappings and auto-download for PokeDB import tool

Add a mappings module with pokemon form, location area, encounter method,
and version mappings. Auto-download the PokeDB JSON exports from the CDN on
first run and cache them in .pokedb_cache/.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Julian Tabel
2026-02-11 10:02:57 +01:00
parent 1aa67665ff
commit df7ea64b9e
6 changed files with 477 additions and 40 deletions

View File

@@ -1,11 +1,13 @@
"""CLI entry point for the PokeDB import tool.
Usage:
# From repo root:
python -m import_pokedb ./pokedb-export/
# From tools/import-pokedb/ (auto-downloads PokeDB data on first run):
python -m import_pokedb
# With options:
python -m import_pokedb ./pokedb-export/ --output backend/src/app/seeds/data/ --game firered
python -m import_pokedb --game firered
python -m import_pokedb --pokedb-dir ~/my-pokedb-data/
python -m import_pokedb --output /tmp/seed-output/
"""
from __future__ import annotations
@@ -15,6 +17,7 @@ import sys
from pathlib import Path
from .loader import load_pokedb_data, load_seed_config
from .mappings import PokemonMapper, LocationMapper, build_version_map, map_encounter_method
SEEDS_DIR_CANDIDATES = [
Path("backend/src/app/seeds"), # from repo root
@@ -37,9 +40,10 @@ def build_parser() -> argparse.ArgumentParser:
description="Convert PokeDB.org JSON data exports into nuzlocke-tracker seed format.",
)
parser.add_argument(
"pokedb_dir",
"--pokedb-dir",
type=Path,
help="Path to directory containing PokeDB JSON export files",
default=None,
help="Path to directory containing PokeDB JSON export files (default: .pokedb_cache/)",
)
parser.add_argument(
"--output",
@@ -60,12 +64,8 @@ def main(argv: list[str] | None = None) -> None:
parser = build_parser()
args = parser.parse_args(argv)
pokedb_dir: Path = args.pokedb_dir
if not pokedb_dir.is_dir():
print(f"Error: {pokedb_dir} is not a directory", file=sys.stderr)
sys.exit(1)
seeds_dir = find_seeds_dir()
pokedb_dir: Path = args.pokedb_dir or (seeds_dir / ".pokedb_cache")
output_dir: Path = args.output or (seeds_dir / "data")
output_dir.mkdir(parents=True, exist_ok=True)
@@ -107,8 +107,42 @@ def main(argv: list[str] | None = None) -> None:
)
print(f"Target: all {total_games} games")
# TODO: Processing pipeline (subtasks zno2, rfg0, gkcy)
print("\nScaffold loaded successfully. Processing pipeline not yet implemented.")
# Build mappings
print("\nBuilding mappings...")
pokemon_json = seeds_dir / "data" / "pokemon.json"
pokemon_mapper = PokemonMapper(pokemon_json, pokedb)
location_mapper = LocationMapper(pokedb)
version_map = build_version_map(pokedb, config.version_groups)
print(f" Mapped {len(version_map)} PokeDB versions to our game slugs")
# Report encounter method coverage
pokedb_methods = {e.get("encounter_method_identifier", "") for e in pokedb.encounters}
pokedb_methods.discard("")
mapped_methods = {m for m in pokedb_methods if map_encounter_method(m) is not None}
unmapped_methods = pokedb_methods - mapped_methods
print(f" Encounter methods: {len(mapped_methods)} mapped, {len(unmapped_methods)} unmapped")
if unmapped_methods:
print(" Unmapped methods:", file=sys.stderr)
for m in sorted(unmapped_methods):
print(f" - {m}", file=sys.stderr)
# Spot-check pokemon mapping on actual encounter data
form_ids_in_encounters = {e.get("pokemon_form_identifier", "") for e in pokedb.encounters}
form_ids_in_encounters.discard("")
mapped_forms = 0
for fid in form_ids_in_encounters:
if pokemon_mapper.lookup(fid) is not None:
mapped_forms += 1
total_forms = len(form_ids_in_encounters)
print(f" Pokemon forms: {mapped_forms}/{total_forms} mapped from encounters")
pokemon_mapper.report_unmapped()
# TODO: Processing pipeline (subtasks rfg0, gkcy)
print("\nMappings built. Processing pipeline not yet implemented.")
if __name__ == "__main__":