diff --git a/.beans/nuzlocke-tracker-dqyb--set-up-python-tool-scaffold.md b/.beans/nuzlocke-tracker-dqyb--set-up-python-tool-scaffold.md index 6d9d3fb..6cd089f 100644 --- a/.beans/nuzlocke-tracker-dqyb--set-up-python-tool-scaffold.md +++ b/.beans/nuzlocke-tracker-dqyb--set-up-python-tool-scaffold.md @@ -1,11 +1,11 @@ --- # nuzlocke-tracker-dqyb title: Set up Python tool scaffold -status: in-progress +status: completed type: task priority: normal created_at: 2026-02-11T08:42:58Z -updated_at: 2026-02-11T08:44:03Z +updated_at: 2026-02-11T08:49:55Z parent: nuzlocke-tracker-bs05 blocking: - nuzlocke-tracker-zno2 diff --git a/.beans/nuzlocke-tracker-zno2--build-reference-data-mappings.md b/.beans/nuzlocke-tracker-zno2--build-reference-data-mappings.md index e5ac9cd..37dcb52 100644 --- a/.beans/nuzlocke-tracker-zno2--build-reference-data-mappings.md +++ b/.beans/nuzlocke-tracker-zno2--build-reference-data-mappings.md @@ -1,11 +1,11 @@ --- # nuzlocke-tracker-zno2 title: Build reference data mappings -status: todo +status: in-progress type: task priority: normal created_at: 2026-02-11T08:43:02Z -updated_at: 2026-02-11T08:43:33Z +updated_at: 2026-02-11T08:50:29Z parent: nuzlocke-tracker-bs05 blocking: - nuzlocke-tracker-rfg0 @@ -15,10 +15,10 @@ Build the lookup maps needed to translate PokeDB identifiers into our seed forma ## Checklist -- [ ] **Pokemon form mapping**: Map `pokemon_form_identifier` (e.g. "pidgey-default", "mr-mime-default") to `pokeapi_id` using the existing `backend/src/app/seeds/data/pokemon.json` as reference. Handle naming convention differences between PokeDB and PokeAPI (may need fuzzy matching or a manual override table). -- [ ] **Location area mapping**: Map `location_area_identifier` to human-readable location names and regions using `locations.json` and `location_areas.json`. Produce names matching our existing format (e.g. "Route 1", "Viridian Forest"). -- [ ] **Encounter method mapping**: Map PokeDB's 73 encounter methods to our simplified set. 
See the draft mapping in the parent bean. Implement as a dictionary/config that's easy to extend. -- [ ] **Version mapping**: Map PokeDB `version_identifiers` to our game slugs (should mostly be 1:1 but verify). +- [x] **Pokemon form mapping**: Map `pokemon_form_identifier` (e.g. "pidgey-default", "mr-mime-default") to `pokeapi_id` using the existing `backend/src/app/seeds/data/pokemon.json` as reference. Handle naming convention differences between PokeDB and PokeAPI (may need fuzzy matching or a manual override table). +- [x] **Location area mapping**: Map `location_area_identifier` to human-readable location names and regions using `locations.json` and `location_areas.json`. Produce names matching our existing format (e.g. "Route 1", "Viridian Forest"). +- [x] **Encounter method mapping**: Map PokeDB's 73 encounter methods to our simplified set. See the draft mapping in the parent bean. Implement as a dictionary/config that's easy to extend. +- [x] **Version mapping**: Map PokeDB `version_identifiers` to our game slugs (should mostly be 1:1 but verify). ## Notes - The pokemon form mapping is the trickiest part — PokeDB uses identifiers like "mr-mime-default" while our pokemon.json uses names like "Mr. Mime" and pokeapi IDs diff --git a/.gitignore b/.gitignore index 5eed95c..d288b78 100644 --- a/.gitignore +++ b/.gitignore @@ -60,8 +60,9 @@ temp/ *.tmp *.temp -# PokeAPI fetch cache +# PokeAPI / PokeDB data cache .pokeapi_cache/ +.pokedb_cache/ # Go build output tools/fetch-pokeapi/fetch-pokeapi diff --git a/tools/import-pokedb/import_pokedb/__main__.py b/tools/import-pokedb/import_pokedb/__main__.py index 603daf3..5343aed 100644 --- a/tools/import-pokedb/import_pokedb/__main__.py +++ b/tools/import-pokedb/import_pokedb/__main__.py @@ -1,11 +1,13 @@ """CLI entry point for the PokeDB import tool. 
Usage: - # From repo root: - python -m import_pokedb ./pokedb-export/ + # From tools/import-pokedb/ (auto-downloads PokeDB data on first run): + python -m import_pokedb # With options: - python -m import_pokedb ./pokedb-export/ --output backend/src/app/seeds/data/ --game firered + python -m import_pokedb --game firered + python -m import_pokedb --pokedb-dir ~/my-pokedb-data/ + python -m import_pokedb --output /tmp/seed-output/ """ from __future__ import annotations @@ -15,6 +17,7 @@ import sys from pathlib import Path from .loader import load_pokedb_data, load_seed_config +from .mappings import PokemonMapper, LocationMapper, build_version_map, map_encounter_method SEEDS_DIR_CANDIDATES = [ Path("backend/src/app/seeds"), # from repo root @@ -37,9 +40,10 @@ def build_parser() -> argparse.ArgumentParser: description="Convert PokeDB.org JSON data exports into nuzlocke-tracker seed format.", ) parser.add_argument( - "pokedb_dir", + "--pokedb-dir", type=Path, - help="Path to directory containing PokeDB JSON export files", + default=None, + help="Path to directory containing PokeDB JSON export files (default: .pokedb_cache/)", ) parser.add_argument( "--output", @@ -60,12 +64,8 @@ def main(argv: list[str] | None = None) -> None: parser = build_parser() args = parser.parse_args(argv) - pokedb_dir: Path = args.pokedb_dir - if not pokedb_dir.is_dir(): - print(f"Error: {pokedb_dir} is not a directory", file=sys.stderr) - sys.exit(1) - seeds_dir = find_seeds_dir() + pokedb_dir: Path = args.pokedb_dir or (seeds_dir / ".pokedb_cache") output_dir: Path = args.output or (seeds_dir / "data") output_dir.mkdir(parents=True, exist_ok=True) @@ -107,8 +107,42 @@ def main(argv: list[str] | None = None) -> None: ) print(f"Target: all {total_games} games") - # TODO: Processing pipeline (subtasks zno2, rfg0, gkcy) - print("\nScaffold loaded successfully. 
Processing pipeline not yet implemented.") + # Build mappings + print("\nBuilding mappings...") + + pokemon_json = seeds_dir / "data" / "pokemon.json" + pokemon_mapper = PokemonMapper(pokemon_json, pokedb) + + location_mapper = LocationMapper(pokedb) + + version_map = build_version_map(pokedb, config.version_groups) + print(f" Mapped {len(version_map)} PokeDB versions to our game slugs") + + # Report encounter method coverage + pokedb_methods = {e.get("encounter_method_identifier", "") for e in pokedb.encounters} + pokedb_methods.discard("") + mapped_methods = {m for m in pokedb_methods if map_encounter_method(m) is not None} + unmapped_methods = pokedb_methods - mapped_methods + print(f" Encounter methods: {len(mapped_methods)} mapped, {len(unmapped_methods)} unmapped") + if unmapped_methods: + print(" Unmapped methods:", file=sys.stderr) + for m in sorted(unmapped_methods): + print(f" - {m}", file=sys.stderr) + + # Spot-check pokemon mapping on actual encounter data + form_ids_in_encounters = {e.get("pokemon_form_identifier", "") for e in pokedb.encounters} + form_ids_in_encounters.discard("") + mapped_forms = 0 + for fid in form_ids_in_encounters: + if pokemon_mapper.lookup(fid) is not None: + mapped_forms += 1 + total_forms = len(form_ids_in_encounters) + print(f" Pokemon forms: {mapped_forms}/{total_forms} mapped from encounters") + + pokemon_mapper.report_unmapped() + + # TODO: Processing pipeline (subtasks rfg0, gkcy) + print("\nMappings built. 
Processing pipeline not yet implemented.") if __name__ == "__main__": diff --git a/tools/import-pokedb/import_pokedb/loader.py b/tools/import-pokedb/import_pokedb/loader.py index 22632a8..bce5092 100644 --- a/tools/import-pokedb/import_pokedb/loader.py +++ b/tools/import-pokedb/import_pokedb/loader.py @@ -4,17 +4,20 @@ from __future__ import annotations import json import sys +import urllib.request from pathlib import Path from typing import Any -REQUIRED_FILES = [ - "encounters.json", - "locations.json", - "location_areas.json", - "encounter_methods.json", - "versions.json", - "pokemon_forms.json", -] +_CDN_BASE = "https://cdn.pokedb.org" + +REQUIRED_FILES: dict[str, str] = { + "encounters.json": f"{_CDN_BASE}/data_export_encounters_json", + "locations.json": f"{_CDN_BASE}/data_export_locations_json", + "location_areas.json": f"{_CDN_BASE}/data_export_location_areas_json", + "encounter_methods.json": f"{_CDN_BASE}/data_export_encounter_methods_json", + "versions.json": f"{_CDN_BASE}/data_export_versions_json", + "pokemon_forms.json": f"{_CDN_BASE}/data_export_pokemon_forms_json", +} class PokeDBData: @@ -48,23 +51,38 @@ class PokeDBData: ) +def download_pokedb_data(data_dir: Path) -> None: + """Download missing PokeDB JSON export files into data_dir.""" + data_dir.mkdir(parents=True, exist_ok=True) + missing = {f: url for f, url in REQUIRED_FILES.items() if not (data_dir / f).exists()} + if not missing: + return + + print(f"Downloading {len(missing)} PokeDB file(s) to {data_dir}...") + for filename, url in missing.items(): + dest = data_dir / filename + print(f" {filename}...", end="", flush=True) + try: + urllib.request.urlretrieve(url, dest) + size_mb = dest.stat().st_size / (1024 * 1024) + print(f" {size_mb:.1f} MB") + except Exception as e: + print(f" FAILED", file=sys.stderr) + print(f"Error downloading {url}: {e}", file=sys.stderr) + sys.exit(1) + print() + + def load_pokedb_data(data_dir: Path) -> PokeDBData: """Load all PokeDB JSON export files from a 
directory. - Exits with an error message if any required files are missing or unparseable. + Downloads any missing files automatically, then validates and loads them. """ + download_pokedb_data(data_dir) + missing = [f for f in REQUIRED_FILES if not (data_dir / f).exists()] if missing: - print( - f"Error: Missing required PokeDB files in {data_dir}:", - file=sys.stderr, - ) - for f in missing: - print(f" - {f}", file=sys.stderr) - print( - "\nDownload the JSON export from https://pokedb.org/data-export", - file=sys.stderr, - ) + print(f"Error: Still missing files after download: {missing}", file=sys.stderr) sys.exit(1) def _load(filename: str) -> list[dict[str, Any]]: diff --git a/tools/import-pokedb/import_pokedb/mappings.py b/tools/import-pokedb/import_pokedb/mappings.py new file mode 100644 index 0000000..8d4d097 --- /dev/null +++ b/tools/import-pokedb/import_pokedb/mappings.py @@ -0,0 +1,384 @@ +"""Reference data mappings: PokeDB identifiers → seed format values.""" + +from __future__ import annotations + +import json +import re +import sys +from pathlib import Path +from typing import Any + +from .loader import PokeDBData + + +# --------------------------------------------------------------------------- +# Encounter method mapping +# --------------------------------------------------------------------------- + +# PokeDB encounter_method_identifier → our simplified method name. +# Keys can be exact matches or prefix patterns (ending with *). 
+ENCOUNTER_METHOD_MAP: dict[str, str] = { + # Walking / grass / cave + "walking-tall-grass": "walk", + "walking-long-grass": "walk", + "walking-cave": "walk", + "walking-bridge": "walk", + "walking-building": "walk", + "walking-sand": "walk", + "walking-snow": "walk", + "walking-rough-terrain": "walk", + "walking-marsh": "walk", + "walking-puddle": "walk", + "walking-flower-field": "walk", + "walking-ice": "walk", + "walking-forest": "walk", + "walking-snowfield": "walk", + "dark-grass": "walk", + "shaking-grass": "walk", + "rustling-grass": "walk", + "yellow-flowers": "walk", + "red-flowers": "walk", + "purple-flowers": "walk", + # Surfing + "surfing": "surf", + "surfing-ocean": "surf", + "surfing-puddle": "surf", + "surfing-rapids": "surf", + "surfing-underwater": "surf", + "rippling-water": "surf", + # Fishing + "fishing-old-rod": "old-rod", + "fishing-good-rod": "good-rod", + "fishing-super-rod": "super-rod", + "fishing": "fishing", + "fishing-special": "fishing", + # Rock smash + "rock-smash": "rock-smash", + # Headbutt + "headbutt-low": "headbutt", + "headbutt-normal": "headbutt", + "headbutt-high": "headbutt", + # Gift / special acquisition + "npc-gift": "gift", + "egg": "gift", + "revive": "gift", + "fossil": "gift", + # Trade + "npc-trade": "trade", + # Overworld / symbol encounters (Gen 8+) + "symbol-encounter": "walk", + "wanderer": "walk", + "flying": "walk", + # Static / fixed + "fixed-encounter": "static", + "static-encounter": "static", + "legendary-encounter": "static", + "interactable": "static", + # Special methods + "swarm": "swarm", + "poke-radar": "pokeradar", + "dual-slot-mode": "dual-slot", + "honey-tree": "honey", + "trophy-garden": "walk", + "great-marsh": "walk", + "cave-spot": "walk", + "bubble-spot": "surf", + "sand-spot": "walk", + "horde": "walk", + "sos-encounter": "walk", + "ambush": "walk", + # Seaweed / diving + "diving": "surf", + "seaweed": "surf", + # Raids + "max-raid": "raid", + "dynamax-adventure": "raid", + "tera-raid": 
"raid", + # Misc + "roaming": "roaming", + "safari-zone": "walk", + "bug-contest": "walk", +} + +# Prefix-based fallbacks for methods not explicitly listed above. +_METHOD_PREFIX_MAP: list[tuple[str, str]] = [ + ("walking-", "walk"), + ("surfing-", "surf"), + ("fishing-", "fishing"), + ("headbutt-", "headbutt"), + ("flying-", "walk"), +] + + +def map_encounter_method(method_identifier: str) -> str | None: + """Map a PokeDB encounter method to our simplified method name. + + Returns None if the method is unrecognized. + """ + if method_identifier in ENCOUNTER_METHOD_MAP: + return ENCOUNTER_METHOD_MAP[method_identifier] + + for prefix, mapped in _METHOD_PREFIX_MAP: + if method_identifier.startswith(prefix): + return mapped + + return None + + +# --------------------------------------------------------------------------- +# Version mapping +# --------------------------------------------------------------------------- + +# PokeDB version identifiers that differ from our game slugs. +# Most are 1:1, these handle exceptions. +_VERSION_OVERRIDES: dict[str, str] = { + "lets-go-pikachu": "lets-go-pikachu", + "lets-go-eevee": "lets-go-eevee", +} + + +def build_version_map( + pokedb: PokeDBData, + version_groups: dict[str, Any], +) -> dict[str, str]: + """Build a mapping from PokeDB version_identifier → our game slug. + + Returns the mapping dict. Logs warnings for unmapped versions. 
+ """ + # Collect all our known game slugs + our_slugs: set[str] = set() + for vg in version_groups.values(): + for slug in vg.get("versions", []): + our_slugs.add(slug) + + # Collect all PokeDB version identifiers + pokedb_versions: set[str] = set() + for v in pokedb.versions: + identifier = v.get("identifier", "") + if identifier: + pokedb_versions.add(identifier) + + mapping: dict[str, str] = {} + + for pdb_ver in sorted(pokedb_versions): + if pdb_ver in _VERSION_OVERRIDES: + mapping[pdb_ver] = _VERSION_OVERRIDES[pdb_ver] + elif pdb_ver in our_slugs: + mapping[pdb_ver] = pdb_ver + # else: PokeDB version not in our version_groups (expected for some) + + # Report our games that have no PokeDB mapping + mapped_slugs = set(mapping.values()) + unmapped_ours = our_slugs - mapped_slugs + if unmapped_ours: + print(f" Versions in our config with no PokeDB match: {sorted(unmapped_ours)}") + + return mapping + + +# --------------------------------------------------------------------------- +# Pokemon form mapping +# --------------------------------------------------------------------------- + +def _normalize_slug(identifier: str) -> str: + """Normalize a PokeDB pokemon_form_identifier to a PokeAPI-style slug. + + PokeDB uses "pidgey-default" for base forms — strip the "-default" suffix. + Non-default forms like "rattata-alola" are already PokeAPI-style slugs. + """ + if identifier.endswith("-default"): + return identifier[: -len("-default")] + return identifier + + +def _name_to_slug(name: str) -> str: + """Convert a display name to a PokeAPI-style slug. + + "Bulbasaur" → "bulbasaur" + "Mr. 
Mime" → "mr-mime" + "Farfetch'd" → "farfetchd" + "Nidoran♀" → "nidoran-f" + "Nidoran♂" → "nidoran-m" + "Flabébé" → "flabebe" + "Type: Null" → "type-null" + """ + s = name.lower() + s = s.replace("♀", "-f").replace("♂", "-m") + s = s.replace("'", "").replace("'", "").replace(".", "").replace(":", "") + s = s.replace("é", "e").replace("É", "e") + s = s.replace(" ", "-") + # Collapse multiple hyphens + s = re.sub(r"-+", "-", s) + return s.strip("-") + + +def _name_to_form_slug(name: str) -> str | None: + """Convert a display name with form suffix to a PokeAPI-style slug. + + "Rattata (Alola)" → "rattata-alola" + "Basculin (Blue Striped)" → "basculin-blue-striped" + "Deoxys Normal" → "deoxys-normal" (space-separated variant) + """ + # Try parenthesized form: "Base (Suffix)" + m = re.match(r"^(.+?)\s*\((.+)\)$", name) + if m: + base = _name_to_slug(m.group(1)) + suffix = _name_to_slug(m.group(2)) + return f"{base}-{suffix}" + + # Try space-separated form: "Deoxys Normal" + parts = name.split() + if len(parts) >= 2: + return _name_to_slug(name) + + return None + + +class PokemonMapper: + """Maps PokeDB pokemon_form_identifier → (pokeapi_id, display_name).""" + + def __init__(self, pokemon_json_path: Path, pokedb: PokeDBData) -> None: + # Build slug → (pokeapi_id, name) from existing pokemon.json + self._slug_to_info: dict[str, tuple[int, str]] = {} + self._id_to_info: dict[int, tuple[int, str]] = {} # pokeapi_id → (national_dex, name) + self._unmapped: set[str] = set() + + if pokemon_json_path.exists(): + with open(pokemon_json_path) as f: + pokemon_list = json.load(f) + + for p in pokemon_list: + pid = p["pokeapi_id"] + name = p["name"] + ndex = p["national_dex"] + self._id_to_info[pid] = (ndex, name) + + # Index by base slug (from pokeapi_id for base forms) + slug = _name_to_slug(name) + self._slug_to_info[slug] = (pid, name) + + # Also index by form slug if it has a form suffix + form_slug = _name_to_form_slug(name) + if form_slug and form_slug != slug: + 
self._slug_to_info[form_slug] = (pid, name) + + # Build index from PokeDB pokemon_forms.json if it has useful fields + self._pokedb_form_index: dict[str, dict] = {} + for form in pokedb.pokemon_forms: + identifier = form.get("identifier", "") + if identifier: + self._pokedb_form_index[identifier] = form + + def lookup(self, pokemon_form_identifier: str | None) -> tuple[int, str] | None: + """Look up a PokeDB pokemon_form_identifier. + + Returns (pokeapi_id, display_name) or None if unmapped. + """ + if not pokemon_form_identifier: + return None + + slug = _normalize_slug(pokemon_form_identifier) + + # Direct slug match + if slug in self._slug_to_info: + return self._slug_to_info[slug] + + # Try the PokeDB form record for a pokemon_id field + form_record = self._pokedb_form_index.get(pokemon_form_identifier, {}) + pokemon_id = form_record.get("pokemon_id") + if pokemon_id and pokemon_id in self._id_to_info: + ndex, name = self._id_to_info[pokemon_id] + # Cache for future lookups + self._slug_to_info[slug] = (pokemon_id, name) + return (pokemon_id, name) + + # Track unmapped + if pokemon_form_identifier not in self._unmapped: + self._unmapped.add(pokemon_form_identifier) + + return None + + def report_unmapped(self) -> None: + """Print warnings for any unmapped identifiers.""" + if self._unmapped: + print( + f"\nWarning: {len(self._unmapped)} unmapped pokemon form identifiers:", + file=sys.stderr, + ) + for ident in sorted(self._unmapped): + print(f" - {ident}", file=sys.stderr) + + +# --------------------------------------------------------------------------- +# Location area mapping +# --------------------------------------------------------------------------- + +# Region prefixes to strip from location identifiers (matching Go tool behavior). 
_REGION_PREFIXES = [
    "kanto-", "johto-", "hoenn-", "sinnoh-",
    "unova-", "kalos-", "alola-", "galar-",
    "hisui-", "paldea-",
]


def _identifier_to_name(identifier: str) -> str:
    """Convert a hyphenated identifier to a Title Case display name.

    Hyphens become spaces and each word is title-cased; nothing is removed
    here, so any region stripping must happen before this call.

    "route-01-kanto"  → "Route 01 Kanto"
    "viridian-forest" → "Viridian Forest"
    """
    return identifier.replace("-", " ").title()


class LocationMapper:
    """Maps PokeDB location_area_identifier → (location_name, area_suffix)."""

    def __init__(self, pokedb: "PokeDBData") -> None:
        """Index PokeDB locations and location areas by identifier.

        Args:
            pokedb: Loaded PokeDB data; ``locations`` and ``location_areas``
                are read. Records without an "identifier" are ignored.
        """
        # location_area_identifier → parent location_identifier ("" if unknown)
        self._area_to_location: dict[str, str] = {}
        # location_identifier → location display name
        self._location_names: dict[str, str] = {}
        # location_area_identifier → area display name ("" if none)
        self._area_names: dict[str, str] = {}

        # Index locations; derive a name from the identifier when none is given
        for loc in pokedb.locations:
            identifier = loc.get("identifier", "")
            if not identifier:
                continue
            self._location_names[identifier] = (
                loc.get("name", "") or self._clean_location_name(identifier)
            )

        # Index location areas
        for area in pokedb.location_areas:
            area_id = area.get("identifier", "")
            if not area_id:
                continue
            # `or ""` also covers an explicit JSON null, keeping the str-typed
            # getters below from handing None back to callers.
            self._area_to_location[area_id] = area.get("location_identifier", "") or ""
            self._area_names[area_id] = area.get("name", "") or ""

    @staticmethod
    def _clean_location_name(identifier: str) -> str:
        """Clean a location identifier into a display name.

        Strips at most one leading region prefix from _REGION_PREFIXES, then
        converts the remainder to Title Case.
        """
        name = identifier
        for prefix in _REGION_PREFIXES:
            if name.startswith(prefix):
                name = name[len(prefix):]
                break
        return _identifier_to_name(name)

    def get_location_name(self, location_area_identifier: str) -> str:
        """Get the display name for a location area's parent location.

        Resolution order:
          1. the indexed display name of the parent location;
          2. a name derived from the parent location identifier, when the
             area's parent is known but absent from the location index;
          3. a name derived from the area identifier itself.
        """
        loc_id = self._area_to_location.get(location_area_identifier, "")
        if loc_id:
            indexed = self._location_names.get(loc_id)
            if indexed is not None:
                return indexed
            # Parent is known but not indexed: its identifier is a better
            # source than the area identifier, which may carry an area suffix.
            return self._clean_location_name(loc_id)
        # Fallback: derive from the area identifier itself
        return self._clean_location_name(location_area_identifier)

    def get_area_name(self, location_area_identifier: str) -> str:
        """Get the display name for a specific location area ("" if unknown)."""
        return self._area_names.get(location_area_identifier, "")

    def get_location_identifier(self, location_area_identifier: str) -> str:
        """Get the parent location identifier for a location area ("" if unknown)."""
        return self._area_to_location.get(location_area_identifier, "")