"""Fetch game data from PokeAPI and write static JSON seed files. Uses pokebase which provides built-in file caching — first run fetches from the API, subsequent runs are instant from disk cache. Usage: # Against public PokeAPI (cached after first run): podman compose exec -w /app/src api python -m app.seeds.fetch_pokeapi # Against local PokeAPI (no rate limits): podman compose exec -w /app/src api python -m app.seeds.fetch_pokeapi --base-url http://pokeapi-app:8000 """ import json import re import sys from pathlib import Path import pokebase as pb import pokebase.common as pb_common DATA_DIR = Path(__file__).parent / "data" # Game definitions VERSION_GROUPS = { "firered-leafgreen": { "versions": ["firered", "leafgreen"], "generation": 3, "region": "kanto", "region_id": 1, "games": { "firered": { "name": "Pokemon FireRed", "slug": "firered", "release_year": 2004, }, "leafgreen": { "name": "Pokemon LeafGreen", "slug": "leafgreen", "release_year": 2004, }, }, }, "emerald": { "versions": ["emerald"], "generation": 3, "region": "hoenn", "region_id": 3, "games": { "emerald": { "name": "Pokemon Emerald", "slug": "emerald", "release_year": 2005, }, }, }, "heartgold-soulsilver": { "versions": ["heartgold", "soulsilver"], "generation": 4, "region": "johto", "region_id": 2, "games": { "heartgold": { "name": "Pokemon HeartGold", "slug": "heartgold", "release_year": 2010, }, "soulsilver": { "name": "Pokemon SoulSilver", "slug": "soulsilver", "release_year": 2010, }, }, }, } # Encounter methods to include (excludes gift, legendary-only, etc.) INCLUDED_METHODS = { "walk", "surf", "old-rod", "good-rod", "super-rod", "rock-smash", "headbutt", } # Collect all pokemon dex numbers across games all_pokemon_dex: set[int] = set() def clean_location_name(name: str) -> str: """Convert PokeAPI location slug to a clean display name. e.g. 'kanto-route-1' -> 'Route 1' 'pallet-town' -> 'Pallet Town' """ for prefix in [ "kanto-", "johto-", "hoenn-", "sinnoh-", "unova-", "kalos-", "alola-", "galar-", ]: if name.startswith(prefix): name = name[len(prefix):] break name = name.replace("-", " ").title() name = re.sub(r"Route (\d+)", r"Route \1", name) return name def clean_area_name(area_name: str, location_name: str) -> str | None: """Extract meaningful area suffix, or None if it's the default area.""" if area_name.startswith(location_name): suffix = area_name[len(location_name):].strip("-").strip() if not suffix or suffix == "area": return None return suffix.replace("-", " ").title() return area_name.replace("-", " ").title() def get_encounters_for_area(area_id: int, version_name: str) -> list[dict]: """Get encounter data for a location area, filtered by version.""" area = pb.location_area(area_id) encounters = [] for pe in area.pokemon_encounters: pokemon_url = pe.pokemon.url dex_num = int(pokemon_url.rstrip("/").split("/")[-1]) pokemon_name = pe.pokemon.name for vd in pe.version_details: if vd.version.name != version_name: continue for enc in vd.encounter_details: method = enc.method.name if method not in INCLUDED_METHODS: continue encounters.append({ "pokemon_name": pokemon_name, "national_dex": dex_num, "method": method, "chance": enc.chance, "min_level": enc.min_level, "max_level": enc.max_level, }) return encounters def aggregate_encounters(raw_encounters: list[dict]) -> list[dict]: """Aggregate encounter rates by pokemon + method (sum chances across level ranges).""" agg: dict[tuple[int, str], dict] = {} for enc in raw_encounters: key = (enc["national_dex"], enc["method"]) if key not in agg: agg[key] = { "national_dex": enc["national_dex"], "pokemon_name": enc["pokemon_name"], "method": enc["method"], "encounter_rate": 0, "min_level": enc["min_level"], "max_level": enc["max_level"], } agg[key]["encounter_rate"] += enc["chance"] agg[key]["min_level"] = min(agg[key]["min_level"], enc["min_level"]) agg[key]["max_level"] = max(agg[key]["max_level"], enc["max_level"]) result = list(agg.values()) for r in result: r["encounter_rate"] = min(r["encounter_rate"], 100) return sorted(result, key=lambda x: (-x["encounter_rate"], x["pokemon_name"])) def process_version(version_name: str, vg_info: dict) -> list[dict]: """Process all locations for a specific game version.""" print(f"\n--- Processing {version_name} ---") region = pb.region(vg_info["region_id"]) location_refs = list(region.locations) # For HGSS, also include Kanto locations if version_name in ("heartgold", "soulsilver"): kanto = pb.region(1) location_refs = location_refs + list(kanto.locations) print(f" Found {len(location_refs)} locations") routes = [] order = 1 for loc_ref in location_refs: loc_name = loc_ref.name loc_id = int(loc_ref.url.rstrip("/").split("/")[-1]) display_name = clean_location_name(loc_name) location = pb.location(loc_id) areas = location.areas if not areas: continue all_encounters: list[dict] = [] area_specific: dict[str, list[dict]] = {} for area_ref in areas: area_id = int(area_ref.url.rstrip("/").split("/")[-1]) area_slug = area_ref.name area_suffix = clean_area_name(area_slug, loc_name) encounters = get_encounters_for_area(area_id, version_name) if not encounters: continue if area_suffix and len(areas) > 1: area_specific[area_suffix] = encounters else: all_encounters.extend(encounters) # Area-specific encounters become separate routes if area_specific: for area_suffix, area_encs in area_specific.items(): aggregated = aggregate_encounters(area_encs) if aggregated: route_name = f"{display_name} ({area_suffix})" for enc in aggregated: all_pokemon_dex.add(enc["national_dex"]) routes.append({ "name": route_name, "order": order, "encounters": aggregated, }) order += 1 if all_encounters: aggregated = aggregate_encounters(all_encounters) if aggregated: for enc in aggregated: all_pokemon_dex.add(enc["national_dex"]) routes.append({ "name": display_name, "order": order, "encounters": aggregated, }) order += 1 print(f" Routes with encounters: {len(routes)}") total_enc = sum(len(r["encounters"]) for r in routes) print(f" Total encounter entries: {total_enc}") return routes def fetch_pokemon_data(dex_numbers: set[int]) -> list[dict]: """Fetch Pokemon name/type data for all collected dex numbers.""" print(f"\n--- Fetching {len(dex_numbers)} Pokemon ---") pokemon_list = [] dex_sorted = sorted(dex_numbers) for i, dex in enumerate(dex_sorted, 1): poke = pb.pokemon(dex) types = [t.type.name for t in poke.types] pokemon_list.append({ "national_dex": dex, "name": poke.name.title().replace("-", " "), "types": types, "sprite_url": f"https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/{dex}.png", }) if i % 50 == 0 or i == len(dex_sorted): print(f" Fetched {i}/{len(dex_sorted)}") return sorted(pokemon_list, key=lambda x: x["national_dex"]) def write_json(filename: str, data): path = DATA_DIR / filename with open(path, "w") as f: json.dump(data, f, indent=2) print(f" -> {path}") def main(): # Check for custom base URL if "--base-url" in sys.argv: idx = sys.argv.index("--base-url") base_url = sys.argv[idx + 1] pb_common.BASE_URL = base_url + "/api/v2" print(f"Using custom PokeAPI: {base_url}") else: print("Using public PokeAPI (pokebase caches to disk after first fetch)") DATA_DIR.mkdir(parents=True, exist_ok=True) # Build games.json games = [] for vg_info in VERSION_GROUPS.values(): for game_info in vg_info["games"].values(): games.append({ "name": game_info["name"], "slug": game_info["slug"], "generation": vg_info["generation"], "region": vg_info["region"], "release_year": game_info["release_year"], }) write_json("games.json", games) print(f"Wrote {len(games)} games to games.json") # Process each version for vg_info in VERSION_GROUPS.values(): for ver_name in vg_info["versions"]: routes = process_version(ver_name, vg_info) write_json(f"{ver_name}.json", routes) # Fetch all Pokemon data pokemon = fetch_pokemon_data(all_pokemon_dex) write_json("pokemon.json", pokemon) print(f"\nWrote {len(pokemon)} Pokemon to pokemon.json") print("\nDone! JSON files written to seeds/data/") print("Review route ordering and curate as needed.") if __name__ == "__main__": main()