Validate and regenerate all seed data from PokeDB

- Regenerate seed JSON for all 37 games with more complete PokeDB data
- Add category field to games.json (original/enhanced/remake/sequel/spinoff)
- Include all 1350 pokemon in pokemon.json with types and local sprites
- Build reverse index for PokeDB form lookups (types/sprites for evolutions)
- Move sprites to frontend/public/sprites, reference as /sprites/{id}.webp
- Truncate Sw/Sh den names to fit DB VARCHAR(100) limit
- Deduplicate route names and merge unnamed child areas into parent routes
- Populate 7 previously empty games (Sw/Sh, BDSP, PLA, Sc/Vi)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Julian Tabel
2026-02-11 11:52:51 +01:00
parent df55233c62
commit 872d7872ce
47 changed files with 463655 additions and 129874 deletions

View File

@@ -20,13 +20,18 @@ from .loader import load_pokedb_data, load_seed_config
from .mappings import PokemonMapper, LocationMapper, build_version_map, map_encounter_method
from .output import sort_routes, merge_special_encounters, write_game_json, write_games_json, write_pokemon_json
from .processing import filter_encounters_for_game, process_encounters, build_routes
from .sprites import download_sprites
from .sprites import download_all_sprites, download_sprites
# Candidate locations of the backend seeds directory, written as paths
# relative to the current working directory (one per supported launch dir).
SEEDS_DIR_CANDIDATES = [
Path("backend/src/app/seeds"), # from repo root
Path("../../backend/src/app/seeds"), # from tools/import-pokedb/
]
# Candidate locations of the frontend sprites directory, also relative to the
# current working directory. find_sprites_dir() tries them in this order and
# picks the first one whose parent directory exists.
SPRITES_DIR_CANDIDATES = [
Path("frontend/public/sprites"), # from repo root
Path("../../frontend/public/sprites"), # from tools/import-pokedb/
]
def find_seeds_dir() -> Path:
"""Locate the backend seeds directory."""
@@ -37,6 +42,15 @@ def find_seeds_dir() -> Path:
return Path("backend/src/app/seeds").resolve()
def find_sprites_dir() -> Path:
    """Return the absolute path of the frontend sprites directory.

    The entries of ``SPRITES_DIR_CANDIDATES`` are tried in order. The first
    one whose *parent* directory exists wins, so the ``sprites`` folder itself
    does not need to exist yet. If no candidate qualifies, the repo-root
    relative default is used.
    """
    chosen = next(
        (path for path in SPRITES_DIR_CANDIDATES if path.parent.exists()),
        Path("frontend/public/sprites"),  # fallback when nothing matches
    )
    return chosen.resolve()
def build_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(
prog="import-pokedb",
@@ -204,12 +218,15 @@ def main(argv: list[str] | None = None) -> None:
# Write per-game JSON
write_game_json(routes, output_dir, game_slug)
# Download sprites for all encountered pokemon
print("\nDownloading sprites...")
sprites_dir = output_dir / "sprites"
# Download sprites to frontend/public/sprites
sprites_dir = find_sprites_dir()
print(f"\nDownloading sprites to {sprites_dir}...")
sprite_map = download_sprites(pokemon_mapper, all_encountered_form_ids, sprites_dir)
print(f" Sprite map covers {len(sprite_map)} forms")
# Download sprites for ALL pokemon (including non-encountered evolutions etc.)
all_sprite_ids = download_all_sprites(pokemon_mapper, sprites_dir)
# Write global JSON files
print("\nWriting global data files...")
write_games_json(config, output_dir)

View File

@@ -302,6 +302,29 @@ _FORM_SUFFIX_MAP: dict[str, str] = {
}
# PokeDB type IDs → type names (from PokeDB's type system).
# The IDs are contiguous and 1-based, so the position of each name in the
# tuple below (counting from 1) is its PokeDB type ID.
TYPE_ID_MAP: dict[int, str] = dict(
    enumerate(
        (
            "normal",
            "fighting",
            "flying",
            "poison",
            "ground",
            "rock",
            "bug",
            "ghost",
            "steel",
            "fire",
            "water",
            "grass",
            "electric",
            "psychic",
            "ice",
            "dragon",
            "dark",
            "fairy",
        ),
        start=1,
    )
)
def _normalize_slug(identifier: str) -> str:
"""Normalize a PokeDB pokemon_form_identifier to a PokeAPI-style slug.
@@ -390,6 +413,7 @@ class PokemonMapper:
# Build slug → (pokeapi_id, name) from existing pokemon.json
self._slug_to_info: dict[str, tuple[int, str]] = {}
self._id_to_info: dict[int, tuple[int, str]] = {} # pokeapi_id → (national_dex, name)
self._existing_types: dict[int, list[str]] = {} # pokeapi_id → types (fallback)
self._unmapped: set[str] = set()
if pokemon_json_path.exists():
@@ -401,6 +425,8 @@ class PokemonMapper:
name = p["name"]
ndex = p["national_dex"]
self._id_to_info[pid] = (ndex, name)
if p.get("types"):
self._existing_types[pid] = p["types"]
# Index by base slug (from pokeapi_id for base forms)
slug = _name_to_slug(name)
@@ -413,11 +439,37 @@ class PokemonMapper:
# Build index from PokeDB pokemon_forms.json if it has useful fields
self._pokedb_form_index: dict[str, dict] = {}
# Reverse index: pokeapi_id → PokeDB form record (for non-encountered lookups)
self._id_to_pokedb_form: dict[int, dict] = {}
for form in pokedb.pokemon_forms:
identifier = form.get("identifier", "")
if identifier:
self._pokedb_form_index[identifier] = form
# Build reverse index from pokeapi_id → PokeDB form
# First, for all encountered lookups that succeed, we cache the mapping.
# Here we pre-build for default forms using ndex_id.
for form in pokedb.pokemon_forms:
ndex = form.get("ndex_id")
if ndex and form.get("is_default_form"):
# Default form matches the base species (ndex == pokeapi_id for base forms)
if ndex in self._id_to_info:
self._id_to_pokedb_form[ndex] = form
# Also look for alternate-form pokeapi_ids that share the same ndex
for pid, (p_ndex, _) in self._id_to_info.items():
if p_ndex == ndex and pid not in self._id_to_pokedb_form:
self._id_to_pokedb_form[pid] = form
# Map non-default forms to their specific pokeapi_ids where possible
for form in pokedb.pokemon_forms:
identifier = form.get("identifier", "")
if not identifier or form.get("is_default_form"):
continue
slug = _normalize_slug(identifier)
if slug in self._slug_to_info:
pid, _ = self._slug_to_info[slug]
self._id_to_pokedb_form[pid] = form
def lookup(self, pokemon_form_identifier: str | None) -> tuple[int, str] | None:
"""Look up a PokeDB pokemon_form_identifier.
@@ -487,6 +539,64 @@ class PokemonMapper:
return None
return self._pokedb_form_index.get(pokemon_form_identifier)
def all_pokemon(self) -> list[tuple[int, tuple[int, str]]]:
    """List every known pokemon, ordered by pokeapi_id.

    Each element is ``(pokeapi_id, (national_dex, name))`` taken from
    ``self._id_to_info``, which is filled from the existing pokemon.json.
    """
    entries = list(self._id_to_info.items())
    entries.sort()
    return entries
def get_types_for_id(self, pokeapi_id: int) -> list[str]:
    """Return the type names of a pokemon identified by pokeapi_id.

    The PokeDB form record found by ``_find_form_for_id`` is consulted first:
    its ``type_1_id`` / ``type_2_id`` values are translated through
    ``TYPE_ID_MAP``, skipping missing or unknown IDs. If that yields nothing
    (no form record, or no recognised type IDs), the types remembered from
    pokemon.json in ``self._existing_types`` are returned, or an empty list.
    """
    record = self._find_form_for_id(pokeapi_id)
    if record:
        type_ids = (record.get("type_1_id"), record.get("type_2_id"))
        resolved = [
            TYPE_ID_MAP[type_id]
            for type_id in type_ids
            if type_id and type_id in TYPE_ID_MAP
        ]
        if resolved:
            return resolved

    # Fallback to existing types from pokemon.json
    return self._existing_types.get(pokeapi_id, [])
def _find_form_for_id(self, pokeapi_id: int) -> dict | None:
"""Find the PokeDB form record for a pokeapi_id."""
# Check pre-built reverse index first
if pokeapi_id in self._id_to_pokedb_form:
return self._id_to_pokedb_form[pokeapi_id]
if pokeapi_id not in self._id_to_info:
return None
_, name = self._id_to_info[pokeapi_id]
slug = _name_to_slug(name)
for suffix in ["-default", ""]:
candidate = slug + suffix
if candidate in self._pokedb_form_index:
return self._pokedb_form_index[candidate]
form_slug = _name_to_form_slug(name)
if form_slug:
for suffix in ["-default", ""]:
candidate = form_slug + suffix
if candidate in self._pokedb_form_index:
return self._pokedb_form_index[candidate]
return None
def has_sprite_for_id(self, pokeapi_id: int) -> bool:
    """Tell whether PokeDB provides a sprite image for this pokeapi_id.

    True only when a form record is found and its
    ``main_image_normal_path_medium`` field is non-empty.
    """
    record = self._find_form_for_id(pokeapi_id)
    if not record:
        return False
    return bool(record.get("main_image_normal_path_medium"))
def get_sprite_url_for_id(self, pokeapi_id: int) -> str | None:
"""Get the PokeDB CDN sprite URL for a pokemon by pokeapi_id."""
form = self._find_form_for_id(pokeapi_id)
return form.get("main_image_normal_path_medium") if form else None
def report_unmapped(self) -> None:
"""Print warnings for any unmapped identifiers."""
if self._unmapped:

View File

@@ -3,15 +3,42 @@
from __future__ import annotations
import json
import re
import sys
from pathlib import Path
from typing import Any
from .loader import SeedConfig
from .mappings import PokemonMapper
from .mappings import TYPE_ID_MAP, PokemonMapper
from .models import Encounter, Route
from .sprites import sprite_path_for_pokemon
# Max route name length (matches DB column VARCHAR(100))
_MAX_ROUTE_NAME_LEN = 100
# Pattern for Sw/Sh den area names: "Location (Den X - long description - Common/Rare)"
_DEN_NAME_RE = re.compile(r"^(.+?) \(Den ([A-Z]\d*) - .+ - (Common|Rare)\)$")
def _truncate_route_name(name: str) -> str:
"""Truncate a route name to fit the database column limit.
Applies smart truncation for known patterns (e.g. Sw/Sh den descriptions).
"""
if len(name) <= _MAX_ROUTE_NAME_LEN:
return name
# Sw/Sh dens: shorten "Location (Den X - long desc - Common/Rare)" → "Location (Den X - Common/Rare)"
m = _DEN_NAME_RE.match(name)
if m:
shortened = f"{m.group(1)} (Den {m.group(2)} - {m.group(3)})"
if len(shortened) <= _MAX_ROUTE_NAME_LEN:
return shortened
# Generic truncation: cut at last space before limit, add ellipsis
truncated = name[:_MAX_ROUTE_NAME_LEN - 1].rsplit(" ", 1)[0] + ""
return truncated
# ---------------------------------------------------------------------------
# Route ordering
@@ -133,24 +160,66 @@ def write_json(path: Path, data: Any) -> None:
print(f" -> {path}")
def _deduplicate_names(routes: list[Route]) -> None:
    """Make every route and child-route name unique and within the DB limit.

    Names are first shortened with ``_truncate_route_name``. The first
    occurrence of a name is kept as is; later occurrences get a numeric
    suffix (``"Name #2"``, ``"Name #3"``, ...). Routes are modified in place.

    Two guarantees are enforced on the suffixed names:

    * the suffix never pushes a name past ``_MAX_ROUTE_NAME_LEN``: the base
      is trimmed to make room for it when necessary;
    * a generated name never collides with a name already handed out (for
      example a route that is literally called ``"Name #2"``).
    """
    taken: set[str] = set()  # every final name handed out so far
    counters: dict[str, int] = {}  # base name -> highest suffix number used

    def _unique(name: str) -> str:
        if name not in taken:
            taken.add(name)
            return name

        number = counters.get(name, 1)
        while True:
            number += 1
            suffix = f" #{number}"
            base = name
            if len(base) + len(suffix) > _MAX_ROUTE_NAME_LEN:
                # Trim the base so the suffixed name still fits the column.
                base = base[: _MAX_ROUTE_NAME_LEN - len(suffix)].rstrip()
            candidate = base + suffix
            if candidate not in taken:
                break

        counters[name] = number
        taken.add(candidate)
        return candidate

    for route in routes:
        route.name = _unique(_truncate_route_name(route.name))
        for child in route.children:
            child.name = _unique(_truncate_route_name(child.name))
def write_game_json(routes: list[Route], output_dir: Path, game_slug: str) -> None:
    """Serialize one game's routes to ``<output_dir>/<game_slug>.json``.

    Route names are truncated and de-duplicated in place first, so the
    ``routes`` passed in are modified by this call.
    """
    _deduplicate_names(routes)
    target = output_dir / f"{game_slug}.json"
    serialized = [route.to_dict() for route in routes]
    write_json(target, serialized)
# Game slug → category, emitted as the "category" field of games.json.
# Values used: "original", "enhanced", "remake", "sequel", "spinoff".
# A slug missing from this table is written as "original" by
# write_games_json(), so new games need an entry here unless that default
# is the intended category.
_GAME_CATEGORY: dict[str, str] = {
"red": "original", "blue": "original", "yellow": "enhanced",
"gold": "original", "silver": "original", "crystal": "enhanced",
"ruby": "original", "sapphire": "original", "emerald": "enhanced",
"firered": "remake", "leafgreen": "remake",
"diamond": "original", "pearl": "original", "platinum": "enhanced",
"heartgold": "remake", "soulsilver": "remake",
"black": "original", "white": "original",
"black-2": "sequel", "white-2": "sequel",
"x": "original", "y": "original",
"omega-ruby": "remake", "alpha-sapphire": "remake",
"sun": "original", "moon": "original",
"ultra-sun": "enhanced", "ultra-moon": "enhanced",
"lets-go-pikachu": "remake", "lets-go-eevee": "remake",
"sword": "original", "shield": "original",
"brilliant-diamond": "remake", "shining-pearl": "remake",
"legends-arceus": "spinoff",
"scarlet": "original", "violet": "original",
"legends-z-a": "spinoff",
}
def write_games_json(config: SeedConfig, output_dir: Path) -> None:
    """Build games.json from the ``version_groups`` section of the config.

    One entry is produced per game of every version group. Generation and
    region come from the group, everything else from the game itself. The
    category is looked up in ``_GAME_CATEGORY`` and defaults to "original".
    """

    def _entry(group: dict[str, Any], game: dict[str, Any]) -> dict[str, Any]:
        slug = game["slug"]
        return {
            "name": game["name"],
            "slug": slug,
            "generation": group["generation"],
            "region": group["region"],
            "release_year": game["release_year"],
            "color": game.get("color"),
            "category": _GAME_CATEGORY.get(slug, "original"),
        }

    games = [
        _entry(group, game)
        for group in config.version_groups.values()
        for game in group.get("games", {}).values()
    ]

    write_json(output_dir / "games.json", games)
    print(f" Wrote {len(games)} games")
@@ -162,31 +231,36 @@ def write_pokemon_json(
sprite_map: dict[str, str],
output_dir: Path,
) -> None:
"""Write pokemon.json with all pokemon referenced in encounters."""
seen_ids: set[int] = set()
pokemon_list: list[dict[str, Any]] = []
"""Write pokemon.json with all known pokemon.
for form_id in sorted(encountered_form_ids):
Includes all pokemon from the existing pokemon.json (base data),
enriched with PokeDB types and sprite paths for encountered forms.
"""
# Build a mapping of pokeapi_id → (form_id, form_data) for encountered forms
encountered_by_id: dict[int, tuple[str, dict[str, Any] | None]] = {}
for form_id in encountered_form_ids:
info = pokemon_mapper.lookup(form_id)
if info is None:
continue
pokeapi_id, _ = info
if pokeapi_id not in encountered_by_id:
form_data = pokemon_mapper.get_form_data(form_id)
encountered_by_id[pokeapi_id] = (form_id, form_data)
pokeapi_id, name = info
if pokeapi_id in seen_ids:
continue
seen_ids.add(pokeapi_id)
pokemon_list: list[dict[str, Any]] = []
# Get additional data from PokeDB form record
form_data = pokemon_mapper.get_form_data(form_id)
national_dex = form_data.get("ndex_id", pokeapi_id) if form_data else pokeapi_id
# Types from PokeDB form data
types = _extract_types(form_data) if form_data else []
# Sprite URL
sprite_url: str | None = None
if form_id in sprite_map:
sprite_url = sprite_path_for_pokemon(pokeapi_id)
for pokeapi_id, (ndex, name) in pokemon_mapper.all_pokemon():
# Enrich with PokeDB data if this pokemon was encountered
if pokeapi_id in encountered_by_id:
form_id, form_data = encountered_by_id[pokeapi_id]
types = _extract_types(form_data) if form_data else []
sprite_url = sprite_path_for_pokemon(pokeapi_id) if form_id in sprite_map else None
national_dex = form_data.get("ndex_id", ndex) if form_data else ndex
else:
# Not encountered — use existing data, try to find PokeDB form for types
types = pokemon_mapper.get_types_for_id(pokeapi_id)
sprite_url = sprite_path_for_pokemon(pokeapi_id) if pokemon_mapper.has_sprite_for_id(pokeapi_id) else None
national_dex = ndex
pokemon_list.append({
"pokeapi_id": pokeapi_id,
@@ -203,36 +277,13 @@ def write_pokemon_json(
print(f" Wrote {len(pokemon_list)} pokemon")
# PokeDB type IDs → type names (from PokeDB's type system)
_TYPE_ID_MAP: dict[int, str] = {
1: "normal",
2: "fighting",
3: "flying",
4: "poison",
5: "ground",
6: "rock",
7: "bug",
8: "ghost",
9: "steel",
10: "fire",
11: "water",
12: "grass",
13: "electric",
14: "psychic",
15: "ice",
16: "dragon",
17: "dark",
18: "fairy",
}
def _extract_types(form_data: dict[str, Any]) -> list[str]:
"""Extract type names from a PokeDB form record."""
types = []
type1_id = form_data.get("type_1_id")
type2_id = form_data.get("type_2_id")
if type1_id and type1_id in _TYPE_ID_MAP:
types.append(_TYPE_ID_MAP[type1_id])
if type2_id and type2_id in _TYPE_ID_MAP:
types.append(_TYPE_ID_MAP[type2_id])
if type1_id and type1_id in TYPE_ID_MAP:
types.append(TYPE_ID_MAP[type1_id])
if type2_id and type2_id in TYPE_ID_MAP:
types.append(TYPE_ID_MAP[type2_id])
return types

View File

@@ -311,28 +311,33 @@ def build_routes(
else:
# Multiple areas — check if encounters differ
children: list[Route] = []
all_encounters: list[Encounter] = []
# Encounters for areas with no distinct name get merged into parent
parent_encounters: list[Encounter] = []
for _, area_name, encounters in areas:
aggregated = aggregate_encounters(encounters)
if aggregated:
if area_name and area_name != loc_name:
child_name = area_name
children.append(Route(name=area_name, order=0, encounters=aggregated))
else:
child_name = loc_name
children.append(Route(name=child_name, order=0, encounters=aggregated))
all_encounters.extend(encounters)
# No distinct area name — merge into parent
parent_encounters.extend(aggregated)
if len(children) > 1:
# Parent with children
if children:
# Parent with children (parent may also have its own encounters)
parent_agg = aggregate_encounters(parent_encounters) if parent_encounters else []
routes.append(Route(
name=loc_name,
order=0,
encounters=[],
encounters=parent_agg,
children=children,
))
elif len(children) == 1:
# Only one area had encounters — flatten
routes.append(children[0])
elif parent_encounters:
# All areas had same name — flatten into single route
routes.append(Route(
name=loc_name,
order=0,
encounters=aggregate_encounters(parent_encounters),
))
return routes

View File

@@ -70,9 +70,61 @@ def download_sprites(
return result
def sprite_path_for_pokemon(pokeapi_id: int, sprites_dir_name: str = "sprites") -> str:
"""Generate the relative sprite path for use in pokemon.json.
def download_all_sprites(
    pokemon_mapper: "PokemonMapper",
    sprites_dir: Path,
) -> set[int]:
    """Download sprites for all known pokemon (not just encountered ones).

    For every pokemon reported by ``pokemon_mapper.all_pokemon()`` that has a
    sprite URL, the image is stored as ``<sprites_dir>/<pokeapi_id>.webp``
    unless that file already exists. ``sprites_dir`` is created if needed.

    Each download is written to a temporary ``.part`` file and renamed into
    place only after it completed, so an interrupted or failed transfer can
    never leave a truncated ``.webp`` that later runs would treat as cached.
    Failures are reported on stderr and do not abort the remaining downloads.

    Returns a set of pokeapi_ids that have sprites downloaded (already
    cached or fetched successfully during this call).
    """
    sprites_dir.mkdir(parents=True, exist_ok=True)

    to_download: list[tuple[int, str, Path]] = []
    have_sprites: set[int] = set()

    for pokeapi_id, _info in pokemon_mapper.all_pokemon():
        url = pokemon_mapper.get_sprite_url_for_id(pokeapi_id)
        if not url:
            # No sprite known for this pokemon: nothing to cache or fetch.
            continue
        have_sprites.add(pokeapi_id)
        dest = sprites_dir / f"{pokeapi_id}.webp"
        if not dest.exists():
            to_download.append((pokeapi_id, url, dest))

    if not to_download:
        print(f" All sprites: {len(have_sprites)} already cached")
        return have_sprites

    print(f" Downloading {len(to_download)} additional sprites ({len(have_sprites) - len(to_download)} cached)...")
    failed = 0
    for i, (pid, url, dest) in enumerate(to_download, 1):
        partial = dest.with_name(dest.name + ".part")
        try:
            urllib.request.urlretrieve(url, partial)
            partial.replace(dest)
        except Exception as e:  # best effort: one bad sprite must not stop the run
            print(f" Warning: Failed to download sprite for pokemon {pid}: {e}", file=sys.stderr)
            failed += 1
            have_sprites.discard(pid)
            partial.unlink(missing_ok=True)

        if i % 100 == 0:
            print(f" {i}/{len(to_download)}...")

    if failed:
        print(f" All sprites: {len(have_sprites)} downloaded, {failed} failed")
    else:
        print(f" All sprites: {len(have_sprites)} total ({len(to_download)} new)")

    return have_sprites
def sprite_path_for_pokemon(pokeapi_id: int) -> str:
    """Build the sprite URL path stored in pokemon.json for a pokemon.

    The result is root-relative, e.g. "/sprites/25.webp", because files
    under frontend/public/ are served from the site root.
    """
    filename = f"{pokeapi_id}.webp"
    return "/sprites/" + filename