Add Python tool scaffold for PokeDB data import

Set up tools/import-pokedb/ with CLI, JSON loader, and output models.
Replaces the Go/PokeAPI approach with local PokeDB.org JSON processing.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Julian Tabel
2026-02-11 09:49:51 +01:00
parent 5151be785b
commit 1aa67665ff
11 changed files with 522 additions and 23 deletions

View File

@@ -0,0 +1,115 @@
"""CLI entry point for the PokeDB import tool.
Usage:
# From repo root:
python -m import_pokedb ./pokedb-export/
# With options:
python -m import_pokedb ./pokedb-export/ --output backend/src/app/seeds/data/ --game firered
"""
from __future__ import annotations
import argparse
import sys
from pathlib import Path
from .loader import load_pokedb_data, load_seed_config
# Locations probed for the backend seeds directory, in priority order.
# Both are relative to the current working directory: the first matches a run
# from the repo root, the second a run from tools/import-pokedb/.
SEEDS_DIR_CANDIDATES = [
    Path(relative)
    for relative in (
        "backend/src/app/seeds",
        "../../backend/src/app/seeds",
    )
]
def find_seeds_dir() -> Path:
    """Return the absolute path of the backend seeds directory.

    Each entry of ``SEEDS_DIR_CANDIDATES`` is probed in order and the first one
    that contains ``version_groups.json`` wins. When none matches, the
    repo-root-relative default is returned instead, resolved against the
    current working directory and without checking that it exists.
    """
    matches = (
        location
        for location in SEEDS_DIR_CANDIDATES
        if (location / "version_groups.json").exists()
    )
    # The generator is lazy, so probing stops at the first hit.
    default = Path("backend/src/app/seeds")
    return next(matches, default).resolve()
def build_parser() -> argparse.ArgumentParser:
    """Construct the argument parser for the ``import-pokedb`` command.

    The parser accepts one positional argument (``pokedb_dir``) and two
    optional flags (``--output`` and ``--game``), both defaulting to ``None``.
    """
    cli = argparse.ArgumentParser(
        prog="import-pokedb",
        description="Convert PokeDB.org JSON data exports into nuzlocke-tracker seed format.",
    )

    # (name or flag, keyword arguments) pairs, registered in this order so the
    # generated help text lists them the same way.
    argument_specs = (
        (
            "pokedb_dir",
            {
                "type": Path,
                "help": "Path to directory containing PokeDB JSON export files",
            },
        ),
        (
            "--output",
            {
                "type": Path,
                "default": None,
                "help": "Output directory for seed JSON files (default: backend/src/app/seeds/data/)",
            },
        ),
        (
            "--game",
            {
                "type": str,
                "default": None,
                "help": "Generate data for a specific game slug only (default: all games)",
            },
        ),
    )
    for name_or_flag, options in argument_specs:
        cli.add_argument(name_or_flag, **options)

    return cli
def main(argv: list[str] | None = None) -> None:
    """Run the import tool: parse arguments, load inputs and report the target.

    Args:
        argv: Arguments to parse; ``None`` lets argparse read them from the
            process command line.

    The function prints a progress report to stdout. It exits with status 1
    (message on stderr) when ``pokedb_dir`` is not a directory or when the
    requested ``--game`` is not listed in any version group. The loader
    functions it calls exit the same way on missing or invalid input files.
    """
    options = build_parser().parse_args(argv)

    source_dir: Path = options.pokedb_dir
    if not source_dir.is_dir():
        print(f"Error: {source_dir} is not a directory", file=sys.stderr)
        sys.exit(1)

    seeds_dir = find_seeds_dir()
    if options.output:
        output_dir: Path = options.output
    else:
        output_dir = seeds_dir / "data"
    # Created up front, before any input file is read.
    output_dir.mkdir(parents=True, exist_ok=True)

    print(f"PokeDB data: {source_dir.resolve()}")
    print(f"Seeds config: {seeds_dir}")
    print(f"Output: {output_dir.resolve()}")
    print()

    # Load PokeDB export data
    export = load_pokedb_data(source_dir)
    print(export.summary())
    print()

    # Load existing seed configuration
    config = load_seed_config(seeds_dir)
    print(f"Loaded {len(config.version_groups)} version groups")
    print(f"Loaded route order for {len(config.route_order)} version groups")
    special = config.special_encounters
    if special:
        print(
            f"Loaded special encounters for {len(special.get('encounters', {}))} version groups"
        )
    print()

    # Determine which games to process
    requested = options.game
    if not requested:
        game_count = 0
        for group in config.version_groups.values():
            game_count += len(group.get("versions", []))
        print(f"Target: all {game_count} games")
    elif any(
        requested in group.get("versions", [])
        for group in config.version_groups.values()
    ):
        print(f"Target: {requested}")
    else:
        print(f"Error: Game '{requested}' not found in version_groups.json", file=sys.stderr)
        sys.exit(1)

    # TODO: Processing pipeline (subtasks zno2, rfg0, gkcy)
    print("\nScaffold loaded successfully. Processing pipeline not yet implemented.")
# Script entry point: run the CLI when this module is executed directly
# (for example via ``python -m import_pokedb``), but not when it is imported.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,150 @@
"""Load and validate PokeDB JSON export files."""
from __future__ import annotations
import json
import sys
from pathlib import Path
from typing import Any
# File names that must all be present in the PokeDB export directory.
# load_pokedb_data() reports every missing one and exits before parsing.
REQUIRED_FILES = [
    "encounters.json",
    "locations.json",
    "location_areas.json",
    "encounter_methods.json",
    "versions.json",
    "pokemon_forms.json",
]
class PokeDBData:
    """In-memory holder for the record lists read from a PokeDB export.

    Each attribute keeps the list passed to the constructor under the same
    name: ``encounters``, ``locations``, ``location_areas``,
    ``encounter_methods``, ``versions`` and ``pokemon_forms``.
    """

    def __init__(
        self,
        encounters: list[dict[str, Any]],
        locations: list[dict[str, Any]],
        location_areas: list[dict[str, Any]],
        encounter_methods: list[dict[str, Any]],
        versions: list[dict[str, Any]],
        pokemon_forms: list[dict[str, Any]],
    ) -> None:
        """Store the given record lists as-is (no copying or validation)."""
        self.encounters = encounters
        self.locations = locations
        self.location_areas = location_areas
        self.encounter_methods = encounter_methods
        self.versions = versions
        self.pokemon_forms = pokemon_forms

    def summary(self) -> str:
        """Return a multi-line report with the record count of every dataset.

        Counts use thousands separators; the text has no trailing newline.
        """
        report_lines = [
            "PokeDB data loaded:",
            f" encounters: {len(self.encounters):,}",
            f" locations: {len(self.locations):,}",
            f" location_areas: {len(self.location_areas):,}",
            f" encounter_methods: {len(self.encounter_methods):,}",
            f" versions: {len(self.versions):,}",
            f" pokemon_forms: {len(self.pokemon_forms):,}",
        ]
        return "\n".join(report_lines)
def load_pokedb_data(data_dir: Path) -> PokeDBData:
    """Load all PokeDB JSON export files from a directory.

    Every name in ``REQUIRED_FILES`` must exist as a regular file inside
    *data_dir* and contain a top-level JSON array.

    Args:
        data_dir: Directory holding the PokeDB JSON export.

    Returns:
        A ``PokeDBData`` holding one list per export file.

    Raises:
        SystemExit: With status 1, after printing a message to stderr, when a
            required file is missing, cannot be read, is not valid UTF-8
            JSON, or does not contain a JSON array.
    """
    # is_file() rather than exists(): a directory that happens to carry a
    # required name is reported as missing instead of failing later in open().
    missing = [name for name in REQUIRED_FILES if not (data_dir / name).is_file()]
    if missing:
        print(
            f"Error: Missing required PokeDB files in {data_dir}:",
            file=sys.stderr,
        )
        for name in missing:
            print(f" - {name}", file=sys.stderr)
        print(
            "\nDownload the JSON export from https://pokedb.org/data-export",
            file=sys.stderr,
        )
        sys.exit(1)

    def _load(filename: str) -> list[dict[str, Any]]:
        """Parse one export file and check that it holds a JSON array."""
        path = data_dir / filename
        try:
            # Decode explicitly as UTF-8: without it open() uses the locale
            # encoding, which can mis-decode non-ASCII text on some platforms.
            with open(path, encoding="utf-8") as f:
                data = json.load(f)
        except OSError as e:
            # The file passed the presence check but could not be opened/read.
            print(f"Error: Failed to read {path}: {e}", file=sys.stderr)
            sys.exit(1)
        except (UnicodeDecodeError, json.JSONDecodeError) as e:
            print(f"Error: Failed to parse {path}: {e}", file=sys.stderr)
            sys.exit(1)
        if not isinstance(data, list):
            print(
                f"Error: Expected a JSON array in {path}, got {type(data).__name__}",
                file=sys.stderr,
            )
            sys.exit(1)
        return data

    return PokeDBData(
        encounters=_load("encounters.json"),
        locations=_load("locations.json"),
        location_areas=_load("location_areas.json"),
        encounter_methods=_load("encounter_methods.json"),
        versions=_load("versions.json"),
        pokemon_forms=_load("pokemon_forms.json"),
    )
class SeedConfig:
    """Bundle of the seed configuration files that already exist.

    Attributes:
        version_groups: Parsed ``version_groups.json``.
        route_order: Route names per key, with aliases already expanded by
            the loader.
        special_encounters: Parsed ``special_encounters.json``, or ``None``
            when that optional file is absent.
    """

    def __init__(
        self,
        version_groups: dict[str, Any],
        route_order: dict[str, list[str]],
        special_encounters: dict[str, Any] | None,
    ) -> None:
        """Keep references to the given structures without copying them."""
        self.version_groups = version_groups
        self.route_order = route_order
        self.special_encounters = special_encounters
def load_seed_config(seeds_dir: Path) -> SeedConfig:
    """Load existing seed configuration files (version_groups, route_order, etc.).

    ``version_groups.json`` and ``route_order.json`` are required;
    ``special_encounters.json`` is optional. Each file must contain a
    top-level JSON object.

    Args:
        seeds_dir: Directory containing the seed configuration files.

    Returns:
        A ``SeedConfig`` whose ``route_order`` already includes one entry per
        resolvable alias and whose ``special_encounters`` is ``None`` when the
        optional file does not exist.

    Raises:
        SystemExit: With status 1, after printing a message to stderr, when a
            required file is missing or any present file cannot be read, is
            not valid UTF-8 JSON, or is not a JSON object.
    """

    def _read_object(path: Path) -> dict[str, Any]:
        """Parse *path* as UTF-8 JSON and require a top-level object."""
        try:
            # Explicit UTF-8 keeps decoding independent of the locale encoding.
            with open(path, encoding="utf-8") as f:
                data = json.load(f)
        except OSError as e:
            print(f"Error: Failed to read {path}: {e}", file=sys.stderr)
            sys.exit(1)
        except (UnicodeDecodeError, json.JSONDecodeError) as e:
            print(f"Error: Failed to parse {path}: {e}", file=sys.stderr)
            sys.exit(1)
        # Callers use .get()/.values() on the result, so fail here with a
        # clear message instead of an AttributeError later on.
        if not isinstance(data, dict):
            print(
                f"Error: Expected a JSON object in {path}, got {type(data).__name__}",
                file=sys.stderr,
            )
            sys.exit(1)
        return data

    vg_path = seeds_dir / "version_groups.json"
    if not vg_path.exists():
        print(f"Error: version_groups.json not found at {vg_path}", file=sys.stderr)
        sys.exit(1)
    version_groups = _read_object(vg_path)

    # Load route_order.json and resolve aliases
    ro_path = seeds_dir / "route_order.json"
    if not ro_path.exists():
        print(f"Error: route_order.json not found at {ro_path}", file=sys.stderr)
        sys.exit(1)
    ro_raw = _read_object(ro_path)
    route_order: dict[str, list[str]] = dict(ro_raw.get("routes", {}))
    for alias, target in ro_raw.get("aliases", {}).items():
        # An alias shares the target's list object rather than copying it.
        # Aliases pointing at an unknown target are skipped without a message.
        if target in route_order:
            route_order[alias] = route_order[target]

    # Load special_encounters.json (optional)
    se_path = seeds_dir / "special_encounters.json"
    special_encounters = _read_object(se_path) if se_path.exists() else None

    return SeedConfig(
        version_groups=version_groups,
        route_order=route_order,
        special_encounters=special_encounters,
    )

View File

@@ -0,0 +1,81 @@
"""Output data models matching the existing seed JSON format."""
from __future__ import annotations
from dataclasses import dataclass, field
@dataclass
class Encounter:
    """A single encounter entry as written to the seed JSON output."""

    pokeapi_id: int
    pokemon_name: str
    method: str
    encounter_rate: int  # NOTE(review): presumably a percentage - confirm
    min_level: int
    max_level: int

    def to_dict(self) -> dict:
        """Return the fields as a plain dict keyed by field name, in declaration order."""
        keys = (
            "pokeapi_id",
            "pokemon_name",
            "method",
            "encounter_rate",
            "min_level",
            "max_level",
        )
        return {key: getattr(self, key) for key in keys}
@dataclass
class Route:
    """A named, ordered route with its encounters and optional child routes."""

    name: str
    order: int
    encounters: list[Encounter] = field(default_factory=list)
    children: list[Route] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Serialize the route, recursing into encounters and children.

        The ``"children"`` key is emitted only when the route has at least
        one child; ``"encounters"`` is always present, even when empty.
        """
        payload: dict = {
            "name": self.name,
            "order": self.order,
            "encounters": [entry.to_dict() for entry in self.encounters],
        }
        if not self.children:
            return payload
        payload["children"] = [child.to_dict() for child in self.children]
        return payload
@dataclass
class Game:
    """Metadata of one game as written to the seed JSON output."""

    name: str
    slug: str
    generation: int
    region: str
    release_year: int
    color: str | None = None

    def to_dict(self) -> dict:
        """Return every field as a plain dict; ``color`` is included even when ``None``."""
        keys = ("name", "slug", "generation", "region", "release_year", "color")
        return {key: getattr(self, key) for key in keys}
@dataclass
class Pokemon:
    """One Pokemon entry as written to the seed JSON output."""

    pokeapi_id: int
    national_dex: int
    name: str
    types: list[str]
    sprite_url: str

    def to_dict(self) -> dict:
        """Return the fields as a plain dict keyed by field name.

        The ``types`` list is passed through by reference, not copied.
        """
        keys = ("pokeapi_id", "national_dex", "name", "types", "sprite_url")
        return {key: getattr(self, key) for key in keys}

View File

@@ -0,0 +1,9 @@
[project]
name = "import-pokedb"
version = "0.1.0"
description = "Convert PokeDB.org JSON data exports into nuzlocke-tracker seed format"
requires-python = ">=3.12"
dependencies = []
[project.scripts]
import-pokedb = "import_pokedb.__main__:main"