Add core encounter processing pipeline

Filter by game version, parse levels and rate variants across all
generations, aggregate encounters by pokemon+method, and build
parent/child route hierarchy. Also completes encounter method coverage
(73/73) and pokemon form mapping (1180/1181) with manual overrides.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Julian Tabel
2026-02-11 10:12:55 +01:00
parent df7ea64b9e
commit d80c59047c
5 changed files with 557 additions and 13 deletions

View File

@@ -89,15 +89,42 @@ ENCOUNTER_METHOD_MAP: dict[str, str] = {
"ambush": "walk",
# Seaweed / diving
"diving": "surf",
"diving-seaweed": "surf",
"seaweed": "surf",
# Raids
"max-raid": "raid",
"max-raid-battle": "raid",
"dynamax-adventure": "raid",
"tera-raid": "raid",
"tera-raid-battle": "raid",
"fixed-tera-encounter": "static",
# Misc
"roaming": "roaming",
"safari-zone": "walk",
"bug-contest": "walk",
"dust-cloud": "walk",
"hidden-grotto": "static",
"hidden-encounter": "walk",
"horde-encounter": "walk",
"shaking-trees": "walk",
"shaking-ore-deposits": "walk",
"island-scan": "static",
"mass-outbreak": "swarm",
"npc-buy": "gift",
"special-encounter": "static",
"sea-skim": "surf",
"midair": "walk",
"mr-backlot": "walk",
"hoenn-sound": "walk",
"sinnoh-sound": "walk",
"curry": "gift",
"boxes": "gift",
"berry-tree": "walk",
"zygarde-cube-assemble": "static",
"contact-flock": "walk",
"contact-space-time-distortion": "walk",
"contact-unown-reasearch-notes": "static",
"flying-pokemon-shadow": "walk",
}
# Prefix-based fallbacks for methods not explicitly listed above.
@@ -107,6 +134,8 @@ _METHOD_PREFIX_MAP: list[tuple[str, str]] = [
("fishing-", "fishing"),
("headbutt-", "headbutt"),
("flying-", "walk"),
("ambush-", "walk"),
("contact-", "walk"),
]
@@ -180,14 +209,119 @@ def build_version_map(
# Pokemon form mapping
# ---------------------------------------------------------------------------
# Translation table for form suffixes: PokeDB suffix -> the suffix used in
# PokeAPI / our data. PokeDB writes regional forms as adjectives ("alolan")
# where we use the region name ("alola"). Several entries map a suffix to
# itself, i.e. that suffix is passed through untranslated.
_FORM_SUFFIX_MAP: dict[str, str] = {
    # --- Regional forms ---
    "alolan": "alola",
    "galarian": "galar",
    "hisuian": "hisui",
    "paldean": "paldea",
    # --- Totem forms ---
    "alolan-totem": "totem-alola",
    # --- Basculin stripes ---
    "blue-stripe": "blue-striped",
    "red-stripe": "red-striped",
    "white-stripe": "white-striped",
    # --- Sea forms ---
    "west-sea": "west",
    "east-sea": "east",
    # --- Cloak forms ---
    "plant-cloak": "plant",
    "sandy-cloak": "sandy",
    "trash-cloak": "trash",
    # --- Eiscue ---
    "ice-face": "ice",
    # --- Miscellaneous forms ---
    "pompom": "pom-pom",
    "10p": "10",
    "50p": "50",
    "owntempo": "own-tempo",
    "two": "two-segment",
    "chest": "chest-form",
    "ice-rider": "ice",
    "shadow-rider": "shadow",
    "apex": "apex-build",
    "ultimate": "ultimate-mode",
    "black-activated": "black",
    "white-activated": "white",
    "hero": "hero",
    "sword": "crowned",
    "shield": "crowned",
    # --- Gigantamax ---
    "gigantamax": "gmax",
    # --- Partner forms ---
    "partner": "partner-cap",
    # --- Flabébé / Floette / Florges colour forms ---
    # Identity entries: the colour suffix is kept as-is. Our data has no
    # per-colour entries (each colour is just the base form), so these slugs
    # are expected to resolve through the base-species fallback in
    # PokemonMapper rather than through this table.
    "blue": "blue",
    "orange": "orange",
    "red": "red",
    "white": "white",
    "yellow": "yellow",
    # --- Gender forms ---
    "female": "female",
    "male": "male",
    # --- Furfrou ---
    "natural": "natural",
    # --- Cherrim ---
    "overcast": "overcast",
    # --- Sinistea / Polteageist ---
    "antique": "antique",
    "phony": "phony",
    # --- Poltchageist / Sinistcha ---
    "artisan": "artisan",
    "counterfeit": "counterfeit",
    "masterpiece": "masterpiece",
    "unremarkable": "unremarkable",
    # --- Minior cores ---
    "blue-core": "blue",
    "green-core": "green",
    "indigo-core": "indigo",
    "orange-core": "orange",
    "red-core": "red",
    "violet-core": "violet",
    "yellow-core": "yellow",
    # --- Vivillon ---
    "fancy": "fancy",
    # --- Squawkabilly ---
    # No entries: the plumage suffixes are the same on both sides.
    # --- Xerneas ---
    "neutral": "neutral",
    # --- Deerling / Sawsbuck seasons ---
    "spring": "spring",
    "summer": "summer",
    "autumn": "autumn",
    "winter": "winter",
    # --- Spiky-eared Pichu ---
    "spiky-ears": "spiky-eared",
    # --- Paldean breeds ---
    "paldean-combat-breed": "paldea-combat-breed",
    "paldean-blaze-breed": "paldea-blaze-breed",
    "paldean-aqua-breed": "paldea-aqua-breed",
}
def _normalize_slug(identifier: str) -> str:
    """Normalize a PokeDB pokemon_form_identifier to a PokeAPI-style slug.

    Base forms arrive as "<species>-default" (e.g. "pidgey-default"); the
    "-default" marker is stripped and the bare species slug is returned.

    For every other identifier, the longest key of ``_FORM_SUFFIX_MAP`` that
    the identifier ends with (after a "-") is replaced by its mapped value,
    e.g. "rattata-alolan" -> "rattata-alola". Identifiers that match no key
    are returned unchanged.
    """
    if identifier.endswith("-default"):
        return identifier[: -len("-default")]

    # Matching on the tail (instead of splitting on the first "-") keeps
    # hyphenated species such as "mr-mime-galarian" intact. Picking the
    # longest matching key gives the same result as scanning the table in
    # longest-first order, without re-sorting the table on every call: two
    # different keys of equal length can never both match the same tail.
    matched = max(
        (
            pokedb_suffix
            for pokedb_suffix in _FORM_SUFFIX_MAP
            if identifier.endswith("-" + pokedb_suffix)
        ),
        key=len,
        default=None,
    )
    if matched is None:
        return identifier

    # Drop the matched suffix together with its leading hyphen.
    species = identifier[: -(len(matched) + 1)]
    return f"{species}-{_FORM_SUFFIX_MAP[matched]}"
@@ -234,6 +368,21 @@ def _name_to_form_slug(name: str) -> str | None:
return None
# Hand-maintained lookups for PokeDB identifiers the generic slug logic
# cannot resolve, keyed by the raw PokeDB identifier and giving
# (pokeapi_id, display_name). Needed where our pokemon.json names a base
# form in a non-standard way (e.g. "Deoxys Normal" rather than "Deoxys").
_FORM_OVERRIDES: dict[str, tuple[int, str]] = {
    "deoxys-default": (386, "Deoxys Normal"),
    "darmanitan-galarian": (10177, "Darmanitan (Galar Standard)"),
    "mimikyu-totem": (10144, "Mimikyu (Totem Disguised)"),
    "squawkabilly-green": (931, "Squawkabilly Green Plumage"),
    "squawkabilly-blue": (10260, "Squawkabilly (Blue Plumage)"),
    "squawkabilly-white": (10262, "Squawkabilly (White Plumage)"),
    "squawkabilly-yellow": (10261, "Squawkabilly (Yellow Plumage)"),
    "toxtricity-gigantamax": (849, "Toxtricity Amped"),
}
class PokemonMapper:
"""Maps PokeDB pokemon_form_identifier → (pokeapi_id, display_name)."""
@@ -277,6 +426,10 @@ class PokemonMapper:
if not pokemon_form_identifier:
return None
# Check manual overrides first
if pokemon_form_identifier in _FORM_OVERRIDES:
return _FORM_OVERRIDES[pokemon_form_identifier]
slug = _normalize_slug(pokemon_form_identifier)
# Direct slug match
@@ -292,6 +445,19 @@ class PokemonMapper:
self._slug_to_info[slug] = (pokemon_id, name)
return (pokemon_id, name)
# Fallback: strip form suffix to find base species.
# Many cosmetic forms (colors, genders, seasons) don't have separate
# entries in our pokemon.json — they use the base species entry.
# Try progressively shorter slugs: "flabebe-blue" → "flabebe"
parts = slug.split("-")
for i in range(len(parts) - 1, 0, -1):
base = "-".join(parts[:i])
if base in self._slug_to_info:
result = self._slug_to_info[base]
# Cache for future lookups
self._slug_to_info[slug] = result
return result
# Track unmapped
if pokemon_form_identifier not in self._unmapped:
self._unmapped.add(pokemon_form_identifier)