Add --prune flag to seed command to remove stale data
Without --prune, seeds continue to only upsert (add/update). With --prune, routes, encounters, and bosses not present in the seed JSON files are deleted from the database. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,19 @@
|
|||||||
|
---
|
||||||
|
# nuzlocke-tracker-ecn3
|
||||||
|
title: Prune stale seed data during seeding
|
||||||
|
status: completed
|
||||||
|
type: bug
|
||||||
|
priority: normal
|
||||||
|
created_at: 2026-02-21T16:28:37Z
|
||||||
|
updated_at: 2026-02-21T16:29:43Z
|
||||||
|
---
|
||||||
|
|
||||||
|
Seeds only upsert (add/update); they never remove routes, encounters, or bosses that no longer exist in the seed JSON. As a result, when routes are renamed, the old route names persist in production.
|
||||||
|
|
||||||
|
## Fix
|
||||||
|
|
||||||
|
When seeding is run with `--prune`, delete rows not present in the seed data after upserting each entity type (without the flag, seeding remains upsert-only):
|
||||||
|
|
||||||
|
1. **Routes**: After upserting all routes for a version group, delete routes whose names are not in the seed set. FK cascades handle child routes and encounters.
|
||||||
|
2. **Encounters**: After upserting encounters for a route+game, delete encounters not in the seed data for that route+game pair.
|
||||||
|
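3. **Bosses**: After upserting bosses for a version group, delete bosses whose `order` value does not appear in the seed data for that version group (this covers gaps in the sequence as well as orders past the last seeded boss).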
|
||||||
@@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
python -m app.seeds # Run seed
|
python -m app.seeds # Run seed
|
||||||
|
python -m app.seeds --prune # Run seed and remove stale data not in seed files
|
||||||
python -m app.seeds --verify # Run seed + verification
|
python -m app.seeds --verify # Run seed + verification
|
||||||
python -m app.seeds --export # Export all seed data from DB to JSON files
|
python -m app.seeds --export # Export all seed data from DB to JSON files
|
||||||
"""
|
"""
|
||||||
@@ -21,7 +22,8 @@ async def main():
|
|||||||
await export_all()
|
await export_all()
|
||||||
return
|
return
|
||||||
|
|
||||||
await seed()
|
prune = "--prune" in sys.argv
|
||||||
|
await seed(prune=prune)
|
||||||
if "--verify" in sys.argv:
|
if "--verify" in sys.argv:
|
||||||
await verify()
|
await verify()
|
||||||
|
|
||||||
|
|||||||
@@ -124,11 +124,14 @@ async def upsert_routes(
|
|||||||
session: AsyncSession,
|
session: AsyncSession,
|
||||||
version_group_id: int,
|
version_group_id: int,
|
||||||
routes: list[dict],
|
routes: list[dict],
|
||||||
|
*,
|
||||||
|
prune: bool = False,
|
||||||
) -> dict[str, int]:
|
) -> dict[str, int]:
|
||||||
"""Upsert route records for a version group, return {name: id} mapping.
|
"""Upsert route records for a version group, return {name: id} mapping.
|
||||||
|
|
||||||
Handles hierarchical routes: routes with 'children' are parent routes,
|
Handles hierarchical routes: routes with 'children' are parent routes,
|
||||||
and their children get parent_route_id set accordingly.
|
and their children get parent_route_id set accordingly.
|
||||||
|
When prune is True, deletes routes not present in the seed data.
|
||||||
"""
|
"""
|
||||||
# First pass: upsert all parent routes (without parent_route_id)
|
# First pass: upsert all parent routes (without parent_route_id)
|
||||||
for route in routes:
|
for route in routes:
|
||||||
@@ -185,6 +188,27 @@ async def upsert_routes(
|
|||||||
|
|
||||||
await session.flush()
|
await session.flush()
|
||||||
|
|
||||||
|
if prune:
|
||||||
|
seed_names: set[str] = set()
|
||||||
|
for route in routes:
|
||||||
|
seed_names.add(route["name"])
|
||||||
|
for child in route.get("children", []):
|
||||||
|
seed_names.add(child["name"])
|
||||||
|
|
||||||
|
pruned = await session.execute(
|
||||||
|
delete(Route)
|
||||||
|
.where(
|
||||||
|
Route.version_group_id == version_group_id,
|
||||||
|
Route.name.not_in(seed_names),
|
||||||
|
)
|
||||||
|
.returning(Route.id)
|
||||||
|
)
|
||||||
|
pruned_count = len(pruned.all())
|
||||||
|
if pruned_count:
|
||||||
|
print(f" Pruned {pruned_count} stale route(s)")
|
||||||
|
|
||||||
|
await session.flush()
|
||||||
|
|
||||||
# Return full mapping including children
|
# Return full mapping including children
|
||||||
result = await session.execute(
|
result = await session.execute(
|
||||||
select(Route.name, Route.id).where(Route.version_group_id == version_group_id)
|
select(Route.name, Route.id).where(Route.version_group_id == version_group_id)
|
||||||
@@ -233,8 +257,15 @@ async def upsert_route_encounters(
|
|||||||
encounters: list[dict],
|
encounters: list[dict],
|
||||||
dex_to_id: dict[int, int],
|
dex_to_id: dict[int, int],
|
||||||
game_id: int,
|
game_id: int,
|
||||||
|
*,
|
||||||
|
prune: bool = False,
|
||||||
) -> int:
|
) -> int:
|
||||||
"""Upsert encounters for a route and game, return count of upserted rows."""
|
"""Upsert encounters for a route and game, return count of upserted rows.
|
||||||
|
|
||||||
|
When prune is True, deletes encounters not present in the seed data.
|
||||||
|
"""
|
||||||
|
seed_keys: set[tuple[int, str, str]] = set()
|
||||||
|
|
||||||
count = 0
|
count = 0
|
||||||
for enc in encounters:
|
for enc in encounters:
|
||||||
pokemon_id = dex_to_id.get(enc["pokeapi_id"])
|
pokemon_id = dex_to_id.get(enc["pokeapi_id"])
|
||||||
@@ -245,6 +276,7 @@ async def upsert_route_encounters(
|
|||||||
conditions = enc.get("conditions")
|
conditions = enc.get("conditions")
|
||||||
if conditions:
|
if conditions:
|
||||||
for condition_name, rate in conditions.items():
|
for condition_name, rate in conditions.items():
|
||||||
|
seed_keys.add((pokemon_id, enc["method"], condition_name))
|
||||||
await _upsert_single_encounter(
|
await _upsert_single_encounter(
|
||||||
session,
|
session,
|
||||||
route_id,
|
route_id,
|
||||||
@@ -258,6 +290,7 @@ async def upsert_route_encounters(
|
|||||||
)
|
)
|
||||||
count += 1
|
count += 1
|
||||||
else:
|
else:
|
||||||
|
seed_keys.add((pokemon_id, enc["method"], ""))
|
||||||
await _upsert_single_encounter(
|
await _upsert_single_encounter(
|
||||||
session,
|
session,
|
||||||
route_id,
|
route_id,
|
||||||
@@ -270,6 +303,23 @@ async def upsert_route_encounters(
|
|||||||
)
|
)
|
||||||
count += 1
|
count += 1
|
||||||
|
|
||||||
|
if prune:
|
||||||
|
existing = await session.execute(
|
||||||
|
select(RouteEncounter).where(
|
||||||
|
RouteEncounter.route_id == route_id,
|
||||||
|
RouteEncounter.game_id == game_id,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
stale_ids = [
|
||||||
|
row.id
|
||||||
|
for row in existing.scalars()
|
||||||
|
if (row.pokemon_id, row.encounter_method, row.condition) not in seed_keys
|
||||||
|
]
|
||||||
|
if stale_ids:
|
||||||
|
await session.execute(
|
||||||
|
delete(RouteEncounter).where(RouteEncounter.id.in_(stale_ids))
|
||||||
|
)
|
||||||
|
|
||||||
return count
|
return count
|
||||||
|
|
||||||
|
|
||||||
@@ -280,8 +330,13 @@ async def upsert_bosses(
|
|||||||
dex_to_id: dict[int, int],
|
dex_to_id: dict[int, int],
|
||||||
route_name_to_id: dict[str, int] | None = None,
|
route_name_to_id: dict[str, int] | None = None,
|
||||||
slug_to_game_id: dict[str, int] | None = None,
|
slug_to_game_id: dict[str, int] | None = None,
|
||||||
|
*,
|
||||||
|
prune: bool = False,
|
||||||
) -> int:
|
) -> int:
|
||||||
"""Upsert boss battles for a version group, return count of bosses upserted."""
|
"""Upsert boss battles for a version group, return count of bosses upserted.
|
||||||
|
|
||||||
|
When prune is True, deletes boss battles not present in the seed data.
|
||||||
|
"""
|
||||||
count = 0
|
count = 0
|
||||||
for boss in bosses:
|
for boss in bosses:
|
||||||
# Resolve after_route_name to an ID
|
# Resolve after_route_name to an ID
|
||||||
@@ -364,6 +419,20 @@ async def upsert_bosses(
|
|||||||
|
|
||||||
count += 1
|
count += 1
|
||||||
|
|
||||||
|
if prune:
|
||||||
|
seed_orders = {boss["order"] for boss in bosses}
|
||||||
|
pruned = await session.execute(
|
||||||
|
delete(BossBattle)
|
||||||
|
.where(
|
||||||
|
BossBattle.version_group_id == version_group_id,
|
||||||
|
BossBattle.order.not_in(seed_orders),
|
||||||
|
)
|
||||||
|
.returning(BossBattle.id)
|
||||||
|
)
|
||||||
|
pruned_count = len(pruned.all())
|
||||||
|
if pruned_count:
|
||||||
|
print(f" Pruned {pruned_count} stale boss battle(s)")
|
||||||
|
|
||||||
await session.flush()
|
await session.flush()
|
||||||
return count
|
return count
|
||||||
|
|
||||||
|
|||||||
@@ -38,9 +38,12 @@ def load_json(filename: str):
|
|||||||
return json.load(f)
|
return json.load(f)
|
||||||
|
|
||||||
|
|
||||||
async def seed():
|
async def seed(*, prune: bool = False):
|
||||||
"""Run the full seed process."""
|
"""Run the full seed process.
|
||||||
print("Starting seed...")
|
|
||||||
|
When prune is True, removes DB rows not present in seed data.
|
||||||
|
"""
|
||||||
|
print("Starting seed..." + (" (with pruning)" if prune else ""))
|
||||||
|
|
||||||
async with async_session() as session, session.begin():
|
async with async_session() as session, session.begin():
|
||||||
# 1. Upsert version groups
|
# 1. Upsert version groups
|
||||||
@@ -88,7 +91,7 @@ async def seed():
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# Upsert routes once per version group
|
# Upsert routes once per version group
|
||||||
route_map = await upsert_routes(session, vg_id, routes_data)
|
route_map = await upsert_routes(session, vg_id, routes_data, prune=prune)
|
||||||
route_maps_by_vg[vg_id] = route_map
|
route_maps_by_vg[vg_id] = route_map
|
||||||
total_routes += len(route_map)
|
total_routes += len(route_map)
|
||||||
print(f" {vg_slug}: {len(route_map)} routes")
|
print(f" {vg_slug}: {len(route_map)} routes")
|
||||||
@@ -119,6 +122,7 @@ async def seed():
|
|||||||
route["encounters"],
|
route["encounters"],
|
||||||
dex_to_id,
|
dex_to_id,
|
||||||
game_id,
|
game_id,
|
||||||
|
prune=prune,
|
||||||
)
|
)
|
||||||
total_encounters += enc_count
|
total_encounters += enc_count
|
||||||
|
|
||||||
@@ -137,6 +141,7 @@ async def seed():
|
|||||||
child["encounters"],
|
child["encounters"],
|
||||||
dex_to_id,
|
dex_to_id,
|
||||||
game_id,
|
game_id,
|
||||||
|
prune=prune,
|
||||||
)
|
)
|
||||||
total_encounters += enc_count
|
total_encounters += enc_count
|
||||||
|
|
||||||
@@ -160,7 +165,13 @@ async def seed():
|
|||||||
|
|
||||||
route_name_to_id = route_maps_by_vg.get(vg_id, {})
|
route_name_to_id = route_maps_by_vg.get(vg_id, {})
|
||||||
boss_count = await upsert_bosses(
|
boss_count = await upsert_bosses(
|
||||||
session, vg_id, bosses_data, dex_to_id, route_name_to_id, slug_to_id
|
session,
|
||||||
|
vg_id,
|
||||||
|
bosses_data,
|
||||||
|
dex_to_id,
|
||||||
|
route_name_to_id,
|
||||||
|
slug_to_id,
|
||||||
|
prune=prune,
|
||||||
)
|
)
|
||||||
total_bosses += boss_count
|
total_bosses += boss_count
|
||||||
print(f" {vg_slug}: {boss_count} bosses")
|
print(f" {vg_slug}: {boss_count} bosses")
|
||||||
|
|||||||
Reference in New Issue
Block a user