dune-tools/character-builder/scripts/extract.py
Vantz Stockwell 5b3ccf630d Render real skill + perk icons in the live UI
extract.py now also copies every referenced icon webp from sample-data into
character-builder/frontend/public/icons/ so Vite bundles them with the SPA
(serves at /icons/<filename>.webp). 154 icons (91 skill + 63 perk) end up
in the build, ~668 KB total.

SkillTree nodes show the skill's icon centered in the node instead of the
name text (name moves below the node as a label). Hovered / allocated /
maxed states change icon brightness and saturation, with a sand-colored
drop-shadow on maxed nodes.

XpProgressCard renders the perk icon to the left of each perk's text.
Locked perks desaturate the icon. The grid grows from 2 columns to 3 to
accommodate.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 08:07:39 -04:00

471 lines
17 KiB
Python

#!/usr/bin/env python3
"""Extract Dune Awakening game data from saved dune.gaming.tools HTML pages.
Outputs JSON files into ../data/:
- character-xp.json
- spec-{combat,crafting,exploration,gathering,sabotage}.json
- faction-{atreides,harkonnen}.json
- skills-{benegesserit,mentat,planetologist,swordmaster,trooper}.json
- index.json (manifest)
Also copies every referenced icon webp from sample-data/*/ into
../frontend/public/icons/ so the SPA can serve them as /icons/<name>.webp.
"""
import json
import re
import shutil
from html.parser import HTMLParser
from pathlib import Path
# All locations are derived from this script's own path, so the tool behaves
# the same regardless of the current working directory.
_SCRIPT = Path(__file__).resolve()
_BUILDER_ROOT = _SCRIPT.parents[1]

# Input: saved HTML pages plus their "*_files" asset folders.
SAMPLE = _SCRIPT.parents[2] / "sample-data"
# Output: generated JSON data files.
OUT = _BUILDER_ROOT / "data"
# Output: icon files copied for the frontend.
ICONS_OUT = _BUILDER_ROOT.joinpath("frontend", "public", "icons")

# Both output directories are created eagerly when the module is loaded.
for _out_dir in (OUT, ICONS_OUT):
    _out_dir.mkdir(parents=True, exist_ok=True)
def find_icon_source(name: str) -> Path | None:
"""Locate an icon webp inside any sample-data/*_files/ subdirectory."""
for sub in SAMPLE.iterdir():
if sub.is_dir() and sub.name.endswith("_files"):
candidate = sub / name
if candidate.exists():
return candidate
return None
def copy_icons(names: set[str]) -> tuple[int, list[str]]:
    """Copy referenced icons into ICONS_OUT.

    Args:
        names: Bare icon file names to make available in ICONS_OUT.

    Returns:
        ``(copied, missing)`` where ``copied`` is the number of files actually
        written during this call (icons already present with the same size
        are left alone and not counted) and ``missing`` is the sorted list of
        names for which no source file was found.
    """
    # Cheap safety net in case the directory was removed after import.
    ICONS_OUT.mkdir(parents=True, exist_ok=True)
    copied = 0
    missing: list[str] = []
    # Sets of str have no stable iteration order between interpreter runs;
    # sorting keeps `missing` reproducible.
    for icon_name in sorted(names):
        src = find_icon_source(icon_name)
        if src is None:
            missing.append(icon_name)
            continue
        dst = ICONS_OUT / icon_name
        # Size is the only staleness signal used: an icon whose content
        # changed without changing size is not refreshed.
        if not dst.exists() or src.stat().st_size != dst.stat().st_size:
            shutil.copy2(src, dst)
            copied += 1
    return copied, missing
# ---------- generic <table class="datatable"> extractor ----------
class TableExtractor(HTMLParser):
    """Collect the text of every row of ``<table class="...datatable...">``.

    After feeding HTML, ``rows`` holds one list of stripped cell strings per
    non-empty ``<tr>``; header (``<th>``) and body (``<td>``) cells are treated
    alike. Markup nested inside a cell contributes only its text.
    """

    _CELL_TAGS = ("td", "th")

    def __init__(self):
        super().__init__()
        # Results.
        self.rows = []
        self.header = []  # kept for compatibility; never filled by this class
        # Parsing state.
        self.current_row = []
        self.current_cell = ""
        self.in_table = self.in_row = self.in_cell = False

    def handle_starttag(self, tag, attrs):
        if tag == "table":
            # Only tables whose class attribute mentions "datatable" are read.
            css_class = dict(attrs).get("class") or ""
            if "datatable" in css_class:
                self.in_table = True
            return
        if not self.in_table:
            return
        if tag == "tr":
            self.in_row = True
            self.current_row = []
        elif tag in self._CELL_TAGS:
            self.in_cell = True
            self.current_cell = ""

    def handle_endtag(self, tag):
        if tag == "table":
            self.in_table = False
        elif tag == "tr":
            if self.in_row:
                # Rows without any cell are dropped.
                if self.current_row:
                    self.rows.append(self.current_row)
                self.in_row = False
        elif tag in self._CELL_TAGS and self.in_cell:
            self.current_row.append(self.current_cell.strip())
            self.in_cell = False

    def handle_data(self, data):
        if not self.in_cell:
            return
        self.current_cell += data
def parse_table(path: Path) -> list[list[str]]:
    """Return the datatable rows of the HTML file at *path*.

    The file is decoded as UTF-8 explicitly rather than with the platform
    default encoding, so the result does not depend on the machine's locale.

    Returns:
        One list of stripped cell strings per table row, header row first;
        an empty list when the page has no datatable.
    """
    parser = TableExtractor()
    parser.feed(path.read_text(encoding="utf-8"))
    # Flush anything the parser is still buffering.
    parser.close()
    return parser.rows
def to_int(s: str) -> int:
    """Parse an integer out of a display string such as ``"1,234 XP"``.

    Every character other than a digit or ``-`` is discarded before parsing.
    Falsy input, input with no digits, and leftovers that still do not form
    an integer (e.g. ``"1-2"``) all yield 0.
    """
    kept = "".join(re.findall(r"[\d-]", s or ""))
    if kept in ("", "-"):
        return 0
    try:
        return int(kept)
    except ValueError:
        return 0
def extract_xp_table(path: Path, value_keys: list[str]) -> dict:
    """Read a table shaped ``[Level | XP Required | Total XP | ...]``.

    Args:
        path: Saved HTML page containing the datatable.
        value_keys: Output key names for the columns after Level, in column
            order. Keys beyond the cells a row actually has are omitted for
            that row.

    Returns:
        ``{"header": [...], "rows": [...]}``. ``header`` is the first table
        row as-is; each entry of ``rows`` is ``{"level": int, <key>: int, ...}``.
        The same shape, with both lists empty, is returned when the page has
        no table rows, so callers can always index ``["rows"]``.
    """
    table = parse_table(path)
    if not table:
        return {"header": [], "rows": []}

    header, *body = table
    out = []
    for cells in body:
        # Skip rows with no level cell.
        if not cells or not cells[0].strip():
            continue
        entry = {"level": to_int(cells[0])}
        # zip stops at the shorter side, so short rows simply get fewer keys.
        for key, cell in zip(value_keys, cells[1:]):
            entry[key] = to_int(cell)
        out.append(entry)
    return {"header": header, "rows": out}
# ---------- specialization perks ----------
# Spec tables have a "Rewards" <td> with rich HTML inside. Each row may grant
# one or more perks; each perk has a name, optional cost badge, description,
# optional effect line, optional cosmetic-unlock flag, and an icon.
SPEC_ROW_RE = re.compile(
r'<tr><td class="text-center">(?P<lvl>\d+)</td>'
r'<td class="text-center">[^<]*</td>'
r'<td class="text-center">[^<]*</td>'
r'<td>(?P<rewards>.*?)</td></tr>',
re.DOTALL,
)
PERK_BLOCK_RE = re.compile(
r'<div class="flex gap-3">(?P<inner>.*?)</div>\s*</div>',
re.DOTALL,
)
PERK_NAME_RE = re.compile(r'<span class="font-medium">([^<]+)</span>')
PERK_COST_RE = re.compile(
r'<span class="text-xs px-1\.5 py-0\.5 bg-tb-30 rounded">([^<]+)</span>'
)
PERK_DESC_RE = re.compile(
r'<div class="text-sm text-neutral-400[^"]*"[^>]*>([^<]+)</div>'
)
PERK_EFFECT_RE = re.compile(
r'<div class="text-sm text-green-400[^"]*"[^>]*>([^<]+)</div>'
)
PERK_BONUS_RE = re.compile(
r'<div class="text-sm text-yellow-400[^"]*"[^>]*>([^<]+)</div>'
)
PERK_ICON_RE = re.compile(r'src="\./[^"]*/(t_ui_[^"]+\.webp)"')
def extract_spec_perks(path: Path) -> dict[int, list[dict]]:
"""Return {level: [perk, ...]} for a specialization track HTML."""
html = path.read_text()
out: dict[int, list[dict]] = {}
for row_m in SPEC_ROW_RE.finditer(html):
lvl = int(row_m.group("lvl"))
rewards_html = row_m.group("rewards")
# rows with no perks render as <span class="text-gray-500">-</span>
if 'text-gray-500' in rewards_html and 'icon-container' not in rewards_html:
continue
perks: list[dict] = []
for blk_m in PERK_BLOCK_RE.finditer(rewards_html):
inner = blk_m.group("inner")
name = PERK_NAME_RE.search(inner)
if not name:
continue
perk = {"name": name.group(1).strip()}
cost = PERK_COST_RE.search(inner)
if cost:
perk["cost"] = cost.group(1).strip()
desc = PERK_DESC_RE.search(inner)
if desc:
perk["description"] = desc.group(1).strip()
effect = PERK_EFFECT_RE.search(inner)
if effect:
perk["effect"] = effect.group(1).strip()
bonus = PERK_BONUS_RE.search(inner)
if bonus:
perk["bonus"] = bonus.group(1).strip()
icon = PERK_ICON_RE.search(inner)
if icon:
perk["icon"] = icon.group(1)
perks.append(perk)
if perks:
out[lvl] = perks
return out
# ---------- skill tree extractor ----------
# NOTE(review): NODE_RE is not used by the extractor below, which scans a
# fixed-size window after each data-tag attribute instead. It is kept in case
# other code refers to it.
NODE_RE = re.compile(
    r'<div\s+role="button"[^>]*class="node[^"]*"[^>]*data-tag="(?P<tag>Skills\.[^"]+)"[^>]*'
    r'style="(?P<style>[^"]*)"[^>]*>'
    r'(?P<inner>.*?)</div>\s*</div>',
    re.DOTALL,
)
ALT_RE = re.compile(r'alt="([^"]+)"')
HREF_RE = re.compile(r'href="(https://dune\.gaming\.tools/skills/[^"]+)"')
ICON_RE = re.compile(r'src="\./[^"]*/(t_ui_icon[^"]+\.webp)"')
GRID_RE = re.compile(r"grid-area:\s*(\d+)\s*/\s*(\d+)")
MAX_PTS_RE = re.compile(r">0/(\d+)<")
# connector lines: parse SVG <line> within the tree container
LINE_RE = re.compile(
    r'<line[^>]*\sx1="(?P<x1>\d+)"[^>]*\sy1="(?P<y1>\d+)"[^>]*\sx2="(?P<x2>\d+)"[^>]*\sy2="(?P<y2>\d+)"'
)

_TAG_RE = re.compile(r'data-tag="(Skills\.[^"]+)"')
# Number of characters after a data-tag attribute that are searched for the
# node's grid position, name, link, icon and point cap.
_NODE_SCAN_CHARS = 2500


def _extract_nodes(page: str) -> list[dict]:
    """Return one node dict per distinct skill tag, in order of first appearance."""
    from html import unescape

    nodes: dict[str, dict] = {}
    for m in _TAG_RE.finditer(page):
        tag = m.group(1)
        # The same tag can show up again (e.g. in a tooltip); the first
        # occurrence that carries a grid position wins.
        if tag in nodes:
            continue
        # No balanced-div parsing: the first alt/href/icon/max inside a
        # fixed window after the attribute are taken as this node's fields.
        chunk = page[m.start() : m.start() + _NODE_SCAN_CHARS]
        grid = GRID_RE.search(chunk)
        if not grid:
            continue
        alt = ALT_RE.search(chunk)
        href = HREF_RE.search(chunk)
        icon = ICON_RE.search(chunk)
        max_pts = MAX_PTS_RE.search(chunk)
        parts = tag.split(".")
        nodes[tag] = {
            "tag": tag,
            "id": parts[-1],
            # Attribute values are entity-encoded in the markup; decode them.
            "name": unescape(alt.group(1)) if alt else parts[-1],
            "kind": parts[1] if len(parts) > 1 else "Unknown",  # Ability | Attribute | Perk | Spice
            "row": int(grid.group(1)),
            "col": int(grid.group(2)),
            "maxPoints": int(max_pts.group(1)) if max_pts else 1,
            "icon": icon.group(1) if icon else None,
            "url": unescape(href.group(1)) if href else None,
        }
    return list(nodes.values())


def _extract_edges(page: str, nodes: list[dict]) -> list[dict]:
    """Map each SVG connector line onto the pair of nodes nearest its endpoints."""
    segments = [
        tuple(int(m.group(k)) for k in ("x1", "y1", "x2", "y2"))
        for m in LINE_RE.finditer(page)
    ]
    if not segments or not nodes:
        return []

    # Calibrate grid -> pixel scale by stretching the grid's row/col extent
    # over the extent of the connector coordinates.
    xs = [v for x1, _, x2, _ in segments for v in (x1, x2)]
    ys = [v for _, y1, _, y2 in segments for v in (y1, y2)]
    min_x, max_x = min(xs), max(xs)
    min_y, max_y = min(ys), max(ys)
    min_c = min(n["col"] for n in nodes)
    max_c = max(n["col"] for n in nodes)
    min_r = min(n["row"] for n in nodes)
    max_r = max(n["row"] for n in nodes)
    # max(1, ...) avoids division by zero when all nodes share a row/column.
    sx = (max_x - min_x) / max(1, max_c - min_c)
    sy = (max_y - min_y) / max(1, max_r - min_r)

    centers = {
        n["tag"]: (min_x + (n["col"] - min_c) * sx, min_y + (n["row"] - min_r) * sy)
        for n in nodes
    }

    def nearest(x: int, y: int) -> str:
        # Squared Euclidean distance; on ties the earliest node wins.
        return min(
            centers,
            key=lambda t: (centers[t][0] - x) ** 2 + (centers[t][1] - y) ** 2,
        )

    edges: list[dict] = []
    seen_edges: set[tuple[str, str]] = set()
    for x1, y1, x2, y2 in segments:
        a, b = nearest(x1, y1), nearest(x2, y2)
        if a == b:
            continue
        # Edges are undirected: normalise the pair so duplicates collapse.
        key = tuple(sorted((a, b)))
        if key not in seen_edges:
            seen_edges.add(key)
            edges.append({"from": key[0], "to": key[1]})
    return edges


def extract_skill_tree(path: Path, class_id: str, class_name: str) -> dict:
    """Extract the nodes and connector edges of one class's skill tree page.

    Args:
        path: Saved skill-builder HTML page (read as UTF-8).
        class_id: Identifier stored under ``"id"`` in the result.
        class_name: Display name stored under ``"name"`` in the result.

    Returns:
        ``{"id", "name", "nodes", "edges"}``. Each node has ``tag``, ``id``,
        ``name``, ``kind``, ``row``, ``col``, ``maxPoints``, ``icon`` and
        ``url``; each edge is ``{"from": tag, "to": tag}`` with the two tags
        in sorted order.
    """
    page = path.read_text(encoding="utf-8")
    nodes = _extract_nodes(page)
    return {
        "id": class_id,
        "name": class_name,
        "nodes": nodes,
        "edges": _extract_edges(page, nodes),
    }
# ---------- main ----------
def _write_json(filename: str, payload) -> None:
    """Write *payload* as indented JSON to OUT / filename."""
    (OUT / filename).write_text(json.dumps(payload, indent=2), encoding="utf-8")


def _level_reward_perks(row: dict) -> list[dict]:
    """Synthesize the "Level Reward" perk list for one character-XP row."""
    sp = row.get("skillPoints", 0) or 0
    ip = row.get("intelPoints", 0) or 0
    parts = []
    if sp > 0:
        parts.append(f"+{sp} Skill Point" + ("s" if sp > 1 else ""))
    if ip > 0:
        parts.append(f"+{ip} Intel Point" + ("s" if ip > 1 else ""))
    # No positive grant -> no perk (never emit a perk with an empty effect).
    if not parts:
        return []
    return [{"name": "Level Reward", "effect": " · ".join(parts)}]


def main():
    """Run the full extraction: write every JSON file, copy icons, print a summary.

    Reads the saved pages under SAMPLE, writes character-xp.json,
    spec-*.json, faction-*.json, skills-*.json and index.json into OUT, and
    copies every referenced icon into ICONS_OUT.
    """
    manifest = {"xp": {}, "factions": {}, "skills": []}
    icon_names: set[str] = set()

    # Character XP (200 levels, 6 value columns). Character XP has no
    # "Rewards" column, but every level grants skill / intel points — we
    # synthesize a per-level "Level Reward" perk so the UI can show them.
    char_xp = extract_xp_table(
        SAMPLE / "Character XP Table - Dune Awakening.html",
        [
            "xpRequired",
            "totalXp",
            "skillPoints",
            "totalSkillPoints",
            "intelPoints",
            "totalIntelPoints",
        ],
    )
    for row in char_xp["rows"]:
        perks = _level_reward_perks(row)
        if perks:
            row["perks"] = perks
    _write_json("character-xp.json", char_xp)
    manifest["xp"]["character"] = "character-xp.json"

    # Specialization XP (5 tracks) — also extract perks at each level.
    spec_tables: dict[str, dict] = {}
    for spec in ["Combat", "Crafting", "Exploration", "Gathering", "Sabotage"]:
        src = SAMPLE / f"{spec} Track XP Table - Dune Awakening.html"
        data = extract_xp_table(
            src, ["xpRequired", "totalXp", "intelPoints", "totalIntelPoints"]
        )
        perks_by_level = extract_spec_perks(src)
        for row in data["rows"]:
            perks = perks_by_level.get(row["level"])
            if perks:
                row["perks"] = perks
                icon_names.update(p["icon"] for p in perks if p.get("icon"))
        slug = spec.lower()
        _write_json(f"spec-{slug}.json", data)
        manifest["xp"][slug] = f"spec-{slug}.json"
        spec_tables[slug] = data

    # Faction standing — different shape: Tier# | TierName | Required Rep | Cumulative
    faction_tables: dict[str, dict] = {}
    for fac_name, fac_id in [("Atreides", "atreides"), ("Harkonnen", "harkonnen")]:
        rows = parse_table(
            SAMPLE / f"House {fac_name} Faction Standing Table - Dune Awakening.html"
        )
        header = rows[0] if rows else []
        tiers = [
            {
                "tier": to_int(r[0]),
                "name": r[1].strip(),
                "standingRequired": to_int(r[2]),
                "totalStanding": to_int(r[3]),
            }
            for r in rows[1:]
            if r and len(r) >= 4
        ]
        faction_tables[fac_id] = {"header": header, "tiers": tiers}
        _write_json(f"faction-{fac_id}.json", faction_tables[fac_id])
        manifest["factions"][fac_id] = f"faction-{fac_id}.json"

    # Skill trees
    classes = [
        ("Bene Gesserit", "benegesserit"),
        ("Mentat", "mentat"),
        ("Planetologist", "planetologist"),
        ("Swordmaster", "swordmaster"),
        ("Trooper", "trooper"),
    ]
    for cls_name, cls_id in classes:
        path = SAMPLE / f"Dune Awakening Skill Builder - {cls_name}.html"
        if not path.exists():
            print(f"!! missing {path.name}")
            continue
        tree = extract_skill_tree(path, cls_id, cls_name)
        _write_json(f"skills-{cls_id}.json", tree)
        manifest["skills"].append(
            {
                "id": cls_id,
                "name": cls_name,
                "file": f"skills-{cls_id}.json",
                "nodes": len(tree["nodes"]),
                "edges": len(tree["edges"]),
            }
        )

    # ---------- copy referenced icon webps ----------
    # Skill icons are gathered from the JSON files on disk, so a tree file
    # left by an earlier run (its source page now missing) still gets its icons.
    for _cls_name, cls_id in classes:
        tree_path = OUT / f"skills-{cls_id}.json"
        if tree_path.exists():
            tree = json.loads(tree_path.read_text(encoding="utf-8"))
            icon_names.update(n["icon"] for n in tree["nodes"] if n.get("icon"))

    copied, missing = copy_icons(icon_names)
    # Sorted so index.json is reproducible from run to run.
    missing = sorted(missing)
    manifest["icons"] = {
        "directory": "frontend/public/icons",
        "served_at": "/icons/",
        # Referenced icons that have a source, not the number copied on this
        # particular run (which is 0 whenever everything is already in place).
        "count": len(icon_names) - len(missing),
        "missing": missing,
    }
    _write_json("index.json", manifest)

    # Print summary
    print("\n=== Extraction summary ===")
    print(f"character XP rows: {len(char_xp['rows'])} cols: {char_xp['header']}")
    for slug, data in spec_tables.items():
        print(f" spec {slug:11s} rows: {len(data['rows'])}")
    for fac_id, data in faction_tables.items():
        print(f" faction {fac_id:9s} tiers: {len(data['tiers'])} cols: {data['header']}")
    for s in manifest["skills"]:
        print(f" skills {s['id']:14s} nodes: {s['nodes']:3d} edges: {s['edges']:3d}")
    print(
        f" icons copied: {copied} available: {manifest['icons']['count']}"
        + (f" missing: {len(missing)}" if missing else "")
    )


if __name__ == "__main__":
    main()