dune-tools/character-builder/scripts/extract.py
Vantz Stockwell f142725dd8 Skill tree: subtrees, loadout slots, label fix
Skill trees now render the way the source does: each class has three named
subtrees (e.g. Swordmaster: The Blade / The Will / The Way), each with its
own 3-col or 5-col grid, sized in 72px cells. Extractor parses subtrees
separately so the per-tree row/col coordinates are correct (previously all
22 nodes were stacked on one combined grid and overlapped). Connector
edges are mapped per-subtree too.

Loadout: new global 3-ability + 3-technique slot row at the bottom of the
Skill Trees panel. The cap is global across all 5 classes (matches the
source HTML which has `id=active-Ability-N` / `id=active-Technique-N`
without per-tree scope). Click a slot to pick from any allocated Ability
or Spice (for Ability slots) or any allocated Perk (for Technique slots);
right-click clears. Slot backgrounds use the local ability.png /
technique.png artwork copied into /icons.

Label overlap fix: constrained the name label under each node to the node
width (72px) and bumped the vertical gap from 44 to 60px so 2-3 line names
have room without bleeding into the row below.

Existing saved builds migrate cleanly — loadout normalizes to length-3
slot arrays if absent or malformed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 08:21:09 -04:00

501 lines
18 KiB
Python

#!/usr/bin/env python3
"""Extract Dune Awakening game data from saved dune.gaming.tools HTML pages.
Outputs JSON files into ../data/:
- character-xp.json
- spec-{combat,crafting,exploration,gathering,sabotage}.json
- faction-{atreides,harkonnen}.json
- skills-{benegesserit,mentat,planetologist,swordmaster,trooper}.json
- index.json (manifest)
Also copies every referenced icon webp from sample-data/*/ into
../frontend/public/icons/ so the SPA can serve them as /icons/<name>.webp.
"""
import json
import re
import shutil
from html.parser import HTMLParser
from pathlib import Path
# Directory holding the saved HTML pages and their "*_files" asset folders.
# Resolved relative to this file: the grandparent of the script's directory.
SAMPLE = Path(__file__).resolve().parents[2] / "sample-data"
# Destination for the generated JSON files: a "data" directory next to the
# directory that contains this script.
OUT = Path(__file__).resolve().parents[1] / "data"
# Destination for icon files copied by copy_icons() and main().
ICONS_OUT = Path(__file__).resolve().parents[1] / "frontend" / "public" / "icons"
# NOTE: both output directories are created at import time, so merely
# importing this module touches the filesystem.
OUT.mkdir(parents=True, exist_ok=True)
ICONS_OUT.mkdir(parents=True, exist_ok=True)
def find_icon_source(name: str) -> Path | None:
    """Return the first ``SAMPLE/<something>_files/<name>`` that exists.

    Only direct children of ``SAMPLE`` that are directories and whose name
    ends in ``_files`` are searched, in the order ``iterdir()`` yields them.
    Returns ``None`` when no such directory contains ``name``.
    """
    asset_dirs = (
        entry
        for entry in SAMPLE.iterdir()
        if entry.is_dir() and entry.name.endswith("_files")
    )
    # Generators keep the lookup lazy: probing stops at the first hit.
    hits = (folder / name for folder in asset_dirs)
    return next((hit for hit in hits if hit.exists()), None)
def copy_icons(names: set[str]) -> tuple[int, list[str]]:
    """Copy referenced icons into ICONS_OUT.

    Args:
        names: File names to look up with ``find_icon_source``.

    Returns:
        ``(copied, missing)`` where ``copied`` is the number of files actually
        written during this call and ``missing`` lists the names for which no
        source file was found, in sorted order.
    """
    copied = 0
    missing: list[str] = []
    # Iterate in sorted order: set iteration order for strings varies between
    # interpreter runs, which would otherwise reshuffle the ``missing`` list
    # (and anything serialized from it) on every execution.
    for n in sorted(names):
        src = find_icon_source(n)
        if not src:
            missing.append(n)
            continue
        dst = ICONS_OUT / n
        # A destination of identical size is treated as already up to date.
        if not dst.exists() or src.stat().st_size != dst.stat().st_size:
            shutil.copy2(src, dst)
            copied += 1
    return copied, missing
# ---------- generic <table class="datatable"> extractor ----------
class TableExtractor(HTMLParser):
    """Collect the cell text of rows inside ``<table class="...datatable...">``.

    After feeding HTML, ``rows`` holds one list of stripped cell strings per
    ``<tr>`` that produced at least one cell. ``<td>`` and ``<th>`` cells are
    treated alike, so a header row simply appears as the first entry.

    End tags for cells and rows are optional in HTML, so an open cell is
    flushed when the next cell or row starts, and an open row is flushed when
    the next row starts or the table ends.
    """

    def __init__(self):
        super().__init__()
        self.in_table = False
        self.in_row = False
        self.in_cell = False
        self.current_row = []
        self.current_cell = ""
        self.rows = []
        # Never populated by this class; kept so existing attribute access
        # keeps working.
        self.header = []

    def _close_cell(self):
        # Finish the cell being collected, if any, and add it to the row.
        if self.in_cell:
            self.current_row.append(self.current_cell.strip())
            self.in_cell = False

    def _close_row(self):
        # Finish the row being collected, if any; rows without cells are dropped.
        self._close_cell()
        if self.in_row:
            if self.current_row:
                self.rows.append(self.current_row)
            self.in_row = False

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if tag == "table" and "datatable" in (attrs.get("class") or ""):
            self.in_table = True
        elif self.in_table and tag == "tr":
            # A new <tr> implicitly ends a row whose </tr> was omitted.
            self._close_row()
            self.in_row = True
            self.current_row = []
        elif self.in_table and tag in ("td", "th"):
            # A new cell implicitly ends a cell whose end tag was omitted.
            self._close_cell()
            self.in_cell = True
            self.current_cell = ""

    def handle_endtag(self, tag):
        if tag == "table":
            # Flush whatever is still open before leaving the table.
            self._close_row()
            self.in_table = False
        elif tag == "tr" and self.in_row:
            self._close_row()
        elif tag in ("td", "th") and self.in_cell:
            self._close_cell()

    def handle_data(self, data):
        # Text is only accumulated while inside a cell of a matching table.
        if self.in_cell:
            self.current_cell += data
def parse_table(path: Path) -> list[list[str]]:
    """Return the rows TableExtractor collects from the HTML file at ``path``.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's locale encoding.
    """
    p = TableExtractor()
    p.feed(path.read_text(encoding="utf-8"))
    # close() makes the parser process any input it is still buffering.
    p.close()
    return p.rows
def to_int(s: str) -> int:
    """Parse an integer out of ``s``, ignoring everything but digits and ``-``.

    Falsy input, input with no digits, a lone ``-``, and any remainder that
    ``int()`` rejects (for example ``"1-2"``) all yield ``0``.
    """
    kept = "".join(re.findall(r"[\d-]", s or ""))
    if kept in ("", "-"):
        return 0
    try:
        value = int(kept)
    except ValueError:
        value = 0
    return value
def extract_xp_table(path: Path, value_keys: list[str]) -> dict:
    """Parse a table shaped ``[Level | XP Required | Total XP | ...]``.

    Args:
        path: HTML file to read via ``parse_table``.
        value_keys: Names for the columns after Level, in column order.
            Columns beyond the end of a row are simply omitted from that
            row's entry.

    Returns:
        ``{"header": <first table row>, "rows": [{"level": int, ...}, ...]}``.
        When the file yields no table rows, both values are empty lists, so
        callers can always index ``["header"]`` and ``["rows"]``.
    """
    rows = parse_table(path)
    if not rows:
        # Same shape as the normal result; a bare list here would make
        # callers that index ["rows"] fail with a TypeError.
        return {"header": [], "rows": []}
    header, *body = rows
    out = []
    for r in body:
        # Skip rows whose Level cell is missing or blank.
        if not r or not r[0].strip():
            continue
        entry = {"level": to_int(r[0])}
        # zip() stops at the shorter side, so short rows get fewer keys.
        for key, cell in zip(value_keys, r[1:]):
            entry[key] = to_int(cell)
        out.append(entry)
    return {"header": header, "rows": out}
# ---------- specialization perks ----------
# Spec tables have a "Rewards" <td> with rich HTML inside. Each row may grant
# one or more perks; each perk has a name, optional cost badge, description,
# optional effect line, optional cosmetic-unlock flag, and an icon.
# One table row of a specialization page. Captures the level number ("lvl")
# from the first cell and the raw inner HTML of the fourth cell ("rewards").
# The second and third cells must contain no tags ([^<]*), and the row must be
# written without whitespace between its tags.
SPEC_ROW_RE = re.compile(
    r'<tr><td class="text-center">(?P<lvl>\d+)</td>'
    r'<td class="text-center">[^<]*</td>'
    r'<td class="text-center">[^<]*</td>'
    r'<td>(?P<rewards>.*?)</td></tr>',
    re.DOTALL,
)
# One perk inside a rewards cell: from `<div class="flex gap-3">` up to the
# first `</div>` that is followed (after optional whitespace) by another
# `</div>`. "inner" is the markup in between.
PERK_BLOCK_RE = re.compile(
    r'<div class="flex gap-3">(?P<inner>.*?)</div>\s*</div>',
    re.DOTALL,
)
# Perk name: tag-free text of a `<span class="font-medium">`.
PERK_NAME_RE = re.compile(r'<span class="font-medium">([^<]+)</span>')
# Cost badge: tag-free text of the span carrying exactly this class list.
PERK_COST_RE = re.compile(
    r'<span class="text-xs px-1\.5 py-0\.5 bg-tb-30 rounded">([^<]+)</span>'
)
# The next three patterns differ only in the colour class they look for; each
# captures the tag-free text of a div whose class attribute starts with the
# given prefix. extract_spec_perks stores them as "description", "effect" and
# "bonus" respectively.
PERK_DESC_RE = re.compile(
    r'<div class="text-sm text-neutral-400[^"]*"[^>]*>([^<]+)</div>'
)
PERK_EFFECT_RE = re.compile(
    r'<div class="text-sm text-green-400[^"]*"[^>]*>([^<]+)</div>'
)
PERK_BONUS_RE = re.compile(
    r'<div class="text-sm text-yellow-400[^"]*"[^>]*>([^<]+)</div>'
)
# Icon file name: the trailing `t_ui_*.webp` component of a relative
# `src="./..."` attribute.
PERK_ICON_RE = re.compile(r'src="\./[^"]*/(t_ui_[^"]+\.webp)"')
def extract_spec_perks(path: Path) -> dict[int, list[dict]]:
    """Return ``{level: [perk, ...]}`` for a specialization track HTML file.

    Each perk dict always has ``"name"`` and may have ``"cost"``,
    ``"description"``, ``"effect"``, ``"bonus"`` and ``"icon"``. Levels whose
    row yields no perk are left out of the result.

    The file is decoded as UTF-8 explicitly, and captured text is passed
    through ``html.unescape`` because the regexes read raw markup, where
    characters such as ``&`` appear as entities (``&amp;``).
    """
    from html import unescape

    page = path.read_text(encoding="utf-8")
    # Optional text fields, in the order they are added to each perk dict.
    text_fields = (
        ("cost", PERK_COST_RE),
        ("description", PERK_DESC_RE),
        ("effect", PERK_EFFECT_RE),
        ("bonus", PERK_BONUS_RE),
    )
    out: dict[int, list[dict]] = {}
    for row_m in SPEC_ROW_RE.finditer(page):
        rewards_html = row_m.group("rewards")
        # rows with no perks render as <span class="text-gray-500">-</span>
        if "text-gray-500" in rewards_html and "icon-container" not in rewards_html:
            continue
        perks: list[dict] = []
        for blk_m in PERK_BLOCK_RE.finditer(rewards_html):
            inner = blk_m.group("inner")
            name = PERK_NAME_RE.search(inner)
            if not name:
                # A block without a name span is not treated as a perk.
                continue
            perk = {"name": unescape(name.group(1)).strip()}
            for key, pattern in text_fields:
                found = pattern.search(inner)
                if found:
                    perk[key] = unescape(found.group(1)).strip()
            icon = PERK_ICON_RE.search(inner)
            if icon:
                perk["icon"] = icon.group(1)
            perks.append(perk)
        if perks:
            out[int(row_m.group("lvl"))] = perks
    return out
# ---------- skill tree extractor ----------
# A whole skill node element: a `<div role="button">` whose class starts with
# "node", carrying a `data-tag="Skills...."` attribute followed later by a
# `style` attribute, up to the first `</div>` + optional whitespace + `</div>`.
# NOTE(review): not referenced by the functions visible in this file, which
# scan for `data-tag` attributes directly — confirm before removing.
NODE_RE = re.compile(
    r'<div\s+role="button"[^>]*class="node[^"]*"[^>]*data-tag="(?P<tag>Skills\.[^"]+)"[^>]*'
    r'style="(?P<style>[^"]*)"[^>]*>'
    r'(?P<inner>.*?)</div>\s*</div>',
    re.DOTALL,
)
# Value of an `alt` attribute; _extract_node uses it as the node's name.
ALT_RE = re.compile(r'alt="([^"]+)"')
# Absolute link to a skill page on dune.gaming.tools.
HREF_RE = re.compile(r'href="(https://dune\.gaming\.tools/skills/[^"]+)"')
# Icon file name: trailing `t_ui_icon*.webp` component of a relative `src`.
ICON_RE = re.compile(r'src="\./[^"]*/(t_ui_icon[^"]+\.webp)"')
# First two integers of a CSS `grid-area: A / B` declaration; _extract_node
# reads them as (row, col).
GRID_RE = re.compile(r"grid-area:\s*(\d+)\s*/\s*(\d+)")
# Text of the form `0/N` sitting directly between two tags; captures N, which
# _extract_node stores as maxPoints.
MAX_PTS_RE = re.compile(r">0/(\d+)<")
# connector lines: parse SVG <line> within the tree container
# The attributes must appear in the order x1, y1, x2, y2 and hold unsigned
# integers; lines written differently are not matched.
LINE_RE = re.compile(
    r'<line[^>]*\sx1="(?P<x1>\d+)"[^>]*\sy1="(?P<y1>\d+)"[^>]*\sx2="(?P<x2>\d+)"[^>]*\sy2="(?P<y2>\d+)"'
)
# Subtree heading: an <h3> whose class attribute contains "text-xl"; captures
# the tag-free heading text. extract_skill_tree splits the page on it.
SUBTREE_H3_RE = re.compile(
    r'<h3[^>]*class="[^"]*text-xl[^"]*"[^>]*>([^<]+)</h3>'
)
# Column count N of `grid-template-columns: repeat(N, 72px)`.
GRID_COLS_RE = re.compile(r"grid-template-columns:\s*repeat\((\d+),\s*72px\)")
def _extract_node(chunk: str, tag: str) -> dict | None:
    """Build a node dict from the markup that follows a ``data-tag`` attribute.

    Args:
        chunk: HTML text starting at the node's ``data-tag`` attribute.
        tag: The dotted tag value, e.g. ``Skills.<Kind>.<...>.<Id>``.

    Returns:
        A dict with ``tag``, ``id``, ``name``, ``kind``, ``row``, ``col``,
        ``maxPoints``, ``icon`` and ``url``; or ``None`` when ``chunk``
        contains no ``grid-area`` declaration.

    ``name`` and ``url`` come from attribute values in raw markup, so HTML
    entities in them (e.g. ``&amp;``) are decoded.
    """
    from html import unescape

    gm = GRID_RE.search(chunk)
    if not gm:
        return None
    parts = tag.split(".")
    node_id = parts[-1]
    alt = ALT_RE.search(chunk)
    href = HREF_RE.search(chunk)
    icon = ICON_RE.search(chunk)
    max_pts = MAX_PTS_RE.search(chunk)
    return {
        "tag": tag,
        "id": node_id,
        # Fall back to the last tag component when no alt text is found.
        "name": unescape(alt.group(1)) if alt else node_id,
        # Second tag component: Ability | Attribute | Perk | Spice
        "kind": parts[1] if len(parts) > 1 else "Unknown",
        "row": int(gm.group(1)),
        "col": int(gm.group(2)),
        # Nodes without a "0/N" counter are treated as single-point nodes.
        "maxPoints": int(max_pts.group(1)) if max_pts else 1,
        "icon": icon.group(1) if icon else None,
        "url": unescape(href.group(1)) if href else None,
    }
def _map_edges(html_slice: str, nodes: list[dict]) -> list[dict]:
    """Turn connector ``<line>`` elements into node-to-node edges.

    The bounding box of all line endpoints is stretched over the range of
    node (col, row) coordinates to estimate a pixel centre for every node.
    Each endpoint is then assigned to the node with the closest centre.

    Returns a list of ``{"from": tag, "to": tag}`` dicts with the two tags in
    sorted order, one entry per distinct pair, in first-seen order. Lines
    whose endpoints land on the same node are dropped. Returns ``[]`` when
    there are no lines or no nodes.
    """
    segments = [
        tuple(int(m.group(k)) for k in ("x1", "y1", "x2", "y2"))
        for m in LINE_RE.finditer(html_slice)
    ]
    if not segments or not nodes:
        return []

    xs = [v for x1, _, x2, _ in segments for v in (x1, x2)]
    ys = [v for _, y1, _, y2 in segments for v in (y1, y2)]
    left, right = min(xs), max(xs)
    top, bottom = min(ys), max(ys)

    first_col = min(n["col"] for n in nodes)
    last_col = max(n["col"] for n in nodes)
    first_row = min(n["row"] for n in nodes)
    last_row = max(n["row"] for n in nodes)

    # Pixels per grid step; max(1, ...) avoids dividing by zero when all
    # nodes share a single column or row.
    step_x = (right - left) / max(1, (last_col - first_col))
    step_y = (bottom - top) / max(1, (last_row - first_row))

    centers = {}
    for n in nodes:
        centers[n["tag"]] = (
            left + (n["col"] - first_col) * step_x,
            top + (n["row"] - first_row) * step_y,
        )

    def nearest(x: int, y: int) -> str | None:
        # min() keeps the first of several equally distant centres.
        return min(
            centers,
            key=lambda t: (centers[t][0] - x) ** 2 + (centers[t][1] - y) ** 2,
        )

    edges = []
    seen = set()
    for x1, y1, x2, y2 in segments:
        a, b = nearest(x1, y1), nearest(x2, y2)
        if not (a and b) or a == b:
            continue
        pair = tuple(sorted((a, b)))
        if pair in seen:
            continue
        seen.add(pair)
        edges.append({"from": pair[0], "to": pair[1]})
    return edges
def extract_skill_tree(path: Path, class_id: str, class_name: str) -> dict:
    """Parse a class skill tree into its named subtrees.

    Each class is composed of 3 subtrees (e.g. Swordmaster has "The Blade",
    "The Will", "The Way"). Each subtree is its own CSS grid with its own
    column count, node positions, and connectors. Treating them as one big
    grid (the prior behavior) collapsed all 22 nodes on top of each other.

    Args:
        path: Saved skill-builder HTML page, decoded as UTF-8.
        class_id: Identifier copied into the result's ``"id"``.
        class_name: Display name copied into the result's ``"name"``.

    Returns:
        ``{"id", "name", "subtrees": [{"name", "cols", "nodes", "edges"}]}``.
        Headings that are not followed by a graph element are skipped.
    """
    from html import unescape

    page = path.read_text(encoding="utf-8")
    # The pattern has one capture group, so split() yields
    # [preamble, name1, body1, name2, body2, ...]; drop the preamble.
    pieces = SUBTREE_H3_RE.split(page)[1:]
    subtrees: list[dict] = []
    for raw_name, body in zip(pieces[0::2], pieces[1::2]):
        # Heading text is raw markup, so decode entities such as &amp;.
        name = unescape(raw_name).strip()
        # The split already ends each body at the next subtree heading. Only
        # the text before the graph element is trimmed here; anything after
        # the graph (e.g. trailing page chrome) stays in the slice.
        graph_start = body.find('<div class="graph svelte-1dvag2h"')
        if graph_start < 0:
            continue
        body = body[graph_start:]
        cols_m = GRID_COLS_RE.search(body)
        # Fall back to 3 columns when no grid-template-columns is found.
        cols = int(cols_m.group(1)) if cols_m else 3
        # Parse nodes inside this subtree.
        nodes: list[dict] = []
        seen_tags: set[str] = set()
        for m in re.finditer(r'data-tag="(Skills\.[^"]+)"', body):
            tag = m.group(1)
            if tag in seen_tags:
                continue
            # Fixed-size window after the attribute; assumed to be long
            # enough to contain the node's own markup.
            chunk = body[m.start() : m.start() + 2500]
            node = _extract_node(chunk, tag)
            if node:
                nodes.append(node)
                seen_tags.add(tag)
        edges = _map_edges(body, nodes)
        subtrees.append(
            {"name": name, "cols": cols, "nodes": nodes, "edges": edges}
        )
    return {"id": class_id, "name": class_name, "subtrees": subtrees}
# ---------- main ----------
def main():
    """Run the full extraction and write every JSON file plus the icons.

    Steps, in order: character XP table, the five specialization tracks, the
    two faction standing tables, the five class skill trees, icon copying,
    the ``index.json`` manifest, and finally a printed summary that re-reads
    the files just written.
    """
    # Collects the file names written below; dumped to index.json at the end.
    manifest = {"xp": {}, "factions": {}, "skills": []}
    # Character XP (200 levels, 6 value columns). Character XP has no
    # "Rewards" column, but every level grants skill / intel points — we
    # synthesize a per-level "Level Reward" perk so the UI can show them.
    char_xp = extract_xp_table(
        SAMPLE / "Character XP Table - Dune Awakening.html",
        [
            "xpRequired",
            "totalXp",
            "skillPoints",
            "totalSkillPoints",
            "intelPoints",
            "totalIntelPoints",
        ],
    )
    for row in char_xp["rows"]:
        # `or 0` also covers a key that is present but falsy.
        sp = row.get("skillPoints", 0) or 0
        ip = row.get("intelPoints", 0) or 0
        if sp == 0 and ip == 0:
            continue
        parts = []
        if sp > 0:
            parts.append(f"+{sp} Skill Point" + ("s" if sp > 1 else ""))
        if ip > 0:
            parts.append(f"+{ip} Intel Point" + ("s" if ip > 1 else ""))
        row["perks"] = [
            {
                "name": "Level Reward",
                "effect": " · ".join(parts),
            }
        ]
    (OUT / "character-xp.json").write_text(json.dumps(char_xp, indent=2))
    manifest["xp"]["character"] = "character-xp.json"
    # Specialization XP (5 tracks) — also extract perks at each level.
    specs = ["Combat", "Crafting", "Exploration", "Gathering", "Sabotage"]
    for spec in specs:
        src = SAMPLE / f"{spec} Track XP Table - Dune Awakening.html"
        data = extract_xp_table(
            src, ["xpRequired", "totalXp", "intelPoints", "totalIntelPoints"]
        )
        # The same file is parsed a second time, with regexes, for the perks.
        perks_by_level = extract_spec_perks(src)
        for row in data["rows"]:
            perks = perks_by_level.get(row["level"])
            if perks:
                row["perks"] = perks
        slug = spec.lower()
        (OUT / f"spec-{slug}.json").write_text(json.dumps(data, indent=2))
        manifest["xp"][slug] = f"spec-{slug}.json"
    # Faction standing — different shape: Tier# | TierName | Required Rep | Cumulative
    factions = [("Atreides", "atreides"), ("Harkonnen", "harkonnen")]
    for fac_name, fac_id in factions:
        rows = parse_table(
            SAMPLE / f"House {fac_name} Faction Standing Table - Dune Awakening.html"
        )
        header = rows[0] if rows else []
        tiers = []
        for r in rows[1:]:
            # Rows with fewer than the four expected columns are ignored.
            if not r or len(r) < 4:
                continue
            tiers.append(
                {
                    "tier": to_int(r[0]),
                    "name": r[1].strip(),
                    "standingRequired": to_int(r[2]),
                    "totalStanding": to_int(r[3]),
                }
            )
        (OUT / f"faction-{fac_id}.json").write_text(
            json.dumps({"header": header, "tiers": tiers}, indent=2)
        )
        manifest["factions"][fac_id] = f"faction-{fac_id}.json"
    # Skill trees
    classes = [
        ("Bene Gesserit", "benegesserit"),
        ("Mentat", "mentat"),
        ("Planetologist", "planetologist"),
        ("Swordmaster", "swordmaster"),
        ("Trooper", "trooper"),
    ]
    for cls_name, cls_id in classes:
        path = SAMPLE / f"Dune Awakening Skill Builder - {cls_name}.html"
        # Unlike the XP and faction pages, a missing skill page is tolerated:
        # it is reported and the class is left out of the manifest.
        if not path.exists():
            print(f"!! missing {path.name}")
            continue
        tree = extract_skill_tree(path, cls_id, cls_name)
        (OUT / f"skills-{cls_id}.json").write_text(json.dumps(tree, indent=2))
        total_nodes = sum(len(st["nodes"]) for st in tree["subtrees"])
        total_edges = sum(len(st["edges"]) for st in tree["subtrees"])
        manifest["skills"].append(
            {
                "id": cls_id,
                "name": cls_name,
                "file": f"skills-{cls_id}.json",
                "subtrees": [st["name"] for st in tree["subtrees"]],
                "nodes": total_nodes,
                "edges": total_edges,
            }
        )
    # ---------- copy referenced icon webps ----------
    # Icon names are gathered by re-reading the JSON files written above.
    icon_names: set[str] = set()
    for cls_name, cls_id in classes:
        path = OUT / f"skills-{cls_id}.json"
        # NOTE(review): this also picks up a skills file left over from an
        # earlier run when the class page was skipped above — confirm intended.
        if path.exists():
            tree = json.loads(path.read_text())
            for st in tree["subtrees"]:
                for n in st["nodes"]:
                    if n.get("icon"):
                        icon_names.add(n["icon"])
    for spec in specs:
        slug = spec.lower()
        spec_data = json.loads((OUT / f"spec-{slug}.json").read_text())
        for r in spec_data["rows"]:
            for p in r.get("perks", []):
                if p.get("icon"):
                    icon_names.add(p["icon"])
    copied, missing = copy_icons(icon_names)
    # Slot background images for the global Abilities + Techniques loadout —
    # the source HTML references them from a CDN, but local copies live in
    # the per-class _files directories.
    for src_name, dst_name in [
        ("ability.png", "slot-ability.png"),
        ("technique.png", "slot-technique.png"),
    ]:
        src = find_icon_source(src_name)
        # A slot image that cannot be found is skipped silently and is not
        # added to the "missing" list.
        if src:
            shutil.copy2(src, ICONS_OUT / dst_name)
    # NOTE(review): "count" is the number of files copy_icons() wrote during
    # this run, not the number of referenced icons; it can be lower on a
    # re-run over an already populated icons directory.
    manifest["icons"] = {
        "directory": "frontend/public/icons",
        "served_at": "/icons/",
        "count": copied,
        "missing": missing,
    }
    (OUT / "index.json").write_text(json.dumps(manifest, indent=2))
    # Print summary
    print("\n=== Extraction summary ===")
    cx = json.loads((OUT / "character-xp.json").read_text())
    print(f"character XP rows: {len(cx['rows'])} cols: {cx['header']}")
    for spec in ["combat", "crafting", "exploration", "gathering", "sabotage"]:
        d = json.loads((OUT / f"spec-{spec}.json").read_text())
        print(f" spec {spec:11s} rows: {len(d['rows'])}")
    for fac in ["atreides", "harkonnen"]:
        d = json.loads((OUT / f"faction-{fac}.json").read_text())
        print(f" faction {fac:9s} tiers: {len(d['tiers'])} cols: {d['header']}")
    for s in manifest["skills"]:
        print(f" skills {s['id']:14s} nodes: {s['nodes']:3d} edges: {s['edges']:3d}")
    print(
        f" icons copied: {manifest['icons']['count']}"
        + (f" missing: {len(manifest['icons']['missing'])}" if manifest['icons']['missing'] else "")
    )


# Run the extraction only when executed as a script, not when imported.
if __name__ == "__main__":
    main()