extract.py now also copies every referenced icon webp from sample-data into character-builder/frontend/public/icons/ so Vite bundles them with the SPA (serves at /icons/<filename>.webp). 154 icons (91 skill + 63 perk) end up in the build, ~668 KB total. SkillTree nodes show the skill's icon centered in the node instead of the name text (name moves below the node as a label). Hovered / allocated / maxed states change icon brightness and saturation, with a sand-colored drop-shadow on maxed nodes. XpProgressCard renders the perk icon to the left of each perk's text. Locked perks desaturate the icon. The grid grows from 2 columns to 3 to accommodate. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
471 lines
17 KiB
Python
471 lines
17 KiB
Python
#!/usr/bin/env python3
|
|
"""Extract Dune Awakening game data from saved dune.gaming.tools HTML pages.
|
|
|
|
Outputs JSON files into ../data/:
  - character-xp.json
  - spec-{combat,crafting,exploration,gathering,sabotage}.json
  - faction-{atreides,harkonnen}.json
  - skills-{benegesserit,mentat,planetologist,swordmaster,trooper}.json
  - index.json (manifest)

Also copies every referenced icon webp from sample-data/*_files/ into
../frontend/public/icons/ so the SPA can serve them as /icons/<name>.webp.
|
|
"""
|
|
import html
import json
import re
import shutil
from html.parser import HTMLParser
from pathlib import Path
|
|
|
|
# Every location is derived from this script's own path, so the current
# working directory does not matter.
_SCRIPT = Path(__file__).resolve()
_BASE = _SCRIPT.parents[1]

# Input: saved HTML pages plus their "*_files" asset folders.
SAMPLE = _SCRIPT.parents[2] / "sample-data"
# Output: generated JSON files.
OUT = _BASE / "data"
# Output: icons copied here are what the SPA serves under /icons/.
ICONS_OUT = _BASE / "frontend" / "public" / "icons"

# Both output directories are created at import time.
for _out_dir in (OUT, ICONS_OUT):
    _out_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
def find_icon_source(name: str) -> Path | None:
    """Locate an icon file inside a ``sample-data/*_files/`` directory.

    Args:
        name: Bare icon filename, e.g. ``t_ui_icon_foo.webp``.

    Returns:
        The path of the first match, or ``None`` when no ``*_files``
        directory under SAMPLE contains a regular file with that name.

    Directories are visited in sorted order.  ``Path.iterdir`` yields entries
    in arbitrary order, so without sorting the chosen source could differ
    between machines when several saved pages ship the same icon filename.
    """
    for sub in sorted(SAMPLE.iterdir()):
        if not (sub.is_dir() and sub.name.endswith("_files")):
            continue
        candidate = sub / name
        # is_file() rather than exists(): copy_icons() hands the result to
        # shutil.copy2, which cannot copy a directory.
        if candidate.is_file():
            return candidate
    return None
|
|
|
|
|
|
def copy_icons(names: set[str]) -> tuple[int, list[str]]:
    """Copy referenced icons into ICONS_OUT.

    Args:
        names: Icon filenames; each one is looked up with find_icon_source().

    Returns:
        ``(copied, missing)``.  ``copied`` is the number of files actually
        written during this call: icons already present in ICONS_OUT with the
        same size are skipped and not counted.  ``missing`` lists the names
        for which no source file was found.

    Names are processed in sorted order so that ``missing`` is stable between
    runs; iterating the set directly would order it by string hash, which
    changes from one interpreter run to the next.
    """
    copied = 0
    missing: list[str] = []
    for name in sorted(names):
        src = find_icon_source(name)
        if src is None:
            missing.append(name)
            continue
        dst = ICONS_OUT / name
        # Cheap freshness check: (re)copy only when the destination is absent
        # or its size differs from the source.
        if not dst.exists() or src.stat().st_size != dst.stat().st_size:
            shutil.copy2(src, dst)
            copied += 1
    return copied, missing
|
|
|
|
|
|
# ---------- generic <table class="datatable"> extractor ----------
|
|
class TableExtractor(HTMLParser):
    """Collect the cell text of tables whose class contains ``datatable``.

    After ``feed()``, ``rows`` holds one list of stripped cell strings for
    every ``<tr>`` that produced at least one cell.  ``<td>`` and ``<th>``
    cells are treated alike, so a header row is simply the first row.
    """

    def __init__(self):
        super().__init__()
        # Parser position: inside a datatable / a row / a cell.
        self.in_table = False
        self.in_row = False
        self.in_cell = False
        # Accumulators for the row and cell currently being read.
        self.current_row = []
        self.current_cell = ""
        # Finished rows, in document order.
        self.rows = []
        # Never populated by this class; kept as part of its attribute set.
        self.header = []

    def handle_starttag(self, tag, attrs):
        if tag == "table":
            # Only a table whose class attribute mentions "datatable" turns
            # collection on; any other <table> start tag is ignored.
            css_class = dict(attrs).get("class") or ""
            if "datatable" in css_class:
                self.in_table = True
            return
        if not self.in_table:
            return
        if tag == "tr":
            self.in_row = True
            self.current_row = []
        elif tag in ("td", "th"):
            self.in_cell = True
            self.current_cell = ""

    def handle_endtag(self, tag):
        if tag == "table":
            # Any closing </table> switches collection off.
            self.in_table = False
            return
        if tag == "tr":
            if self.in_row:
                # Rows that produced no cells are dropped.
                if self.current_row:
                    self.rows.append(self.current_row)
                self.in_row = False
            return
        if tag in ("td", "th") and self.in_cell:
            self.current_row.append(self.current_cell.strip())
            self.in_cell = False

    def handle_data(self, data):
        # Text counts only while a cell is open; a cell's text may arrive in
        # several pieces, so it is concatenated.
        if not self.in_cell:
            return
        self.current_cell += data
|
|
|
|
|
|
def parse_table(path: Path) -> list[list[str]]:
    """Return the datatable rows of a saved HTML page.

    Args:
        path: HTML file to read.

    Returns:
        The ``rows`` collected by TableExtractor: one list of stripped cell
        strings per non-empty table row, header row included.

    The file is decoded as UTF-8 explicitly.  ``Path.read_text()`` without an
    encoding uses the platform's locale encoding, which corrupts non-ASCII
    text (or raises) on systems whose default is not UTF-8.
    """
    parser = TableExtractor()
    parser.feed(path.read_text(encoding="utf-8"))
    # Flush anything HTMLParser is still buffering at end of input.
    parser.close()
    return parser.rows
|
|
|
|
|
|
def to_int(s: str) -> int:
    """Parse an integer out of display text such as ``"1,234"``.

    Everything except digits and ``-`` is removed before parsing.  Falsy
    input, input with no digits, and leftovers that are not a valid integer
    (for example ``"1-2"``) all yield ``0``.
    """
    digits = re.sub(r"[^\d-]", "", s or "")
    if digits in ("", "-"):
        return 0
    try:
        value = int(digits)
    except ValueError:
        # Stray hyphens, e.g. "1-2" or "--3".
        value = 0
    return value
|
|
|
|
|
|
def extract_xp_table(path: Path, value_keys: list[str]) -> dict:
    """Parse a table shaped ``[Level | XP Required | Total XP | ...]``.

    Args:
        path: Saved HTML page containing the datatable.
        value_keys: Names for the columns after "Level", in column order.
            Keys without a matching cell in a row are left out of that row.

    Returns:
        ``{"header": [...], "rows": [{"level": int, <key>: int, ...}, ...]}``.
        The first table row is returned verbatim as ``header``; rows whose
        first cell is blank are skipped.  A page without any table rows
        yields ``{"header": [], "rows": []}`` so callers can always index
        ``["rows"]``; returning a bare list for that case made them fail.
    """
    rows = parse_table(path)
    if not rows:
        return {"header": [], "rows": []}
    header, *body = rows
    out = []
    for cells in body:
        if not cells or not cells[0].strip():
            continue
        # to_int() never raises, so no exception handling is needed here.
        entry = {"level": to_int(cells[0])}
        # zip() stops at the shorter side, which skips keys for short rows.
        for key, cell in zip(value_keys, cells[1:]):
            entry[key] = to_int(cell)
        out.append(entry)
    return {"header": header, "rows": out}
|
|
|
|
|
|
# ---------- specialization perks ----------
|
|
# Spec tables have a "Rewards" <td> with rich HTML inside. Each row may grant
|
|
# one or more perks; each perk has a name, optional cost badge, description,
|
|
# optional effect line, optional cosmetic-unlock flag, and an icon.
|
|
# One table row: the level cell, two centered cells whose content is not
# captured, then the rewards cell (which may span several lines).
SPEC_ROW_RE = re.compile(
    r'<tr><td class="text-center">(?P<lvl>\d+)</td>'
    + r'<td class="text-center">[^<]*</td>' * 2
    + r'<td>(?P<rewards>.*?)</td></tr>',
    re.DOTALL,
)

# One perk inside a rewards cell.
PERK_BLOCK_RE = re.compile(
    r'<div class="flex gap-3">(?P<inner>.*?)</div>\s*</div>', re.DOTALL
)

PERK_NAME_RE = re.compile(r'<span class="font-medium">([^<]+)</span>')
PERK_COST_RE = re.compile(
    r'<span class="text-xs px-1\.5 py-0\.5 bg-tb-30 rounded">([^<]+)</span>'
)


def _perk_line_re(colour: str) -> re.Pattern:
    """Pattern for a ``text-sm`` div of the given text colour class."""
    return re.compile(
        r'<div class="text-sm text-' + colour + r'[^"]*"[^>]*>([^<]+)</div>'
    )


# The description, effect and bonus lines differ only in their colour class.
PERK_DESC_RE = _perk_line_re("neutral-400")
PERK_EFFECT_RE = _perk_line_re("green-400")
PERK_BONUS_RE = _perk_line_re("yellow-400")

# Captures just the filename of a perk icon referenced by a relative src.
PERK_ICON_RE = re.compile(r'src="\./[^"]*/(t_ui_[^"]+\.webp)"')
|
|
|
|
|
|
def extract_spec_perks(path: Path) -> dict[int, list[dict]]:
    """Return ``{level: [perk, ...]}`` for a specialization track page.

    Args:
        path: Saved HTML page of one specialization track.

    Returns:
        A dict keyed by level.  Levels without any recognised perk are
        absent.  Every perk has a ``name``; ``cost``, ``description``,
        ``effect``, ``bonus`` and ``icon`` are present only when the matching
        markup was found inside the perk block.

    Two fixes over a plain regex scrape:
      * the page is decoded as UTF-8 explicitly instead of with the platform
        locale encoding;
      * captured text is passed through ``html.unescape`` so entities such as
        ``&amp;`` do not leak into the JSON.  The regexes see raw markup,
        unlike the HTMLParser-based table extractor which decodes entities.
    """
    page = path.read_text(encoding="utf-8")
    # Optional text fields, in the order they are added to each perk dict.
    text_fields = (
        ("cost", PERK_COST_RE),
        ("description", PERK_DESC_RE),
        ("effect", PERK_EFFECT_RE),
        ("bonus", PERK_BONUS_RE),
    )
    out: dict[int, list[dict]] = {}
    for row_m in SPEC_ROW_RE.finditer(page):
        rewards_html = row_m.group("rewards")
        # rows with no perks render as <span class="text-gray-500">-</span>
        if "text-gray-500" in rewards_html and "icon-container" not in rewards_html:
            continue
        perks: list[dict] = []
        for blk_m in PERK_BLOCK_RE.finditer(rewards_html):
            inner = blk_m.group("inner")
            name = PERK_NAME_RE.search(inner)
            if not name:
                # A block without a name span is not treated as a perk.
                continue
            perk = {"name": html.unescape(name.group(1)).strip()}
            for key, pattern in text_fields:
                found = pattern.search(inner)
                if found:
                    perk[key] = html.unescape(found.group(1)).strip()
            icon = PERK_ICON_RE.search(inner)
            if icon:
                # Filenames are used verbatim to locate the icon on disk.
                perk["icon"] = icon.group(1)
            perks.append(perk)
        if perks:
            out[int(row_m.group("lvl"))] = perks
    return out
|
|
|
|
|
|
# ---------- skill tree extractor ----------
|
|
# A complete skill node <div>.  extract_skill_tree() does not use this
# pattern; it scans for data-tag attributes directly.
NODE_RE = re.compile(
    r'<div\s+role="button"[^>]*class="node[^"]*"[^>]*data-tag="(?P<tag>Skills\.[^"]+)"[^>]*'
    r'style="(?P<style>[^"]*)"[^>]*>'
    r'(?P<inner>.*?)</div>\s*</div>',
    re.DOTALL,
)

# Individual pieces of information picked out of a node's markup.
ALT_RE = re.compile(r'alt="([^"]+)"')
HREF_RE = re.compile(r'href="(https://dune\.gaming\.tools/skills/[^"]+)"')
ICON_RE = re.compile(r'src="\./[^"]*/(t_ui_icon[^"]+\.webp)"')
GRID_RE = re.compile(r"grid-area:\s*(\d+)\s*/\s*(\d+)")
MAX_PTS_RE = re.compile(r">0/(\d+)<")

# Connector lines: SVG <line> elements with integer x1, y1, x2, y2
# attributes appearing in exactly that order.
_LINE_ATTRS = ("x1", "y1", "x2", "y2")
LINE_RE = re.compile(
    "<line" + "".join(rf'[^>]*\s{attr}="(?P<{attr}>\d+)"' for attr in _LINE_ATTRS)
)
|
|
|
|
|
|
def _collect_skill_nodes(page: str, window: int = 2500) -> list[dict]:
    """Build one node dict per distinct ``data-tag="Skills...."`` in *page*.

    For each occurrence, the *window* characters starting at the attribute
    are searched for the node's grid position, alt text, link, icon and max
    points.  Occurrences without a ``grid-area`` are ignored, and only the
    first usable occurrence of a tag is kept.

    NOTE(review): the window is a fixed-size slice, not a balanced parse of
    the node element, so for a node lacking e.g. an icon the search can pick
    up a value that belongs to markup following it.
    """
    nodes: dict[str, dict] = {}
    for m in re.finditer(r'data-tag="(Skills\.[^"]+)"', page):
        tag = m.group(1)
        if tag in nodes:
            # The same tag can appear again elsewhere in the page.
            continue
        chunk = page[m.start() : m.start() + window]
        grid = GRID_RE.search(chunk)
        if not grid:
            continue
        alt = ALT_RE.search(chunk)
        href = HREF_RE.search(chunk)
        icon = ICON_RE.search(chunk)
        max_pts = MAX_PTS_RE.search(chunk)
        # The pattern guarantees at least one dot, so parts[1] exists.
        parts = tag.split(".")
        node_id = parts[-1]
        nodes[tag] = {
            "tag": tag,
            "id": node_id,
            # Attribute values are raw markup; decode entities such as &amp;.
            "name": html.unescape(alt.group(1)) if alt else node_id,
            "kind": parts[1],  # Ability | Attribute | Perk | Spice
            "row": int(grid.group(1)),
            "col": int(grid.group(2)),
            "maxPoints": int(max_pts.group(1)) if max_pts else 1,
            "icon": icon.group(1) if icon else None,
            "url": html.unescape(href.group(1)) if href else None,
        }
    return list(nodes.values())


def _infer_edges(page: str, nodes: list[dict]) -> list[dict]:
    """Turn the page's SVG ``<line>`` connectors into node-to-node edges.

    Node pixel centers are not available, so they are estimated: the
    smallest and largest line coordinates are assumed to coincide with the
    smallest and largest grid column/row, and the other nodes are placed
    linearly in between.  Each line endpoint is then snapped to the nearest
    estimated center.  Lines whose two ends snap to the same node are
    dropped, and each undirected pair is reported once with its tags sorted.
    """
    segments = [
        tuple(int(m.group(k)) for k in ("x1", "y1", "x2", "y2"))
        for m in LINE_RE.finditer(page)
    ]
    if not segments or not nodes:
        return []

    xs = [x for x1, _, x2, _ in segments for x in (x1, x2)]
    ys = [y for _, y1, _, y2 in segments for y in (y1, y2)]
    min_x, max_x = min(xs), max(xs)
    min_y, max_y = min(ys), max(ys)
    cols = [n["col"] for n in nodes]
    rows = [n["row"] for n in nodes]
    min_c, max_c = min(cols), max(cols)
    min_r, max_r = min(rows), max(rows)
    # max(1, ...) avoids dividing by zero when all nodes share a column/row.
    sx = (max_x - min_x) / max(1, max_c - min_c)
    sy = (max_y - min_y) / max(1, max_r - min_r)

    centers = {
        n["tag"]: (min_x + (n["col"] - min_c) * sx, min_y + (n["row"] - min_r) * sy)
        for n in nodes
    }

    def nearest(x, y):
        # Squared distance is enough for comparison; ties keep the first node.
        return min(
            centers,
            key=lambda t: (centers[t][0] - x) ** 2 + (centers[t][1] - y) ** 2,
        )

    edges = []
    seen_edges = set()
    for x1, y1, x2, y2 in segments:
        a, b = nearest(x1, y1), nearest(x2, y2)
        if a == b:
            continue
        key = tuple(sorted((a, b)))
        if key not in seen_edges:
            seen_edges.add(key)
            edges.append({"from": key[0], "to": key[1]})
    return edges


def extract_skill_tree(path: Path, class_id: str, class_name: str) -> dict:
    """Extract the nodes and connector edges of one class's skill tree.

    Args:
        path: Saved skill-builder HTML page.
        class_id: Identifier stored as ``id`` in the result.
        class_name: Display name stored as ``name`` in the result.

    Returns:
        ``{"id", "name", "nodes", "edges"}``.  Each node carries ``tag``,
        ``id``, ``name``, ``kind``, ``row``, ``col``, ``maxPoints``, ``icon``
        and ``url``; each edge is ``{"from": tag, "to": tag}``.

    The page is decoded as UTF-8 explicitly rather than with the platform
    locale encoding.
    """
    page = path.read_text(encoding="utf-8")
    nodes = _collect_skill_nodes(page)
    return {
        "id": class_id,
        "name": class_name,
        "nodes": nodes,
        "edges": _infer_edges(page, nodes),
    }
|
|
|
|
|
|
# ---------- main ----------
|
|
def main():
    """Run every extractor, write the JSON files, copy icons, print a summary.

    Writes into OUT: character-xp.json, one spec-*.json per specialization,
    one faction-*.json per faction, one skills-*.json per class whose page
    exists, and index.json (the manifest).  Referenced icons are copied into
    ICONS_OUT.

    ``manifest["icons"]["count"]`` is the number of referenced icons that
    were found, i.e. that are present in ICONS_OUT after this run.  Storing
    the number of files copied during the run made the manifest report 0 on
    any re-run where the icons were already up to date.
    """
    manifest = {"xp": {}, "factions": {}, "skills": []}

    # Character XP has no "Rewards" column, but levels grant skill / intel
    # points, so a per-level "Level Reward" perk is synthesized for the UI.
    char_xp = extract_xp_table(
        SAMPLE / "Character XP Table - Dune Awakening.html",
        [
            "xpRequired",
            "totalXp",
            "skillPoints",
            "totalSkillPoints",
            "intelPoints",
            "totalIntelPoints",
        ],
    )
    for row in char_xp["rows"]:
        sp = row.get("skillPoints", 0) or 0
        ip = row.get("intelPoints", 0) or 0
        if sp == 0 and ip == 0:
            continue
        parts = []
        if sp > 0:
            parts.append(f"+{sp} Skill Point" + ("s" if sp > 1 else ""))
        if ip > 0:
            parts.append(f"+{ip} Intel Point" + ("s" if ip > 1 else ""))
        row["perks"] = [{"name": "Level Reward", "effect": " · ".join(parts)}]
    (OUT / "character-xp.json").write_text(json.dumps(char_xp, indent=2))
    manifest["xp"]["character"] = "character-xp.json"

    # Specialization XP (5 tracks), with the perks granted at each level.
    specs = ["Combat", "Crafting", "Exploration", "Gathering", "Sabotage"]
    spec_rows: dict[str, list[dict]] = {}
    for spec in specs:
        src = SAMPLE / f"{spec} Track XP Table - Dune Awakening.html"
        data = extract_xp_table(
            src, ["xpRequired", "totalXp", "intelPoints", "totalIntelPoints"]
        )
        perks_by_level = extract_spec_perks(src)
        for row in data["rows"]:
            perks = perks_by_level.get(row["level"])
            if perks:
                row["perks"] = perks
        slug = spec.lower()
        (OUT / f"spec-{slug}.json").write_text(json.dumps(data, indent=2))
        manifest["xp"][slug] = f"spec-{slug}.json"
        spec_rows[slug] = data["rows"]

    # Faction standing has a different shape:
    # Tier# | TierName | Required Rep | Cumulative
    factions = [("Atreides", "atreides"), ("Harkonnen", "harkonnen")]
    faction_tables: dict[str, dict] = {}
    for fac_name, fac_id in factions:
        rows = parse_table(
            SAMPLE / f"House {fac_name} Faction Standing Table - Dune Awakening.html"
        )
        header = rows[0] if rows else []
        tiers = [
            {
                "tier": to_int(r[0]),
                "name": r[1].strip(),
                "standingRequired": to_int(r[2]),
                "totalStanding": to_int(r[3]),
            }
            for r in rows[1:]
            if len(r) >= 4
        ]
        table = {"header": header, "tiers": tiers}
        (OUT / f"faction-{fac_id}.json").write_text(json.dumps(table, indent=2))
        manifest["factions"][fac_id] = f"faction-{fac_id}.json"
        faction_tables[fac_id] = table

    # Skill trees; a class whose saved page is absent is reported and skipped.
    classes = [
        ("Bene Gesserit", "benegesserit"),
        ("Mentat", "mentat"),
        ("Planetologist", "planetologist"),
        ("Swordmaster", "swordmaster"),
        ("Trooper", "trooper"),
    ]
    for cls_name, cls_id in classes:
        path = SAMPLE / f"Dune Awakening Skill Builder - {cls_name}.html"
        if not path.exists():
            print(f"!! missing {path.name}")
            continue
        tree = extract_skill_tree(path, cls_id, cls_name)
        (OUT / f"skills-{cls_id}.json").write_text(json.dumps(tree, indent=2))
        manifest["skills"].append(
            {
                "id": cls_id,
                "name": cls_name,
                "file": f"skills-{cls_id}.json",
                "nodes": len(tree["nodes"]),
                "edges": len(tree["edges"]),
            }
        )

    # ---------- copy referenced icon webps ----------
    icon_names: set[str] = set()
    for _, cls_id in classes:
        # Read from disk rather than from this run's results: a skills file
        # left by an earlier run still contributes its icons even when the
        # class page was missing this time.
        path = OUT / f"skills-{cls_id}.json"
        if path.exists():
            tree = json.loads(path.read_text())
            icon_names.update(n["icon"] for n in tree["nodes"] if n.get("icon"))
    for rows in spec_rows.values():
        for r in rows:
            icon_names.update(p["icon"] for p in r.get("perks", []) if p.get("icon"))

    copied, missing = copy_icons(icon_names)
    # Sorted so the manifest does not churn between runs.
    missing = sorted(missing)
    manifest["icons"] = {
        "directory": "frontend/public/icons",
        "served_at": "/icons/",
        # Icons available after this run, not merely those copied by it.
        "count": len(icon_names) - len(missing),
        "missing": missing,
    }

    (OUT / "index.json").write_text(json.dumps(manifest, indent=2))

    # Print summary
    print("\n=== Extraction summary ===")
    print(f"character XP rows: {len(char_xp['rows'])} cols: {char_xp['header']}")
    for slug, rows in spec_rows.items():
        print(f" spec {slug:11s} rows: {len(rows)}")
    for fac_id, table in faction_tables.items():
        print(f" faction {fac_id:9s} tiers: {len(table['tiers'])} cols: {table['header']}")
    for s in manifest["skills"]:
        print(f" skills {s['id']:14s} nodes: {s['nodes']:3d} edges: {s['edges']:3d}")
    missing_note = f" missing: {len(missing)}" if missing else ""
    print(f" icons copied: {copied}{missing_note}")
|
|
|
|
|
|
# Run the extraction only when this file is executed as a script.
if __name__ == "__main__":
    main()
|