aman/scripts/generate_docs_media.py
Thales Maciel 359b5fbaf4 Land milestone 4 first-run docs and media
Make the X11 user path visible on first contact instead of burying it under config and maintainer detail.

Rewrite the README around the supported quickstart, expected tray and dictation result, install validation, troubleshooting, and linked follow-on docs. Split deep config and developer material into separate docs, add checked-in screenshots plus a short WebM walkthrough, and add a generator so the media assets stay reproducible.

Also fix the CLI discovery gap by letting `aman --help` show the top-level command surface while keeping implicit foreground `run` behavior, and align the settings, help, and about copy with the supported service-plus-diagnostics model.

Validation: `PYTHONPATH=src python3 -m unittest tests.test_aman_cli tests.test_config_ui`; `PYTHONPATH=src python3 -m unittest discover -s tests -p 'test_*.py'`; `python3 -m py_compile src/*.py tests/*.py scripts/generate_docs_media.py`; `PYTHONPATH=src python3 -m aman --help`.

Milestone 4 stays open in the roadmap because `docs/x11-ga/first-run-review-notes.md` still needs a real non-implementer walkthrough.
2026-03-12 18:30:34 -03:00

338 lines
12 KiB
Python

#!/usr/bin/env python3
from __future__ import annotations
import subprocess
import tempfile
from pathlib import Path
from PIL import Image, ImageDraw, ImageFont
# Directory two levels above this file (the parent of the folder holding the script).
ROOT = Path(__file__).resolve().parents[1]
# Folder that receives the generated screenshots and the demo video.
MEDIA_DIR = ROOT / "docs" / "media"
# Preferred DejaVu Sans font files; font() handles the case where they cannot be loaded.
FONT_REGULAR = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
FONT_BOLD = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
def font(size: int, *, bold: bool = False) -> ImageFont.ImageFont:
    """Return a font of the requested size, preferring DejaVu Sans.

    Args:
        size: Requested font size in pixels.
        bold: Select ``FONT_BOLD`` instead of ``FONT_REGULAR``.

    Returns:
        The DejaVu face when its file can be loaded; otherwise Pillow's
        built-in default font, scaled to *size* when the installed Pillow
        supports that.
    """
    candidate = FONT_BOLD if bold else FONT_REGULAR
    try:
        return ImageFont.truetype(candidate, size=size)
    except OSError:
        # Font file missing or unreadable: fall through to the built-in font.
        pass
    try:
        # Pillow >= 10.1 accepts a size here, so the fallback keeps the layout
        # proportions instead of collapsing to a tiny fixed-size face.
        return ImageFont.load_default(size=size)
    except TypeError:
        # Older Pillow: load_default() takes no arguments.
        return ImageFont.load_default()
def draw_round_rect(draw: ImageDraw.ImageDraw, box, radius: int, *, fill, outline=None, width=1):
    """Draw a rounded rectangle on *draw*.

    Thin wrapper around ``draw.rounded_rectangle`` that makes the styling
    arguments keyword-only.

    Args:
        draw: Drawing context to paint on.
        box: Bounding box passed through unchanged.
        radius: Corner radius.
        fill: Fill colour.
        outline: Optional outline colour.
        width: Outline width.
    """
    styling = {"fill": fill, "outline": outline, "width": width}
    draw.rounded_rectangle(box, radius=radius, **styling)
def draw_background(size: tuple[int, int], *, light=False) -> Image.Image:
    """Render the scene backdrop: a vertical gradient plus three soft glows.

    Args:
        size: ``(width, height)`` of the image in pixels.
        light: Use the light palette instead of the dark one.

    Returns:
        An RGBA image of the requested size, suitable as the destination of
        ``alpha_composite``.
    """
    w, h = size
    if light:
        top, bottom = (229, 232, 239), (240, 241, 246)
    else:
        top, bottom = (13, 17, 27), (30, 49, 79)

    # Paint on an RGB canvas through an RGBA-mode drawer. Pillow only blends
    # alpha fills in that combination; drawing them directly on an RGBA image
    # overwrites the pixels, which turned the glows into solid discs once the
    # result was converted to RGB.
    image = Image.new("RGB", size, top)
    draw = ImageDraw.Draw(image, "RGBA")

    for y in range(h):
        # max() guards the single-row case against division by zero.
        mix = y / max(1, h - 1)
        row = tuple(int(start + (end - start) * mix) for start, end in zip(top, bottom))
        draw.line((0, y, w, y), fill=row + (255,))

    # Translucent accent glows: top-left, bottom-right and top-right.
    draw.ellipse((60, 70, 360, 370), fill=(43, 108, 176, 90))
    draw.ellipse((w - 360, h - 340, w - 40, h - 20), fill=(14, 116, 144, 70))
    draw.ellipse((w - 260, 40, w - 80, 220), fill=(244, 114, 182, 50))

    # Callers composite RGBA overlays onto the result, so hand back RGBA.
    return image.convert("RGBA")
def paste_center(base: Image.Image, overlay: Image.Image, top: int) -> tuple[int, int]:
    """Alpha-composite *overlay* onto *base*, horizontally centred.

    Args:
        base: Destination image; modified in place.
        overlay: Image to composite onto *base*.
        top: Vertical offset of the overlay's top edge within *base*.

    Returns:
        The ``(x, y)`` position at which the overlay was placed.
    """
    left = (base.width - overlay.width) // 2
    position = (left, top)
    base.alpha_composite(overlay, position)
    return position
def _line_height(face) -> int:
    """Return the height used to advance past one line set in *face*."""
    size = getattr(face, "size", None)
    if size is not None:
        return size
    # Pillow's bitmap default font has no ``size`` attribute; measure a
    # sample string instead so the fallback font does not crash the layout.
    _, top, _, bottom = face.getbbox("Ag")
    return bottom - top


def draw_text_block(
    draw: ImageDraw.ImageDraw,
    origin: tuple[int, int],
    lines: list[str],
    *,
    fill,
    title=None,
    title_fill=None,
    line_gap=12,
    body_font=None,
    title_font=None,
):
    """Draw an optional title followed by left-aligned body lines.

    Args:
        draw: Drawing context to paint on.
        origin: ``(x, y)`` of the top-left corner of the block.
        lines: Body lines, drawn top to bottom.
        fill: Colour of the body text (and of the title unless overridden).
        title: Optional heading drawn above the body.
        title_fill: Colour of the heading; defaults to *fill*.
        line_gap: Extra vertical space added after each body line.
        body_font: Font for the body; defaults to ``font(22)``.
        title_font: Font for the heading; defaults to ``font(26, bold=True)``.
    """
    x, y = origin
    title_font = title_font or font(26, bold=True)
    body_font = body_font or font(22)
    if title:
        draw.text((x, y), title, font=title_font, fill=title_fill or fill)
        y += _line_height(title_font) + 10
    step = _line_height(body_font) + line_gap
    for line in lines:
        draw.text((x, y), line, font=body_font, fill=fill)
        y += step
def build_settings_window() -> Image.Image:
    """Render the first-run settings window screenshot.

    The window is drawn on its own 1180x760 canvas and then composited,
    horizontally centred, onto a 1440x900 dark backdrop.

    Returns:
        The finished screenshot as an RGB image.
    """
    base = draw_background((1440, 900))
    window = Image.new("RGBA", (1180, 760), (248, 250, 252, 255))
    draw = ImageDraw.Draw(window)

    # Window frame, then the title bar. The plain rectangle squares off the
    # title bar's lower corners so only its top corners stay rounded.
    draw_round_rect(draw, (0, 0, 1179, 759), 26, fill="#f8fafc", outline="#cbd5e1", width=2)
    draw_round_rect(draw, (0, 0, 1179, 74), 26, fill="#182130")
    draw.rectangle((0, 40, 1179, 74), fill="#182130")
    draw.text((32, 22), "Aman Settings (Required)", font=font(28, bold=True), fill="#f8fafc")
    draw.text((970, 24), "Cancel", font=font(20), fill="#cbd5e1")
    draw_round_rect(draw, (1055, 14, 1146, 58), 16, fill="#0f766e")
    draw.text((1080, 24), "Apply", font=font(20, bold=True), fill="#f8fafc")

    # "Settings required" banner.
    draw_round_rect(draw, (26, 94, 1154, 160), 18, fill="#fff7d6", outline="#facc15")
    draw_text_block(
        draw,
        (48, 112),
        ["Aman needs saved settings before it can start recording from the tray."],
        fill="#4d3a00",
    )

    # Sidebar with the section list; the first entry is shown as active.
    draw_round_rect(draw, (26, 188, 268, 734), 20, fill="#eef2f7", outline="#d7dee9")
    sections = ["General", "Audio", "Runtime & Models", "Help", "About"]
    y = 224
    for index, label in enumerate(sections):
        active = index == 0
        fill = "#dbeafe" if active else "#eef2f7"
        outline = "#93c5fd" if active else "#eef2f7"
        draw_round_rect(draw, (46, y, 248, y + 58), 16, fill=fill, outline=outline)
        draw.text((68, y + 16), label, font=font(22, bold=active), fill="#0f172a")
        y += 76

    # Content panel and its heading.
    draw_round_rect(draw, (300, 188, 1154, 734), 20, fill="#ffffff", outline="#d7dee9")
    draw_text_block(draw, (332, 220), [], title="General", fill="#0f172a", title_font=font(30, bold=True))

    # Form rows. Each field box spans row_y - 8 .. row_y + 38, so a pitch of
    # 48 ends the fourth row at y=468, clear of the callout that starts at
    # y=480. The previous pitch of 92 put rows three and four underneath the
    # callout, which is painted afterwards and covered them.
    labels = [
        ("Trigger hotkey", "Super+m"),
        ("Text injection", "Clipboard paste (recommended)"),
        ("Transcription language", "Auto detect"),
        ("Profile", "Default"),
    ]
    label_font = font(22, bold=True)
    value_font = font(20)
    row_y = 286
    row_pitch = 48
    for label, value in labels:
        draw.text((332, row_y), label, font=label_font, fill="#0f172a")
        draw_round_rect(draw, (572, row_y - 8, 1098, row_y + 38), 14, fill="#f8fafc", outline="#cbd5e1")
        draw.text((596, row_y + 4), value, font=value_font, fill="#334155")
        row_y += row_pitch

    # First-run callout. Four 20 px lines advance 32 px each, so text that
    # starts 16 px below the box top begins its last line at y=592; the box
    # runs to y=624 to contain it (it used to end at 612 with the last line
    # starting at 608) while staying above the hint box at y=638.
    callout_top = 480
    callout_bottom = 624
    draw_round_rect(draw, (332, callout_top, 1098, callout_bottom), 18, fill="#f0fdf4", outline="#86efac")
    draw_text_block(
        draw,
        (360, callout_top + 16),
        [
            "Supported first-run path:",
            "1. Pick the microphone you want to use.",
            "2. Keep the recommended clipboard backend.",
            "3. Click Apply and wait for the tray to return to Idle.",
        ],
        fill="#166534",
        body_font=font(20),
    )

    # Dictation hint.
    draw_round_rect(draw, (332, 638, 1098, 702), 18, fill="#e0f2fe", outline="#7dd3fc")
    draw.text(
        (360, 660),
        "After setup, put your cursor in a text field and say: hello from Aman",
        font=font(20, bold=True),
        fill="#155e75",
    )

    # The backdrop is freshly created above, so composite onto it directly.
    paste_center(base, window, 70)
    return base.convert("RGB")
def build_tray_menu() -> Image.Image:
    """Render the tray-menu screenshot.

    Draws a top bar with a session label and an "Idle" status pill on a
    1280x900 light backdrop, then composites a 420x520 menu (with the
    "Run Diagnostics" entry highlighted) horizontally centred below it.

    Returns:
        The finished screenshot as an RGB image.
    """
    canvas = draw_background((1280, 900), light=True)
    bar = ImageDraw.Draw(canvas)

    # Top bar: session label on the left, status pill on the right.
    draw_round_rect(bar, (0, 0, 1279, 54), 0, fill="#111827")
    bar.text((42, 16), "X11 Session", font=font(20, bold=True), fill="#e5e7eb")
    draw_round_rect(bar, (1038, 10, 1180, 42), 14, fill="#1f2937", outline="#374151")
    bar.text((1068, 17), "Idle", font=font(18, bold=True), fill="#e5e7eb")

    # The menu is drawn on its own canvas and composited afterwards.
    popup = Image.new("RGBA", (420, 520), (255, 255, 255, 255))
    pen = ImageDraw.Draw(popup)
    draw_round_rect(pen, (0, 0, 419, 519), 22, fill="#ffffff", outline="#cbd5e1", width=2)

    entries = (
        "Settings...",
        "Help",
        "About",
        "Pause Aman",
        "Reload Config",
        "Run Diagnostics",
        "Open Config Path",
        "Quit",
    )
    divider_after = ("About", "Run Diagnostics")
    for position, entry in enumerate(entries):
        row_top = 26 + 58 * position
        selected = entry == "Run Diagnostics"
        if selected:
            draw_round_rect(pen, (16, row_top - 6, 404, row_top + 40), 14, fill="#dbeafe")
        pen.text((34, row_top), entry, font=font(22, bold=selected), fill="#0f172a")
        if entry in divider_after:
            # Separator sits 10 px above the top of the following row.
            divider_y = row_top + 48
            pen.line((24, divider_y, 396, divider_y), fill="#e2e8f0", width=2)

    paste_center(canvas, popup, 118)
    return canvas.convert("RGB")
def build_terminal_scene() -> Image.Image:
    """Render the "install the portable bundle" terminal frame.

    Returns:
        A 1280x720 RGB image showing a terminal window with the install
        transcript and the step heading above it.
    """
    frame = Image.new("RGB", (1280, 720), "#0b1220")
    pen = ImageDraw.Draw(frame)

    # Terminal window; the plain rectangle squares off the header's lower corners.
    draw_round_rect(pen, (100, 80, 1180, 640), 24, fill="#0f172a", outline="#334155", width=2)
    draw_round_rect(pen, (100, 80, 1180, 132), 24, fill="#111827")
    pen.rectangle((100, 112, 1180, 132), fill="#111827")
    pen.text((136, 97), "Terminal", font=font(26, bold=True), fill="#e2e8f0")

    # Transcript rows as (y, text, colour): commands, their output, then a status line.
    command, output, status = "#86efac", "#cbd5e1", "#7dd3fc"
    transcript = (
        (192, "$ sha256sum -c aman-x11-linux-0.1.0.tar.gz.sha256", command),
        (244, "aman-x11-linux-0.1.0.tar.gz: OK", output),
        (310, "$ tar -xzf aman-x11-linux-0.1.0.tar.gz", command),
        (362, "$ cd aman-x11-linux-0.1.0", command),
        (414, "$ ./install.sh", command),
        (482, "Installed aman.service and started the user service.", output),
        (534, "Waiting for first-run settings...", status),
    )
    for top, content, colour in transcript:
        pen.text((168, top), content, font=font(22), fill=colour)

    # Step heading above the terminal window.
    pen.text((128, 30), "1. Install the portable bundle", font=font(34, bold=True), fill="#f8fafc")
    return frame
def build_editor_scene(*, badge: str | None = None, text: str = "", subtitle: str) -> Image.Image:
    """Render one "focused editor" frame of the walkthrough.

    Args:
        badge: Current state to display: ``"Recording"``, ``"STT"`` or
            ``"AI Processing"``. ``None`` means idle and draws no badge.
        text: Text shown inside the editor area; when empty a placeholder
            prompt is drawn instead.
        subtitle: Caption drawn above the editor area.

    Returns:
        A 1280x720 RGB image.

    Raises:
        KeyError: If *badge* is set to anything other than the three known
            states.
    """
    image = draw_background((1280, 720), light=True).convert("RGB")
    draw = ImageDraw.Draw(image)

    # Editor window; the plain rectangle squares off the header's lower corners.
    draw_round_rect(draw, (84, 64, 1196, 642), 26, fill="#ffffff", outline="#cbd5e1", width=2)
    draw_round_rect(draw, (84, 64, 1196, 122), 26, fill="#f8fafc")
    draw.rectangle((84, 94, 1196, 122), fill="#f8fafc")
    draw.text((122, 84), "Focused editor", font=font(24, bold=True), fill="#0f172a")
    draw.text((122, 158), subtitle, font=font(26, bold=True), fill="#0f172a")

    # Status pill. It mirrors the active badge so that frames captioned as
    # "status now shows recording" no longer display a hard-coded "Idle".
    # The label is centred because the state names differ in width.
    status = badge or "Idle"
    pill_left, pill_right = 996, 1144
    status_font = font(18, bold=True)
    draw_round_rect(draw, (pill_left, 80, pill_right, 116), 16, fill="#111827")
    status_width = draw.textlength(status, font=status_font)
    status_x = pill_left + (pill_right - pill_left - status_width) / 2
    draw.text((status_x, 89), status, font=status_font, fill="#e5e7eb")

    if badge:
        fill = {"Recording": "#dc2626", "STT": "#2563eb", "AI Processing": "#0f766e"}[badge]
        draw_round_rect(draw, (122, 214, 370, 262), 18, fill=fill)
        draw.text((150, 225), badge, font=font(24, bold=True), fill="#f8fafc")

    # Editor text area: dictated text, or a placeholder while nothing has landed yet.
    draw_round_rect(draw, (122, 308, 1158, 572), 22, fill="#f8fafc", outline="#d7dee9")
    if text:
        draw.multiline_text((156, 350), text, font=font(34), fill="#0f172a", spacing=18)
    else:
        draw.text((156, 366), "Cursor ready for dictation...", font=font(32), fill="#64748b")
    return image
def build_demo_webm(settings_png: Path, tray_png: Path, output: Path) -> None:
    """Assemble the first-run walkthrough video with ffmpeg.

    Eight still frames (install terminal, settings window, tray menu and five
    editor states) are written to a temporary directory and joined with
    ffmpeg's concat demuxer into a 24 fps VP9 file.

    Args:
        settings_png: Existing settings-window screenshot to reuse as a frame.
        tray_png: Existing tray-menu screenshot to reuse as a frame.
        output: Destination path of the video; overwritten if present.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If an input screenshot or the ``ffmpeg``
            executable cannot be found.
    """
    # Scale/crop both screenshots to 1280x720 and close the files right away;
    # resize() and crop() return independent in-memory images.
    with Image.open(settings_png) as shot:
        settings_frame = shot.resize((1280, 800)).crop((0, 40, 1280, 760))
    with Image.open(tray_png) as shot:
        tray_frame = shot.resize((1280, 900)).crop((0, 90, 1280, 810))

    # (file name, frame, seconds on screen)
    scenes = [
        ("01-install.png", build_terminal_scene(), 3.0),
        ("02-settings.png", settings_frame, 4.0),
        ("03-tray.png", tray_frame, 3.0),
        (
            "04-editor-ready.png",
            build_editor_scene(
                subtitle="2. Press the hotkey and say: hello from Aman",
                text="",
            ),
            3.0,
        ),
        (
            "05-recording.png",
            build_editor_scene(
                badge="Recording",
                subtitle="Tray and status now show recording",
                text="",
            ),
            1.5,
        ),
        (
            "06-stt.png",
            build_editor_scene(
                badge="STT",
                subtitle="Aman transcribes the audio locally",
                text="",
            ),
            1.5,
        ),
        (
            "07-processing.png",
            build_editor_scene(
                badge="AI Processing",
                subtitle="Cleanup and injection finish automatically",
                text="",
            ),
            1.5,
        ),
        (
            "08-result.png",
            build_editor_scene(
                subtitle="3. The text lands in the focused app",
                text="Hello from Aman.",
            ),
            4.0,
        ),
    ]

    def concat_entry(path: Path) -> str:
        # Inside single quotes the concat list cannot contain a raw quote;
        # close the quote, emit an escaped one, and reopen.
        escaped = path.as_posix().replace("'", "'\\''")
        return f"file '{escaped}'"

    with tempfile.TemporaryDirectory() as td:
        temp_dir = Path(td)
        concat = temp_dir / "scenes.txt"
        concat_lines: list[str] = []
        for name, image, duration in scenes:
            frame_path = temp_dir / name
            image.convert("RGB").save(frame_path, format="PNG")
            concat_lines.append(concat_entry(frame_path))
            concat_lines.append(f"duration {duration}")
        # The final frame is listed once more without a duration -- presumably
        # so the concat demuxer honours the last "duration" line.
        concat_lines.append(concat_entry(temp_dir / scenes[-1][0]))
        concat.write_text("\n".join(concat_lines) + "\n", encoding="utf-8")

        # ffmpeg must run while the temporary frames still exist. Its log is
        # limited to errors and stderr is left attached, so a failed encode
        # explains itself instead of surfacing only as an exit status.
        subprocess.run(
            [
                "ffmpeg",
                "-hide_banner",
                "-loglevel",
                "error",
                "-y",
                "-f",
                "concat",
                "-safe",
                "0",
                "-i",
                str(concat),
                "-vf",
                "fps=24,format=yuv420p",
                "-c:v",
                "libvpx-vp9",
                "-b:v",
                "0",
                "-crf",
                "34",
                str(output),
            ],
            check=True,
            stdout=subprocess.DEVNULL,
        )
def main() -> None:
    """Generate both screenshots and the demo video under ``MEDIA_DIR``.

    The screenshots are saved first because the video reuses the saved PNG
    files as two of its frames. Each output path is printed once everything
    has been written.
    """
    MEDIA_DIR.mkdir(parents=True, exist_ok=True)

    settings_png = MEDIA_DIR / "settings-window.png"
    tray_png = MEDIA_DIR / "tray-menu.png"
    demo_webm = MEDIA_DIR / "first-run-demo.webm"

    settings_image = build_settings_window()
    settings_image.save(settings_png, format="PNG")
    tray_image = build_tray_menu()
    tray_image.save(tray_png, format="PNG")
    build_demo_webm(settings_png, tray_png, demo_webm)

    for written in (settings_png, tray_png, demo_webm):
        print(f"wrote {written}")


if __name__ == "__main__":
    main()