Sync all skills and memories 2026-04-14 07:27

2026-04-14 07:27:20 +09:00
parent 516bb44fe6
commit 1eba2bca95
386 changed files with 167655 additions and 0 deletions
--- a/skills/creative/ascii-video/references/optimization.md
+++ b/skills/creative/ascii-video/references/optimization.md
@@ -0,0 +1,688 @@
+# Optimization Reference
+
+> **See also:** architecture.md · composition.md · scenes.md · shaders.md · inputs.md · troubleshooting.md
+
+## Hardware Detection
+
+Detect the user's hardware at script startup and adapt rendering parameters automatically. Never hardcode worker counts or resolution.
+
+### CPU and Memory Detection
+
+```python
+import multiprocessing
+import platform
+import shutil
+import os
+
+def detect_hardware():
+    """Detect hardware capabilities and return render config.
+
+    Returns:
+        dict with keys "cpu_count", "workers", "mem_gb", "platform", "arch"
+        and "has_ffmpeg". "workers" is always at least 1.
+    """
+    fallback_mem_bytes = 8 * 1024**3  # assume 8GB when RAM cannot be probed
+    cpu_count = multiprocessing.cpu_count()
+
+    # Leave 1-2 cores free for OS + ffmpeg encoding
+    if cpu_count >= 16:
+        workers = cpu_count - 2
+    elif cpu_count >= 4:
+        workers = cpu_count - 1
+    else:
+        workers = max(1, cpu_count)
+
+    # Memory detection (platform-specific). Start from the fallback so that
+    # mem_bytes is always bound, even if /proc/meminfo has no MemTotal line.
+    mem_bytes = fallback_mem_bytes
+    system = platform.system()
+    try:
+        if system == "Darwin":
+            import subprocess
+            mem_bytes = int(subprocess.check_output(["sysctl", "-n", "hw.memsize"]).strip())
+        elif system == "Linux":
+            with open("/proc/meminfo") as f:
+                for line in f:
+                    if line.startswith("MemTotal"):
+                        mem_bytes = int(line.split()[1]) * 1024  # value is scaled by 1024 to bytes
+                        break
+    except Exception:
+        # Best-effort probe: any failure falls back to the 8GB assumption
+        mem_bytes = fallback_mem_bytes
+
+    mem_gb = mem_bytes / (1024**3)
+
+    # Each worker uses ~50-150MB depending on grid sizes
+    # Cap workers if memory is tight
+    mem_per_worker_mb = 150
+    max_workers_by_mem = int(mem_gb * 1024 * 0.6 / mem_per_worker_mb)  # use 60% of RAM
+    # Clamp to >= 1: on very small machines the memory cap rounds down to 0,
+    # and a pool with zero workers cannot be created.
+    workers = max(1, min(workers, max_workers_by_mem))
+
+    # ffmpeg availability and codec support
+    has_ffmpeg = shutil.which("ffmpeg") is not None
+
+    return {
+        "cpu_count": cpu_count,
+        "workers": workers,
+        "mem_gb": mem_gb,
+        "platform": system,
+        "arch": platform.machine(),
+        "has_ffmpeg": has_ffmpeg,
+    }
+```
+
+### Adaptive Quality Profiles
+
+Scale resolution, FPS, CRF, and grid density based on hardware:
+
+```python
+def quality_profile(hw, target_duration_s, user_preference="auto"):
+    """
+    Pick render settings that suit the detected hardware.
+
+    hw: hardware dict (reads "workers", "mem_gb" and "cpu_count")
+    target_duration_s: length of the video in seconds
+    user_preference: "auto", "draft", "preview", "production", "max"
+    """
+    workers = hw["workers"]
+
+    def settings(vw, vh, fps, crf, grid_scale, shaders, particles_max, n_workers=workers):
+        # Single place that fixes the key set of every returned profile
+        return {"vw": vw, "vh": vh, "fps": fps, "crf": crf, "workers": n_workers,
+                "grid_scale": grid_scale, "shaders": shaders, "particles_max": particles_max}
+
+    # Explicit presets bypass the hardware heuristics entirely
+    if user_preference == "draft":
+        return settings(960, 540, 12, 28, 0.5, "minimal", 200, n_workers=min(4, workers))
+    if user_preference == "preview":
+        return settings(1280, 720, 15, 25, 0.75, "standard", 500)
+    if user_preference == "max":
+        return settings(3840, 2160, 30, 15, 2.0, "full", 3000)
+
+    # "production" or "auto" (any other value lands here as well):
+    # project the render time at 24fps and ~180ms per 1080p frame
+    frame_count = int(target_duration_s * 24)
+    seconds_per_frame = 0.18
+    projected_s = frame_count * seconds_per_frame / max(1, workers)
+
+    low_end = hw["mem_gb"] < 4 or hw["cpu_count"] <= 2
+    if low_end:
+        # 720p, 15fps
+        return settings(1280, 720, 15, 23, 0.75, "standard", 500)
+
+    if projected_s > 3600:
+        # More than an hour of rendering: drop to 720p to speed up
+        return settings(1280, 720, 24, 20, 0.75, "standard", 800)
+
+    # Standard production: 1080p 24fps
+    return settings(1920, 1080, 24, 20, 1.0, "full", 1200)
+
+
+def apply_quality_profile(profile):
+    """Copy resolution, frame rate and worker count from profile into module globals.
+
+    Reads the "vw", "vh", "fps" and "workers" keys; the other profile keys
+    are not consumed here.
+    """
+    module_globals = globals()
+    for global_name, profile_key in (("VW", "vw"), ("VH", "vh"),
+                                     ("FPS", "fps"), ("N_WORKERS", "workers")):
+        module_globals[global_name] = profile[profile_key]
+    # Grid sizes scale with resolution
+    # CRF passed to ffmpeg encoder
+    # Shader set determines which post-processing is active
+```
+
+### CLI Integration
+
+```python
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--quality", choices=["draft", "preview", "production", "max", "auto"],
+                    default="auto", help="Render quality preset")
+parser.add_argument("--aspect", choices=["landscape", "portrait", "square"],
+                    default="landscape", help="Aspect ratio preset")
+parser.add_argument("--workers", type=int, default=0, help="Override worker count (0=auto)")
+parser.add_argument("--resolution", type=str, default="", help="Override resolution e.g. 1280x720")
+args = parser.parse_args()
+
+hw = detect_hardware()
+if args.workers > 0:
+    hw["workers"] = args.workers
+profile = quality_profile(hw, target_duration, args.quality)
+
+# Apply aspect ratio preset (before manual resolution override).
+# Presets are written at the 1080p tier and rescaled to the tier the quality
+# profile picked, so e.g. "--quality draft --aspect portrait" stays a draft
+# (540x960) instead of silently jumping to 1080x1920.
+ASPECT_PRESETS = {
+    "landscape": (1920, 1080),
+    "portrait":  (1080, 1920),
+    "square":    (1080, 1080),
+}
+if args.aspect != "landscape" and not args.resolution:
+    tier_scale = profile["vh"] / ASPECT_PRESETS["landscape"][1]
+    preset_w, preset_h = ASPECT_PRESETS[args.aspect]
+    # Round to even numbers: most video encoders reject odd frame sizes
+    profile["vw"] = int(round(preset_w * tier_scale / 2)) * 2
+    profile["vh"] = int(round(preset_h * tier_scale / 2)) * 2
+
+if args.resolution:
+    try:
+        w, h = (int(part) for part in args.resolution.lower().split("x"))
+    except ValueError:
+        # parser.error prints usage and exits instead of showing a traceback
+        parser.error(f"--resolution must look like 1280x720, got {args.resolution!r}")
+    if w <= 0 or h <= 0:
+        parser.error(f"--resolution must be positive, got {args.resolution!r}")
+    profile["vw"], profile["vh"] = w, h
+apply_quality_profile(profile)
+
+log(f"Hardware: {hw['cpu_count']} cores, {hw['mem_gb']:.1f}GB RAM, {hw['platform']}")
+log(f"Render:   {profile['vw']}x{profile['vh']} @{profile['fps']}fps, "
+    f"CRF {profile['crf']}, {profile['workers']} workers")
+```
+
+### Portrait Mode Considerations
+
+Portrait (1080x1920) has the same pixel count as landscape 1080p, so performance is equivalent. But composition patterns differ:
+
+| Concern | Landscape | Portrait |
+|---------|-----------|----------|
+| Grid cols at `lg` | 160 | 90 |
+| Grid rows at `lg` | 45 | 80 |
+| Max text line chars | ~50 centered | ~25-30 centered |
+| Vertical rain | Short travel | Long, dramatic travel |
+| Horizontal spectrum | Full width | Needs rotation or compression |
+| Radial effects | Natural circles | Tall ellipses (aspect correction handles this) |
+| Particle explosions | Wide spread | Tall spread |
+| Text stacking | 3-4 lines comfortable | 8-10 lines comfortable |
+| Quote layout | 2-3 wide lines | 5-6 short lines |
+
+**Portrait-optimized patterns:**
+- Vertical rain/matrix effects are naturally enhanced — longer column travel
+- Fire columns rise through more screen space
+- Rising embers/particles have more vertical runway
+- Text can be stacked more aggressively with more lines
+- Radial effects work if aspect correction is applied (GridLayer handles this automatically)
+- Spectrum bars can be rotated 90 degrees (vertical bars from bottom)
+
+**Portrait text layout:**
+```python
+def layout_text_portrait(text, max_chars_per_line=25, grid=None):
+    """Break text into short lines for portrait display.
+
+    Words are packed greedily; a line never exceeds max_chars_per_line unless
+    a single word is longer than the limit, in which case that word gets a
+    line of its own.
+
+    Args:
+        text: string to wrap; runs of whitespace separate words
+        max_chars_per_line: maximum line width in characters
+        grid: unused; kept so existing callers that pass it keep working
+
+    Returns:
+        list of lines without leading/trailing spaces ([] for blank text)
+    """
+    lines = []
+    current = ""
+    for w in text.split():
+        candidate = f"{current} {w}" if current else w
+        # Only wrap when there is something to flush: an over-long first
+        # word must not produce an empty leading line.
+        if current and len(candidate) > max_chars_per_line:
+            lines.append(current)
+            current = w
+        else:
+            current = candidate
+    if current:
+        lines.append(current)
+    return lines
+```
+
+## Performance Budget
+
+Target: 100-200ms per frame (5-10 fps single-threaded, 40-80 fps across 8 workers).
+
+| Component | Time | Notes |
+|-----------|------|-------|
+| Feature extraction | 1-5ms | Pre-computed for all frames before render |
+| Effect function | 2-15ms | Vectorized numpy, avoid Python loops |
+| Character render | 80-150ms | **Bottleneck** -- per-cell Python loop |
+| Shader pipeline | 5-25ms | Depends on active shaders |
+| ffmpeg encode | ~5ms | Amortized by pipe buffering |
+
+## Bitmap Pre-Rasterization
+
+Rasterize every character at init, not per-frame:
+
+```python
+# At init time -- done once
+for c in all_characters:
+    img = Image.new("L", (cell_w, cell_h), 0)
+    ImageDraw.Draw(img).text((0, 0), c, fill=255, font=font)
+    bitmaps[c] = np.array(img, dtype=np.float32) / 255.0  # float32 for fast multiply
+
+# At render time -- fast lookup
+bitmap = bitmaps[char]
+canvas[y:y+ch, x:x+cw] = np.maximum(canvas[y:y+ch, x:x+cw],
+                                      (bitmap[:,:,None] * color).astype(np.uint8))
+```
+
+Collect all characters from all palettes + overlay text into the init set. Lazy-init for any missed characters.
+
+## Pre-Rendered Background Textures
+
+Alternative to `_render_vf()` for backgrounds where characters don't need to change every frame. Pre-bake a static ASCII texture once at init, then multiply it by a per-cell color field each frame: one broadcast (element-wise) multiply instead of thousands of bitmap blits.
+
+Use when: background layer uses a fixed character palette and only color/brightness varies per frame. NOT suitable for layers where character selection depends on a changing value field.
+
+### Init: Bake the Texture
+
+```python
+# In GridLayer.__init__:
+self._bg_row_idx = np.clip(
+    (np.arange(VH) - self.oy) // self.ch, 0, self.rows - 1
+)
+self._bg_col_idx = np.clip(
+    (np.arange(VW) - self.ox) // self.cw, 0, self.cols - 1
+)
+self._bg_textures = {}
+
+def make_bg_texture(self, palette):
+    """Return the static ASCII texture for palette, rendering it on first use.
+
+    The texture is a (VH, VW) float32 array cached in self._bg_textures under
+    the palette itself, so each palette is rendered only once.
+    """
+    if palette in self._bg_textures:
+        return self._bg_textures[palette]
+
+    # Glyphs of this palette that have a bitmap (spaces excluded);
+    # fall back to the first few known bitmaps when none qualify.
+    glyphs = [c for c in palette if c != " " and c in self.bm]
+    if not glyphs:
+        glyphs = list(self.bm.keys())[:5]
+
+    picker = random.Random(12345)  # fixed seed: same texture on every run
+    baked = np.zeros((VH, VW), dtype=np.float32)
+    for row in range(self.rows):
+        top = self.oy + row * self.ch
+        if top + self.ch > VH:
+            break  # this row and all below would spill past the frame
+        for col in range(self.cols):
+            left = self.ox + col * self.cw
+            if left + self.cw > VW:
+                break  # rest of this row is off-frame
+            baked[top:top + self.ch, left:left + self.cw] = self.bm[picker.choice(glyphs)]
+
+    self._bg_textures[palette] = baked
+    return baked
+```
+
+### Render: Color Field x Cached Texture
+
+```python
+def render_bg(self, color_field, palette=PAL_CIRCUIT):
+    """Fast background: cached ASCII texture multiplied by a per-cell color field.
+
+    color_field: (rows, cols, 3) uint8. Returns (VH, VW, 3) uint8.
+    """
+    glyph_mask = self.make_bg_texture(palette)[:, :, None]
+    # Look up each pixel's cell color through the pre-computed index maps
+    pixel_rows = self._bg_row_idx[:, None]
+    pixel_cols = self._bg_col_idx[None, :]
+    per_pixel_color = color_field[pixel_rows, pixel_cols].astype(np.float32)
+    return (glyph_mask * per_pixel_color).astype(np.uint8)
+```
+
+### Usage in a Scene
+
+```python
+# Build per-cell color from effect fields (cheap — rows*cols, not VH*VW)
+hue = ((t * 0.05 + val * 0.2) % 1.0).astype(np.float32)
+R, G, B = hsv2rgb(hue, np.full_like(val, 0.5), val)
+color_field = mkc(R, G, B, g.rows, g.cols)  # (rows, cols, 3) uint8
+
+# Render background — single matrix multiply, no per-cell loop
+canvas_bg = g.render_bg(color_field, PAL_DENSE)
+```
+
+The texture init loop runs once and is cached per palette. Per-frame cost is one fancy-index lookup + one broadcast multiply — orders of magnitude faster than the per-cell bitmap blit loop in `render()` for dense backgrounds.
+
+## Coordinate Array Caching
+
+Pre-compute all grid-relative coordinate arrays at init, not per-frame:
+
+```python
+# These are O(rows*cols) and used in every effect
+self.rr = np.arange(rows)[:, None]    # row indices
+self.cc = np.arange(cols)[None, :]    # col indices
+self.dist = np.sqrt(dx**2 + dy**2)   # distance from center
+self.angle = np.arctan2(dy, dx)       # angle from center
+self.dist_n = ...                      # normalized distance
+```
+
+## Vectorized Effect Patterns
+
+### Avoid Per-Cell Python Loops in Effects
+
+The render loop (compositing bitmaps) is unavoidably per-cell. But effect functions must be fully vectorized numpy -- never iterate over rows/cols in Python.
+
+Bad (O(rows*cols) Python loop):
+```python
+for r in range(rows):
+    for c in range(cols):
+        val[r, c] = math.sin(c * 0.1 + t) * math.cos(r * 0.1 - t)
+```
+
+Good (vectorized):
+```python
+val = np.sin(g.cc * 0.1 + t) * np.cos(g.rr * 0.1 - t)
+```
+
+### Vectorized Matrix Rain
+
+The naive per-column per-trail-pixel loop is the second biggest bottleneck after the render loop. Use numpy fancy indexing:
+
+```python
+# Instead of nested Python loops over columns and trail pixels, lay out every
+# (trail offset, column) pair as a 2-D grid and mask the ones that are active.
+heads = np.asarray(S["ry"]).astype(int)[:cols]    # head row per column (truncated like int())
+lens = np.asarray(S["rln"]).astype(int)[:cols]    # trail length per column
+max_len = int(lens.max()) if lens.size else 0
+offs = np.arange(max_len)[:, None]                # (max_len, 1) distance behind the head
+trail_rows = heads[None, :] - offs                # (max_len, cols) row of each trail pixel
+active = (offs < lens[None, :]) & (trail_rows >= 0) & (trail_rows < rows)
+
+# Flatten the active pixels into index arrays
+oi, ac = np.nonzero(active)                       # trail offset, column
+ar = trail_rows[oi, ac]                           # row
+af = (1.0 - oi / lens[ac]).astype(np.float32)     # fade: 1.0 at the head, -> 0 at the tail
+# NOTE: entries are ordered by trail offset, not by column; each (row, col)
+# pair occurs once, so the bulk assignments below are order-independent.
+
+# Assign chars and colors in bulk using fancy indexing
+ch[ar, ac] = ...  # vectorized char assignment
+co[ar, ac, 1] = (af * bri * 255).astype(np.uint8)  # green channel
+```
+
+### Vectorized Fire Columns
+
+Same pattern -- accumulate index arrays, assign in bulk:
+
+```python
+fire_val = np.zeros((rows, cols), dtype=np.float32)
+# Flame height and vertical falloff depend only on energy/rms, not on the
+# column, so compute them once instead of once per fire column.
+height = int(energy * rows * 0.7)
+dy = np.arange(min(height, rows))     # 0 at the base, counting upward
+fr = rows - 1 - dy                    # grid rows, bottom row first
+frac = dy / max(height, 1)            # 0 at the base -> approaches 1 at the tip
+intensity = (1 - frac * 0.6) * (0.5 + rms * 0.5)
+for fi in range(n_cols):
+    fx_c = int((fi * cols / n_cols + np.sin(t * 2 + fi * 0.7) * 3) % cols)
+    # Each flame is a fixed 3-cell-wide column, clipped at the grid edges
+    for dx in range(-1, 2):
+        c = fx_c + dx
+        if 0 <= c < cols:
+            fire_val[fr, c] = np.maximum(fire_val[fr, c], intensity)
+# Now map fire_val to chars and colors in one vectorized pass
+```
+
+## PIL String Rendering for Text-Heavy Scenes
+
+Alternative to per-cell bitmap blitting when rendering many long text strings (scrolling tickers, typewriter sequences, idea floods). Uses PIL's native `ImageDraw.text()` which renders an entire string in one C call, vs one Python-loop bitmap blit per character.
+
+Typical win: a scene with 56 ticker rows renders 56 PIL `text()` calls instead of ~10K individual bitmap blits.
+
+Use when: scene renders many rows of readable text strings. NOT suitable for sparse or spatially-scattered single characters (use normal `render()` for those).
+
+```python
+from PIL import Image, ImageDraw
+
+def render_text_layer(grid, rows_data, font):
+    """Render dense text rows via PIL instead of per-cell bitmap blitting.
+
+    Args:
+        grid: GridLayer instance (for oy, ch, ox, font metrics)
+        rows_data: list of (row_index, text_string, rgb_tuple) — one per row,
+            in any order
+        font: PIL ImageFont instance (grid.font)
+
+    Returns:
+        uint8 array (VH, VW, 3) — canvas with rendered text
+    """
+    img = Image.new("RGB", (VW, VH), (0, 0, 0))
+    draw = ImageDraw.Draw(img)
+    for row_idx, text, color in rows_data:
+        y = grid.oy + row_idx * grid.ch
+        if y + grid.ch > VH:
+            # Skip only this row: rows_data is not guaranteed to be sorted,
+            # so later entries may still fit on the canvas.
+            continue
+        draw.text((grid.ox, y), text, fill=color, font=font)
+    return np.array(img)
+```
+
+### Usage in a Ticker Scene
+
+```python
+# Build ticker data (text + color per row)
+rows_data = []
+for row in range(n_tickers):
+    text = build_ticker_text(row, t)       # scrolling substring
+    color = hsv2rgb_scalar(hue, 0.85, bri) # (R, G, B) tuple
+    rows_data.append((row, text, color))
+
+# One PIL pass instead of thousands of bitmap blits
+canvas_tickers = render_text_layer(g_md, rows_data, g_md.font)
+
+# Blend with other layers normally
+result = blend_canvas(canvas_bg, canvas_tickers, "screen", 0.9)
+```
+
+This is purely a rendering optimization — same visual output, fewer draw calls. The grid's `render()` method is still needed for sparse character fields where characters are placed individually based on value fields.
+
+## Bloom Optimization
+
+**Do NOT use `scipy.ndimage.uniform_filter`** -- measured at 424ms/frame.
+
+Use 4x downsample + manual box blur instead -- 84ms/frame (5x faster):
+
+```python
+H, W = canvas.shape[:2]                    # crop target comes from the canvas itself
+sm = canvas[::4, ::4].astype(np.float32)  # 4x downsample
+br = np.where(sm > threshold, sm, 0)      # keep only the bright pixels
+for _ in range(3):                          # 3-pass manual box blur
+    p = np.pad(br, ((1,1),(1,1),(0,0)), mode='edge')
+    br = (p[:-2,:-2] + p[:-2,1:-1] + p[:-2,2:] +
+          p[1:-1,:-2] + p[1:-1,1:-1] + p[1:-1,2:] +
+          p[2:,:-2] + p[2:,1:-1] + p[2:,2:]) / 9.0
+# Upsample back to full size; the crop trims the overshoot when H or W is
+# not a multiple of 4
+bl = np.repeat(np.repeat(br, 4, axis=0), 4, axis=1)[:H, :W]
+```
+
+## Vignette Caching
+
+The distance field depends only on resolution and strength, so it never changes from frame to frame — compute it once and cache it:
+
+```python
+_vig_cache = {}
+def sh_vignette(canvas, strength):
+    """Darken canvas toward its corners with a cached radial falloff mask.
+
+    Args:
+        canvas: (H, W, C) image array; H and W are taken from the array itself
+        strength: falloff strength, quantized to 2 decimals so the cache stays small
+
+    Returns:
+        uint8 array with the same shape as canvas
+    """
+    h, w = canvas.shape[:2]
+    # Build the mask from the same rounded value used in the cache key, so the
+    # result does not depend on which nearby strength was requested first.
+    strength = round(strength, 2)
+    key = (h, w, strength)
+    if key not in _vig_cache:
+        Y = np.linspace(-1, 1, h)[:, None]
+        X = np.linspace(-1, 1, w)[None, :]
+        # Floor of 0.15 keeps the corners from going fully black
+        _vig_cache[key] = np.clip(1.0 - np.sqrt(X**2+Y**2) * strength, 0.15, 1).astype(np.float32)
+    return np.clip(canvas * _vig_cache[key][:,:,None], 0, 255).astype(np.uint8)
+```
+
+Same pattern for CRT barrel distortion (cache remap coordinates).
+
+## Film Grain Optimization
+
+Generate noise at half resolution, tile up:
+
+```python
+# Ceil-divide so that odd H/W are still fully covered after the 2x repeat
+# (floor division would leave the noise one pixel short of the canvas).
+noise = np.random.randint(-amt, amt+1, ((H + 1)//2, (W + 1)//2, 1), dtype=np.int16)
+noise = np.repeat(np.repeat(noise, 2, axis=0), 2, axis=1)[:H, :W]
+```
+
+2x blocky grain looks like film grain and costs 1/4 the random generation.
+
+## Parallel Rendering
+
+### Worker Architecture
+
+```python
+hw = detect_hardware()
+N_WORKERS = hw["workers"]
+
+# Batch splitting (for non-clip architectures)
+batch_size = (n_frames + N_WORKERS - 1) // N_WORKERS
+batches = [(i, i*batch_size, min((i+1)*batch_size, n_frames), features, seg_path) ...]
+
+with multiprocessing.Pool(N_WORKERS) as pool:
+    segments = pool.starmap(render_batch, batches)
+```
+
+### Per-Clip Parallelism (Preferred for Segmented Videos)
+
+```python
+from concurrent.futures import ProcessPoolExecutor, as_completed
+
+with ProcessPoolExecutor(max_workers=N_WORKERS) as pool:
+    futures = {pool.submit(render_clip, seg, features, path): seg["id"]
+               for seg, path in clip_args}
+    for fut in as_completed(futures):
+        clip_id = futures[fut]
+        try:
+            fut.result()
+            log(f"  {clip_id} done")
+        except Exception as e:
+            log(f"  {clip_id} FAILED: {e}")
+```
+
+### Worker Isolation
+
+Each worker:
+- Creates its own `Renderer` instance (with full grid + bitmap init)
+- Opens its own ffmpeg subprocess
+- Has independent random seed (`random.seed(batch_id * 10000)`)
+- Writes to its own segment file and stderr log
+
+### ffmpeg Pipe Safety
+
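+**CRITICAL**: Never use `stderr=subprocess.PIPE` with a long-running ffmpeg unless something continuously drains it. The pipe buffer fills at ~64KB, ffmpeg then blocks on its next stderr write, and the render deadlocks: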
+
+```python
+# WRONG -- will deadlock
+pipe = subprocess.Popen(cmd, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
+
+# RIGHT -- stderr to file; the with-block closes the log even if a write fails
+with open(err_path, "w") as stderr_fh:
+    pipe = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.DEVNULL, stderr=stderr_fh)
+    try:
+        ...  # write all frames to pipe.stdin
+    finally:
+        # Always close stdin and reap the process, or ffmpeg is left hanging
+        pipe.stdin.close()
+        pipe.wait()
+    if pipe.returncode != 0:
+        # A failed encode leaves a truncated segment; surface it here
+        raise RuntimeError(f"ffmpeg exited with {pipe.returncode}, see {err_path}")
+```
+
+### Concatenation
+
+```python
+with open(concat_file, "w") as cf:
+    for seg in segments:
+        # The concat demuxer resolves relative paths against the list file's
+        # directory, not the cwd, so write absolute paths. Inside a
+        # single-quoted path a literal ' must be written as '\''.
+        seg_path = os.path.abspath(seg).replace("'", "'\\''")
+        cf.write(f"file '{seg_path}'\n")
+
+# -safe 0 is required for the demuxer to accept absolute paths
+cmd = ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", concat_file]
+if audio_path:
+    # Video is stream-copied; only the audio track is (re-)encoded
+    cmd += ["-i", audio_path, "-c:v", "copy", "-c:a", "aac", "-b:a", "192k", "-shortest"]
+else:
+    cmd += ["-c:v", "copy"]
+cmd.append(output_path)
+subprocess.run(cmd, capture_output=True, check=True)
+```
+
+## Particle System Performance
+
+Cap particle counts based on quality profile:
+
+| System | Low | Standard | High |
+|--------|-----|----------|------|
+| Explosion | 300 | 1000 | 2500 |
+| Embers | 500 | 1500 | 3000 |
+| Starfield | 300 | 800 | 1500 |
+| Dissolve | 200 | 600 | 1200 |
+
+Cull by truncating lists:
+```python
+MAX_PARTICLES = profile.get("particles_max", 1200)
+if len(S["px"]) > MAX_PARTICLES:
+    for k in ("px", "py", "vx", "vy", "life", "char"):
+        S[k] = S[k][-MAX_PARTICLES:]  # keep newest
+```
+
+## Memory Management
+
+- Feature arrays: pre-computed for all frames, shared across workers via fork semantics (COW)
+- Canvas: allocated once per worker, reused (`np.zeros(...)`)
+- Character arrays: allocated per frame (cheap -- rows*cols U1 strings)
+- Bitmap cache: ~500KB per grid size, initialized once per worker
+
+Total memory per worker: ~50-150MB. Total: ~400-1200MB for 8 workers.
+
+For low-memory systems (< 4GB), reduce worker count and use smaller grids.
+
+## Brightness Verification
+
+After render, spot-check brightness at sample timestamps:
+
+```python
+for t in [2, 30, 60, 120, 180]:
+    # Grab a single raw RGB frame at timestamp t
+    cmd = ["ffmpeg", "-ss", str(t), "-i", output_path,
+           "-frames:v", "1", "-f", "rawvideo", "-pix_fmt", "rgb24", "-"]
+    r = subprocess.run(cmd, capture_output=True)
+    arr = np.frombuffer(r.stdout, dtype=np.uint8)
+    if arr.size == 0:
+        # Nothing decoded (timestamp past the end of the video, or ffmpeg
+        # failed); arr.max() would raise on an empty array.
+        print(f"t={t}s  no frame decoded (past end of video or ffmpeg error)")
+        continue
+    print(f"t={t}s  mean={arr.mean():.1f}  max={arr.max()}")
+```
+
+Target: mean > 5 for quiet sections, mean > 15 for active sections. If consistently below, increase brightness floor in effects and/or global boost multiplier.
+
+## Render Time Estimates
+
+Scale with hardware. Baseline: 1080p, 24fps, ~180ms/frame/worker.
+
+| Duration | Frames | 4 workers | 8 workers | 16 workers |
+|----------|--------|-----------|-----------|------------|
+| 30s | 720 | ~3 min | ~2 min | ~1 min |
+| 2 min | 2,880 | ~13 min | ~7 min | ~4 min |
+| 3.5 min | 5,040 | ~23 min | ~12 min | ~6 min |
+| 5 min | 7,200 | ~33 min | ~17 min | ~9 min |
+| 10 min | 14,400 | ~65 min | ~33 min | ~17 min |
+
+At 720p: multiply times by ~0.5. At 4K: multiply by ~4.
+
+Heavier effects (many particles, dense grids, extra shader passes) add ~20-50%.
+
+---
+
+## Temp File Cleanup
+
+Rendering generates intermediate files that accumulate across runs. Clean up after the final concat/mux step.
+
+### Files to Clean
+
+| File type | Source | Location |
+|-----------|--------|----------|
+| WAV extracts | `ffmpeg -i input.mp3 ... tmp.wav` | `tempfile.mktemp()` or project dir |
+| Segment clips | `render_clip()` output | `segments/seg_00.mp4` etc. |
+| Concat list | ffmpeg concat demuxer input | `segments/concat.txt` |
+| ffmpeg stderr logs | piped to file for debugging | `*.log` in project dir |
+| Feature cache | pickled numpy arrays | `*.pkl` or `*.npz` |
+
+### Cleanup Function
+
+```python
+import glob
+import tempfile
+import shutil
+
+def cleanup_render_artifacts(segments_dir="segments", keep_final=True):
+    """Delete intermediate render files from the current working directory.
+
+    Call this AFTER verifying the final output exists and plays correctly.
+
+    Args:
+        segments_dir: directory holding segment clips and the concat list;
+            removed recursively if it exists
+        keep_final: documented as "only delete intermediates (not the final
+            output)"; NOTE(review): the body does not read this flag
+
+    Returns:
+        list of descriptions/paths of everything that was removed
+    """
+    deleted = []
+
+    # 1. Segment clips (whole directory)
+    if os.path.isdir(segments_dir):
+        shutil.rmtree(segments_dir)
+        deleted.append(f"directory: {segments_dir}")
+
+    # 2. Temporary WAV files — only those named like extraction leftovers
+    temp_wavs = [p for p in glob.glob("*.wav") if p.startswith(("tmp", "extracted_"))]
+    # 3. ffmpeg stderr logs
+    ffmpeg_logs = glob.glob("ffmpeg_*.log")
+    for path in temp_wavs + ffmpeg_logs:
+        os.remove(path)
+        deleted.append(path)
+
+    # 4. Feature cache (optional — useful to keep for re-renders)
+    # for cache in glob.glob("features_*.npz"):
+    #     os.remove(cache)
+    #     deleted.append(cache)
+
+    print(f"Cleaned {len(deleted)} artifacts: {deleted}")
+    return deleted
+```
+
+### Integration with Render Pipeline
+
+Call cleanup at the end of the main render script, after the final output is verified:
+
+```python
+# At end of main()
+if not (os.path.exists(output_path) and os.path.getsize(output_path) > 1000):
+    # Keep every intermediate so a failed render can be inspected or resumed
+    print("WARNING: final output missing or empty — skipping cleanup")
+elif os.environ.get("KEEP_INTERMEDIATES") == "1":
+    # Debugging escape hatch: leave segments, WAVs and logs in place
+    print(f"Done (intermediates kept). Output: {output_path}")
+else:
+    cleanup_render_artifacts(segments_dir="segments")
+    print(f"Done. Output: {output_path}")
+```
+
+### Temp File Best Practices
+
+- Use `tempfile.mkdtemp()` for segment directories — avoids polluting the project dir
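+- Create WAV extracts with `tempfile.mkstemp(suffix=".wav")` (close the returned file descriptor, then pass the path to `ffmpeg -y`) so they land in the OS temp dir — `tempfile.mktemp()` is deprecated and race-prone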
+- For debugging, set `KEEP_INTERMEDIATES=1` env var to skip cleanup
+- Feature caches (`.npz`) are cheap to store and expensive to recompute — default to keeping them