Sync all skills and memories 2026-04-14 07:27

This commit is contained in:
2026-04-14 07:27:20 +09:00
parent 516bb44fe6
commit 1eba2bca95
386 changed files with 167655 additions and 0 deletions

View File

@@ -0,0 +1,33 @@
# OBLITERATUS Abliteration Config
# Usage: obliteratus run this-file.yaml
#
# This is for reproducible, version-controlled abliteration runs.
# For one-off usage, the CLI flags are simpler.

# Model to abliterate
model:
  name: "meta-llama/Llama-3.1-8B-Instruct"
  dtype: "bfloat16"       # float16, bfloat16, float32
  quantization: null      # null, "4bit", "8bit"
  device: "auto"          # auto, cuda, cuda:0, cpu

# Abliteration method and parameters
abliteration:
  method: "informed"      # See SKILL.md Step 4 for all 13 methods
  n_directions: null      # null = auto-detect, or integer (e.g., 8)
  regularization: 0.0     # 0.0-1.0, fraction of original to preserve
  refinement_passes: 1    # Iterative passes (increase for self-repair)
  norm_preserve: true     # Keep weight norms intact after projection

# Output
output:
  directory: "./abliterated-models"
  save_metadata: true     # Save abliteration_metadata.json alongside model
  contribute: false       # Save community contribution data

# Verification
verify:
  enabled: true
  test_prompts: null      # null = use built-in test prompts
  compute_perplexity: true
  compute_kl: true

View File

@@ -0,0 +1,40 @@
# OBLITERATUS Analysis Study Config
# Usage: obliteratus run this-file.yaml --preset jailbreak
#
# Run analysis modules to understand refusal geometry BEFORE abliterating.
# Useful for research or when you want to understand what you're removing.

# Model to analyze
model:
  name: "meta-llama/Llama-3.1-8B-Instruct"
  dtype: "bfloat16"
  quantization: "4bit"    # Saves VRAM for analysis
  device: "auto"

# Study configuration
study:
  # Available presets: quick, full, attention, jailbreak, guardrail, knowledge
  preset: "jailbreak"

  # Or specify individual strategies:
  # strategies:
  #   - layer_removal
  #   - head_pruning
  #   - ffn_ablation
  #   - embedding_ablation

# Analysis modules to run (subset of the 27 available)
analysis:
  - alignment_imprint     # Detect DPO/RLHF/CAI/SFT training method
  - concept_geometry      # Map refusal cone geometry
  - logit_lens            # Find which layer decides to refuse
  - anti_ouroboros        # Detect self-repair tendency
  - cross_layer           # Cross-layer alignment clustering
  - causal_tracing        # Causal necessity of components
  - residual_stream       # Attention vs MLP contribution

# Output
output:
  directory: "./analysis-results"
  save_plots: true        # Generate matplotlib visualizations
  save_report: true       # Generate markdown report

View File

@@ -0,0 +1,41 @@
# OBLITERATUS Batch Abliteration Config
# Abliterate multiple models with the same method for comparison.
#
# Run each one sequentially:
#   for model in models; do obliteratus obliterate $model --method informed; done
#
# Or use this as a reference for which models to process.

# Common settings
defaults:
  method: "informed"
  quantization: "4bit"
  output_dir: "./abliterated-models"

# Models to process (grouped by compute tier)
models:
  # Small (4-8 GB VRAM)
  small:
    - "Qwen/Qwen2.5-1.5B-Instruct"
    - "microsoft/Phi-3.5-mini-instruct"
    - "meta-llama/Llama-3.2-3B-Instruct"

  # Medium (8-16 GB VRAM)
  medium:
    - "meta-llama/Llama-3.1-8B-Instruct"
    - "mistralai/Mistral-7B-Instruct-v0.3"
    - "google/gemma-2-9b-it"
    - "Qwen/Qwen2.5-7B-Instruct"

  # Large (24 GB VRAM, 4-bit quantization)
  large:
    - "Qwen/Qwen2.5-14B-Instruct"
    - "Qwen/Qwen3-32B"
    - "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"

# Per-model method overrides (optional)
# Each key is a model name; its nested `method` replaces `defaults.method`
# for that model only.
overrides:
  "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B":
    method: "surgical"    # CoT-aware for reasoning models
  # NOTE(review): this model is not listed under `models` above - confirm
  # whether it should be added to a tier or the override removed.
  "mistralai/Mixtral-8x7B-Instruct-v0.1":
    method: "nuclear"     # Expert-granular for MoE models