Add benchmark-driven model promotion workflow and pipeline stages
Some checks failed
ci / test-and-build (push) Has been cancelled
Some checks failed
ci / test-and-build (push) Has been cancelled
This commit is contained in:
parent
98b13d1069
commit
8c1f7c1e13
38 changed files with 5300 additions and 503 deletions
77
benchmarks/model_matrix.small_first.json
Normal file
77
benchmarks/model_matrix.small_first.json
Normal file
|
|
@@ -0,0 +1,77 @@
|
|||
{
|
||||
"warmup_runs": 1,
|
||||
"measured_runs": 2,
|
||||
"timeout_sec": 120,
|
||||
"baseline_model": {
|
||||
"name": "qwen2.5-1.5b-instruct-q4_k_m",
|
||||
"provider": "local_llama",
|
||||
"model_path": "/path/to/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf",
|
||||
"profile": "default",
|
||||
"param_grid": {
|
||||
"temperature": [0.0],
|
||||
"max_tokens": [192],
|
||||
"top_p": [0.95],
|
||||
"top_k": [40],
|
||||
"repeat_penalty": [1.0],
|
||||
"min_p": [0.0]
|
||||
}
|
||||
},
|
||||
"candidate_models": [
|
||||
{
|
||||
"name": "qwen2.5-0.5b-instruct-q4_k_m",
|
||||
"provider": "local_llama",
|
||||
"model_path": "/path/to/Qwen2.5-0.5B-Instruct-Q4_K_M.gguf",
|
||||
"profile": "fast",
|
||||
"param_grid": {
|
||||
"temperature": [0.0, 0.1],
|
||||
"max_tokens": [96, 128],
|
||||
"top_p": [0.9, 0.95],
|
||||
"top_k": [20, 40],
|
||||
"repeat_penalty": [1.0, 1.1],
|
||||
"min_p": [0.0, 0.05]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "smollm2-360m-instruct-q4_k_m",
|
||||
"provider": "local_llama",
|
||||
"model_path": "/path/to/SmolLM2-360M-Instruct-Q4_K_M.gguf",
|
||||
"profile": "fast",
|
||||
"param_grid": {
|
||||
"temperature": [0.0, 0.1, 0.2],
|
||||
"max_tokens": [96, 128],
|
||||
"top_p": [0.9, 0.95],
|
||||
"top_k": [20, 40],
|
||||
"repeat_penalty": [1.0, 1.1],
|
||||
"min_p": [0.0, 0.05]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "llama-3.2-1b-instruct-q4_k_m",
|
||||
"provider": "local_llama",
|
||||
"model_path": "/path/to/Llama-3.2-1B-Instruct-Q4_K_M.gguf",
|
||||
"profile": "fast",
|
||||
"param_grid": {
|
||||
"temperature": [0.0, 0.1],
|
||||
"max_tokens": [128, 192],
|
||||
"top_p": [0.9, 0.95],
|
||||
"top_k": [20, 40],
|
||||
"repeat_penalty": [1.0, 1.1],
|
||||
"min_p": [0.0, 0.05]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "llama-3.2-3b-instruct-q4_k_m",
|
||||
"provider": "local_llama",
|
||||
"model_path": "/path/to/Llama-3.2-3B-Instruct-Q4_K_M.gguf",
|
||||
"profile": "default",
|
||||
"param_grid": {
|
||||
"temperature": [0.0, 0.1],
|
||||
"max_tokens": [192, 256],
|
||||
"top_p": [0.9, 0.95],
|
||||
"top_k": [20, 40],
|
||||
"repeat_penalty": [1.0, 1.1],
|
||||
"min_p": [0.0, 0.05]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue