Add benchmark-driven model promotion workflow and pipeline stages
Some checks failed
ci / test-and-build (push) Has been cancelled
Some checks failed
ci / test-and-build (push) Has been cancelled
This commit is contained in:
parent
98b13d1069
commit
8c1f7c1e13
38 changed files with 5300 additions and 503 deletions
23
Makefile
23
Makefile
|
|
@ -6,7 +6,15 @@ BUILD_DIR := $(CURDIR)/build
|
|||
# RUN_ARGS: every goal named on the command line after the first one
# (words 2..N of $(MAKECMDGOALS)); empty when only one goal was given.
# RUN_CONFIG: absolute path of the first RUN_ARGS word when RUN_ARGS is
# non-empty, otherwise falls back to $(CONFIG).
RUN_ARGS := $(wordlist 2,$(words $(MAKECMDGOALS)),$(MAKECMDGOALS))
|
||||
RUN_CONFIG := $(if $(RUN_ARGS),$(abspath $(firstword $(RUN_ARGS))),$(CONFIG))
|
||||
|
||||
.PHONY: run doctor self-check sync test check build package package-deb package-arch release-check install-local install-service install clean-dist clean-build clean
|
||||
.PHONY: run doctor self-check eval-models build-heuristic-dataset sync-default-model check-default-model sync test check build package package-deb package-arch release-check install-local install-service install clean-dist clean-build clean
|
||||
# Overridable defaults for the benchmark / default-model targets. `?=` only
# assigns when the variable is not already set, so callers can override any
# of these (e.g. `make eval-models EVAL_OUTPUT=/tmp/report.json`).
#   EVAL_DATASET, EVAL_MATRIX   -> --dataset / --matrix of `aman eval-models`
#   EVAL_OUTPUT                 -> --output of eval-models, --report of sync-default-model
#   EVAL_HEURISTIC_RAW          -> --input of build-heuristic-dataset
#   EVAL_HEURISTIC_DATASET      -> --output of build-heuristic-dataset,
#                                  --heuristic-dataset of eval-models
#   EVAL_HEURISTIC_WEIGHT       -> --heuristic-weight of eval-models
#   MODEL_ARTIFACTS, CONSTANTS_FILE -> --artifacts / --constants of sync-default-model
# Keep comments off the assignment lines: trailing text/whitespace would
# become part of the value.
EVAL_DATASET ?= $(CURDIR)/benchmarks/cleanup_dataset.jsonl
|
||||
EVAL_MATRIX ?= $(CURDIR)/benchmarks/model_matrix.small_first.json
|
||||
EVAL_OUTPUT ?= $(CURDIR)/benchmarks/results/latest.json
|
||||
EVAL_HEURISTIC_RAW ?= $(CURDIR)/benchmarks/heuristics_dataset.raw.jsonl
|
||||
EVAL_HEURISTIC_DATASET ?= $(CURDIR)/benchmarks/heuristics_dataset.jsonl
|
||||
EVAL_HEURISTIC_WEIGHT ?= 0.25
|
||||
MODEL_ARTIFACTS ?= $(CURDIR)/benchmarks/model_artifacts.json
|
||||
CONSTANTS_FILE ?= $(CURDIR)/src/constants.py
|
||||
|
||||
ifneq ($(filter run,$(firstword $(MAKECMDGOALS))),)
|
||||
.PHONY: $(RUN_ARGS)
|
||||
|
|
@ -23,6 +31,18 @@ doctor:
|
|||
# self-check: invoke `aman self-check` through `uv run`, passing $(CONFIG)
# as the --config argument. No prerequisites.
self-check:
|
||||
uv run aman self-check --config $(CONFIG)
|
||||
|
||||
# build-heuristic-dataset: invoke `aman build-heuristic-dataset` with
# $(EVAL_HEURISTIC_RAW) as --input and $(EVAL_HEURISTIC_DATASET) as --output.
# NOTE(review): the recipe line must begin with a hard TAB in the real
# Makefile; leading whitespace is not visible in this rendering - confirm.
build-heuristic-dataset:
|
||||
uv run aman build-heuristic-dataset --input $(EVAL_HEURISTIC_RAW) --output $(EVAL_HEURISTIC_DATASET)
|
||||
|
||||
# eval-models: invoke `aman eval-models` with the dataset, model matrix,
# heuristic dataset, heuristic weight and output path taken from the EVAL_*
# variables. Lists build-heuristic-dataset as a prerequisite, so that
# target's recipe runs before this one.
eval-models: build-heuristic-dataset
|
||||
uv run aman eval-models --dataset $(EVAL_DATASET) --matrix $(EVAL_MATRIX) --heuristic-dataset $(EVAL_HEURISTIC_DATASET) --heuristic-weight $(EVAL_HEURISTIC_WEIGHT) --output $(EVAL_OUTPUT)
|
||||
|
||||
# sync-default-model: invoke `aman sync-default-model` with $(EVAL_OUTPUT)
# as --report, $(MODEL_ARTIFACTS) as --artifacts and $(CONSTANTS_FILE) as
# --constants. It has no prerequisite on eval-models, so the report file is
# whatever already exists at $(EVAL_OUTPUT).
sync-default-model:
|
||||
uv run aman sync-default-model --report $(EVAL_OUTPUT) --artifacts $(MODEL_ARTIFACTS) --constants $(CONSTANTS_FILE)
|
||||
|
||||
# check-default-model: same `aman sync-default-model` invocation as the
# sync-default-model target, with --check added.
# NOTE(review): presumably --check verifies without rewriting
# $(CONSTANTS_FILE) - confirm against the aman CLI.
check-default-model:
|
||||
uv run aman sync-default-model --check --report $(EVAL_OUTPUT) --artifacts $(MODEL_ARTIFACTS) --constants $(CONSTANTS_FILE)
|
||||
|
||||
# sync: run `uv sync`. No prerequisites.
sync:
|
||||
uv sync
|
||||
|
||||
|
|
@ -45,6 +65,7 @@ package-arch:
|
|||
./scripts/package_arch.sh
|
||||
|
||||
release-check:
|
||||
$(MAKE) check-default-model
|
||||
$(PYTHON) -m py_compile src/*.py tests/*.py
|
||||
$(MAKE) test
|
||||
$(MAKE) build
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue