- bench_embedding.sh / bench_ollama.sh for Ollama & embedding API tests - hf_embedding_bench.py for HF model performance - models.txt / models-emb.txt for test configs - docs in bench_embedding.md / bench_ollama.md
204 lines
7.3 KiB
Bash
204 lines
7.3 KiB
Bash
#!/usr/bin/env bash
|
||
# docs/bench_embedding.sh
|
||
# Benchmark /v1/embeddings: 维度/耗时/tokens/s/样本吞吐等(支持维度过滤)
|
||
set -euo pipefail
|
||
|
||
# ===== Default Config (env overrides) =====
|
||
ENDPOINT="${ENDPOINT:-http://127.0.0.1:9000}"
|
||
PATH_EMB="/v1/embeddings"
|
||
TIMEOUT="${TIMEOUT:-120}"
|
||
N="${N:-20}" # hey 请求总数
|
||
C="${C:-2}" # hey 并发数
|
||
BATCH="${BATCH:-4}" # 每次请求的 input 条数
|
||
CSV_OUTPUT="${CSV_OUTPUT:-}" # 非空时输出 CSV:embedding_bench.csv
|
||
REQUIRE_DIM="" # 通过 --require-dim 指定,例如 1024
|
||
|
||
# 默认模型列表(可用 --input_config 覆盖)
|
||
MODELS=(
|
||
"bge-m3:latest"
|
||
"qwen2.5:7b"
|
||
"llama2:7b"
|
||
)
|
||
|
||
# 默认文本样本(可用 --input_texts 覆盖;每行一个样本)
|
||
DEFAULT_TEXTS=(
|
||
"用一句话介绍你自己。"
|
||
"What is vector similarity search?"
|
||
"给我三条提升 Postgres 查询性能的建议。"
|
||
"Explain the difference between CPU and GPU."
|
||
"如何在 macOS 上安装 hey 并做并发压测?"
|
||
"Summarize the benefits of using embeddings for RAG."
|
||
)
|
||
|
||
# ===== Parse args =====
|
||
INPUT_FILE=""
|
||
while [[ $# -gt 0 ]]; do
|
||
case "$1" in
|
||
--input_config)
|
||
[[ -f "${2:-}" ]] || { echo "❌ --input_config file not found: ${2:-<missing>}"; exit 1; }
|
||
mapfile -t MODELS < <(sed -e 's/#.*$//' -e '/^[[:space:]]*$/d' "$2")
|
||
shift 2
|
||
;;
|
||
--input_texts)
|
||
[[ -f "${2:-}" ]] || { echo "❌ --input_texts file not found: ${2:-<missing>}"; exit 1; }
|
||
INPUT_FILE="$2"
|
||
shift 2
|
||
;;
|
||
--model)
|
||
MODELS=("${2:-bge-m3:latest}")
|
||
shift 2
|
||
;;
|
||
--require-dim)
|
||
REQUIRE_DIM="${2:-}"
|
||
[[ -n "$REQUIRE_DIM" ]] || { echo "❌ --require-dim needs a number (e.g., 1024)"; exit 1; }
|
||
shift 2
|
||
;;
|
||
*)
|
||
echo "⚠️ Unknown arg: $1 (ignored)"; shift 1;;
|
||
esac
|
||
done
|
||
|
||
# ===== Deps =====
|
||
need(){ command -v "$1" >/dev/null 2>&1 || { echo "❌ Need $1: brew install $1"; exit 1; }; }
|
||
need curl; need jq; need awk; need sed; need hey
|
||
|
||
# ===== Build inputs array (JSON) =====
|
||
# 取前 BATCH 条文本,构造 JSON 数组
|
||
build_inputs_json() {
|
||
local count="$1"
|
||
local arr="[]"
|
||
if [[ -n "$INPUT_FILE" ]]; then
|
||
arr=$(head -n "$count" "$INPUT_FILE" | sed '/^[[:space:]]*$/d' | jq -R -s 'split("\n")|map(select(length>0))')
|
||
else
|
||
local tmp=""
|
||
for s in "${DEFAULT_TEXTS[@]}"; do tmp+="${s}\n"; done
|
||
arr=$(printf "%b" "$tmp" | head -n "$count" | sed '/^[[:space:]]*$/d' | jq -R -s 'split("\n")|map(select(length>0))')
|
||
fi
|
||
printf '%s' "$arr"
|
||
}
|
||
|
||
# 构造请求负载
|
||
json_payload() {
|
||
local model="$1" inputs_json="$2"
|
||
cat <<EOF
|
||
{
|
||
"model": "${model}",
|
||
"input": ${inputs_json}
|
||
}
|
||
EOF
|
||
}
|
||
|
||
# 单次请求:解析 HTTP/TTFT/Total/维度/样本数/usage.prompt_tokens
|
||
measure_once() {
|
||
local model="$1" inputs_json="$2"
|
||
local delim="__CURL_TIMING__"
|
||
local resp timing http_code ttft total body dim samples ptok
|
||
resp=$(curl -sS -m "${TIMEOUT}" \
|
||
-H "Content-Type: application/json" \
|
||
-d "$(json_payload "$model" "$inputs_json")" \
|
||
-w "\n${delim} %{http_code} %{time_starttransfer} %{time_total}" \
|
||
"${ENDPOINT}${PATH_EMB}" || true)
|
||
|
||
timing=$(printf '%s\n' "$resp" | sed -n '$p')
|
||
read -r _ http_code ttft total <<<"$timing"
|
||
body=$(printf '%s\n' "$resp" | sed '$d')
|
||
|
||
dim=$(printf '%s' "$body" | jq -r '.data[0].embedding | length // 0' 2>/dev/null || echo 0)
|
||
samples=$(printf '%s' "$body" | jq -r '.data | length // 0' 2>/dev/null || echo 0)
|
||
ptok=$(printf '%s' "$body" | jq -r '.usage.prompt_tokens // 0' 2>/dev/null || echo 0)
|
||
|
||
echo "${http_code:-0} ${ttft:-0} ${total:-0} ${dim:-0} ${samples:-0} ${ptok:-0}"
|
||
}
|
||
|
||
# hey 压测:QPS/Avg/P90/P95
|
||
run_hey() {
|
||
local model="$1" inputs_json="$2"
|
||
local tmp payload
|
||
tmp="$(mktemp)"
|
||
payload=$(json_payload "$model" "$inputs_json")
|
||
hey -n "${N}" -c "${C}" -m POST \
|
||
-H "Content-Type: application/json" \
|
||
-D <(printf '%s' "$payload") \
|
||
"${ENDPOINT}${PATH_EMB}" > "$tmp" 2>/dev/null || true
|
||
|
||
local reqs avg p90 p95
|
||
reqs=$(grep -E 'Requests/sec' "$tmp" | awk '{print $2}')
|
||
avg=$(grep -E '^ Average:' "$tmp" | awk '{print $2}')
|
||
p90=$(grep -E ' 90% in ' "$tmp" | awk '{print $3}')
|
||
# 某些 hey 版本 " 95% in <val>" 第 3 列,也有第 2 列的实现,兼容处理
|
||
p95=$(grep -E ' 95% in ' "$tmp" | awk '{print $3}')
|
||
[[ -z "$p95" ]] && p95=$(grep -E ' 95% in ' "$tmp" | awk '{print $2}')
|
||
rm -f "$tmp"
|
||
echo "${reqs:-NA} ${avg:-NA} ${p90:-NA} ${p95:-NA}"
|
||
}
|
||
|
||
line(){ printf '%s\n' "-----------------------------------------------------------------------------------------------------"; }
|
||
|
||
print_header(){
|
||
echo "🔧 Endpoint: ${ENDPOINT}${PATH_EMB}"
|
||
echo "🧪 每请求样本(条):BATCH=${BATCH} 压测:N=${N}, C=${C} timeout=${TIMEOUT}s"
|
||
[[ -n "$REQUIRE_DIM" ]] && echo "📏 维度过滤:仅统计 dim == ${REQUIRE_DIM} 的结果"
|
||
line
|
||
printf "%-22s | %-5s | %-8s | %-8s | %-6s | %-7s | %-8s | %-10s | %-10s | %-8s | %-8s\n" \
|
||
"Model" "HTTP" "TTFT" "Total" "Dim" "Samples" "InTok" "Tok/s" "Samples/s" "P90(s)" "P95(s)"
|
||
line
|
||
}
|
||
|
||
maybe_csv_header(){
|
||
[[ -z "$CSV_OUTPUT" ]] && return 0
|
||
echo "model,http,ttft,total,dim,samples,ptokens,tokens_per_s,samples_per_s,p90,p95" > embedding_bench.csv
|
||
}
|
||
|
||
# ===== Run =====
|
||
print_header
|
||
maybe_csv_header
|
||
|
||
INPUTS_JSON="$(build_inputs_json "$BATCH")"
|
||
|
||
for m in "${MODELS[@]}"; do
|
||
# 预热一次
|
||
curl -sS -m "${TIMEOUT}" -o /dev/null -H "Content-Type: application/json" \
|
||
-d "$(json_payload "$m" "$INPUTS_JSON")" "${ENDPOINT}${PATH_EMB}" >/dev/null 2>&1 || true
|
||
|
||
# 单次测量
|
||
read -r http_code ttft total dim samples ptok <<<"$(measure_once "$m" "$INPUTS_JSON")"
|
||
|
||
if [[ "$http_code" != "200" ]]; then
|
||
printf "%-22s | %-5s | %-8s | %-8s | %-6s | %-7s | %-8s | %-10s | %-10s | %-8s | %-8s\n" \
|
||
"$m" "$http_code" "FAIL" "FAIL" "-" "-" "-" "-" "-" "-"
|
||
[[ -n "$CSV_OUTPUT" ]] && echo "$m,$http_code,FAIL,FAIL,-,-,-,-,-,-,-" >> embedding_bench.csv
|
||
continue
|
||
fi
|
||
|
||
# 维度过滤:只统计指定维度(如 1024)
|
||
if [[ -n "$REQUIRE_DIM" && "$dim" != "$REQUIRE_DIM" ]]; then
|
||
printf "%-22s | %-5s | %-8s | %-8s | %-6s | %-7s | %-8s | %-10s | %-10s | %-8s | %-8s\n" \
|
||
"$m" "$http_code" "SKIP" "SKIP" "$dim" "$samples" "$ptok" "-" "-" "-" "-"
|
||
[[ -n "$CSV_OUTPUT" ]] && echo "$m,$http_code,SKIP,SKIP,$dim,$samples,$ptok,-,-,-,-" >> embedding_bench.csv
|
||
continue
|
||
fi
|
||
|
||
# 吞吐估算
|
||
toks_per_s="NA"; samples_per_s="NA"
|
||
if awk "BEGIN{exit !($total>0)}"; then
|
||
if awk "BEGIN{exit !($ptok>=0)}"; then
|
||
toks_per_s=$(awk "BEGIN {printf \"%.2f\", ${ptok}/${total}}")
|
||
fi
|
||
if awk "BEGIN{exit !($samples>0)}"; then
|
||
samples_per_s=$(awk "BEGIN {printf \"%.2f\", ${samples}/${total}}")
|
||
fi
|
||
fi
|
||
|
||
# 并发压测(补充 P90/P95)
|
||
read -r reqs avg p90 p95 <<<"$(run_hey "$m" "$INPUTS_JSON")"
|
||
|
||
printf "%-22s | %-5s | %-8.3f | %-8.3f | %-6s | %-7s | %-8s | %-10s | %-10s | %-8s | %-8s\n" \
|
||
"$m" "$http_code" "$ttft" "$total" "$dim" "$samples" "$ptok" "$toks_per_s" "$samples_per_s" "$p90" "$p95"
|
||
|
||
[[ -n "$CSV_OUTPUT" ]] && echo "$m,$http_code,$ttft,$total,$dim,$samples,$ptok,$toks_per_s,$samples_per_s,$p90,$p95" >> embedding_bench.csv
|
||
done
|
||
|
||
line
|
||
echo "说明:Dim = 向量维度;Samples = 每请求输入条数;InTok = usage.prompt_tokens;Tok/s = InTok/Total;Samples/s = Samples/Total"
|
||
[[ -n "$CSV_OUTPUT" ]] && echo "• 已输出 CSV:embedding_bench.csv"
|