fix: map quantize_type to valid Transformers.js dtype values

--quantize none now emits dtype: "fp32" in the generated README instead
of dtype: "none", which is not a valid Transformers.js dtype. The emitted
value now matches the dtype values documented by Transformers.js (e.g.
fp32, fp16, q8, q4).
This commit is contained in:
Shreyas Karnik 2026-03-13 12:57:19 -07:00
parent b05d8863ca
commit df8d625c00
No known key found for this signature in database
GPG Key ID: 996869ACBDC2FF73

View File

@@ -321,12 +321,18 @@ def upload_to_hub(
commit_message="Upload ONNX model",
)
# Map quantize_type to Transformers.js dtype values
dtype_map = {"q4": "q4", "q8": "q8", "fp16": "fp16", "none": "fp32"}
tj_dtype = dtype_map.get(quantize_type, "fp32")
format_desc = "FP32 (no quantization)" if quantize_type == "none" else f"{quantize_type.upper()} quantization"
repo_name = output_repo.split("/")[-1]
readme = f"""---
base_model: {base_model}
tags: [onnx, transformers.js, webgpu, query-expansion, qmd]
library_name: transformers.js
---
# {output_repo.split("/")[-1]}
# {repo_name}
ONNX conversion of the QMD Query Expansion model for use with
[Transformers.js](https://huggingface.co/docs/transformers.js) and WebGPU.
@@ -336,7 +342,7 @@ ONNX conversion of the QMD Query Expansion model for use with
- **SFT:** {sft_model}
- **GRPO:** {grpo_model}
- **Task:** Query expansion (lex/vec/hyde format)
- **Format:** ONNX with {quantize_type.upper()} quantization
- **Format:** ONNX with {format_desc}
## Usage with Transformers.js
@@ -345,7 +351,7 @@ import {{ AutoTokenizer, AutoModelForCausalLM }} from "@huggingface/transformers
const tokenizer = await AutoTokenizer.from_pretrained("{output_repo}");
const model = await AutoModelForCausalLM.from_pretrained("{output_repo}", {{
dtype: "{quantize_type}",
dtype: "{tj_dtype}",
device: "webgpu",
}});
```