# Load the model and tokenizer
model = AutoAWQForCausalLM.from_pretrained(
    model_path, low_cpu_mem_usage=True, use_cache=False
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Quantize the model
model.quantize(tokenizer, quant_config=quant_config)

# Save the quantized weights and tokenizer
model.save_quantized(quant_path)
tokenizer.save_pretrained(quant_path)
print(f'Model is quantized and saved at "{quant_path}"')
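
# Optional follow-up (a minimal sketch, not part of the original example):
# load the saved quantized weights back for inference. AutoAWQ provides
# AutoAWQForCausalLM.from_quantized for this; fuse_layers=True enables
# fused kernels on supported architectures.
model = AutoAWQForCausalLM.from_quantized(quant_path, fuse_layers=True)
tokenizer = AutoTokenizer.from_pretrained(quant_path)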