examples: fix quantize example (#1387)

@iyubondyrev thank you!
2024-04-28 02:48:47 +02:00 · 2024-04-28 02:48:47 +02:00 · e6bbfb863c
commit e6bbfb863c
parent c58b56123d
1 changed file with 8 additions and 5 deletions
--- a/examples/low_level_api/quantize.py
+++ b/examples/low_level_api/quantize.py
@@ -4,14 +4,16 @@ import llama_cpp
 def main(args):
    fname_inp = args.fname_inp.encode("utf-8")
    fname_out = args.fname_out.encode("utf-8")
    if not os.path.exists(fname_inp):
        raise RuntimeError(f"Input file does not exist ({fname_inp})")
    if os.path.exists(fname_out):
        raise RuntimeError(f"Output file already exists ({fname_out})")
-    fname_inp = args.fname_inp.encode("utf-8")
+    ftype = args.type
-    fname_out = args.fname_out.encode("utf-8")
+    args = llama_cpp.llama_model_quantize_default_params()
-    itype = args.itype
+    args.ftype = ftype
-    return_code = llama_cpp.llama_model_quantize(fname_inp, fname_out, itype)
+    return_code = llama_cpp.llama_model_quantize(fname_inp, fname_out, args)
    if return_code != 0:
        raise RuntimeError("Failed to quantize model")
@@ -20,6 +22,7 @@ if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("fname_inp", type=str, help="Path to input model")
    parser.add_argument("fname_out", type=str, help="Path to output model")
-    parser.add_argument("type", type=int, help="Type of quantization (2: q4_0, 3: q4_1)")
+    parser.add_argument("type", type=int, help="Type of quantization (2: q4_0, 3: q4_1), see llama_cpp.py for enum")
    args = parser.parse_args()
    main(args)