parent
c58b56123d
commit
e6bbfb863c
1 changed files with 8 additions and 5 deletions
|
@ -4,14 +4,16 @@ import llama_cpp
|
||||||
|
|
||||||
|
|
||||||
def main(args):
|
def main(args):
|
||||||
|
fname_inp = args.fname_inp.encode("utf-8")
|
||||||
|
fname_out = args.fname_out.encode("utf-8")
|
||||||
if not os.path.exists(fname_inp):
|
if not os.path.exists(fname_inp):
|
||||||
raise RuntimeError(f"Input file does not exist ({fname_inp})")
|
raise RuntimeError(f"Input file does not exist ({fname_inp})")
|
||||||
if os.path.exists(fname_out):
|
if os.path.exists(fname_out):
|
||||||
raise RuntimeError(f"Output file already exists ({fname_out})")
|
raise RuntimeError(f"Output file already exists ({fname_out})")
|
||||||
fname_inp = args.fname_inp.encode("utf-8")
|
ftype = args.type
|
||||||
fname_out = args.fname_out.encode("utf-8")
|
args = llama_cpp.llama_model_quantize_default_params()
|
||||||
itype = args.itype
|
args.ftype = ftype
|
||||||
return_code = llama_cpp.llama_model_quantize(fname_inp, fname_out, itype)
|
return_code = llama_cpp.llama_model_quantize(fname_inp, fname_out, args)
|
||||||
if return_code != 0:
|
if return_code != 0:
|
||||||
raise RuntimeError("Failed to quantize model")
|
raise RuntimeError("Failed to quantize model")
|
||||||
|
|
||||||
|
@ -20,6 +22,7 @@ if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument("fname_inp", type=str, help="Path to input model")
|
parser.add_argument("fname_inp", type=str, help="Path to input model")
|
||||||
parser.add_argument("fname_out", type=str, help="Path to output model")
|
parser.add_argument("fname_out", type=str, help="Path to output model")
|
||||||
parser.add_argument("type", type=int, help="Type of quantization (2: q4_0, 3: q4_1)")
|
parser.add_argument("type", type=int, help="Type of quantization (2: q4_0, 3: q4_1), see llama_cpp.py for enum")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
main(args)
|
main(args)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue