pull model from ollama directory

This commit is contained in:
Bruce MacDonald 2023-06-29 12:15:13 -04:00
parent 8523a54f50
commit d57903875e

View file

@ -5,46 +5,60 @@ from urllib.parse import urlsplit, urlunsplit
from tqdm import tqdm from tqdm import tqdm
def models(models_home=".", *args, **kwargs): models_endpoint_url = 'https://ollama.ai/api/models'
for root, _, files in os.walk(models_home):
def models(models_home='.', *args, **kwargs):
for _, _, files in os.walk(models_home):
for file in files: for file in files:
base, ext = os.path.splitext(file) base, ext = os.path.splitext(file)
if ext == ".bin": if ext == '.bin':
yield base, os.path.join(root, file) yield base
def pull(model, models_home=".", *args, **kwargs): def pull(model, models_home='.', *args, **kwargs):
url = model url = model
if not (url.startswith("http://") or url.startswith("https://")): if not validators.url(url) and not url.startswith('huggingface.co'):
url = f"https://{url}" # this may just be a local model location
if model in models(models_home):
return model
# see if we have this model in our directory
response = requests.get(models_endpoint_url)
response.raise_for_status()
directory = response.json()
for model_info in directory:
if model_info.get('name') == model:
url = f"https://{model_info.get('url')}"
break
if not validators.url(url):
raise Exception(f'Unknown model {model}')
if not (url.startswith('http://') or url.startswith('https://')):
url = f'https://{url}'
parts = urlsplit(url) parts = urlsplit(url)
path_parts = parts.path.split("/tree/") path_parts = parts.path.split('/tree/')
if len(path_parts) == 1: if len(path_parts) == 1:
url = path_parts[0] location = path_parts[0]
branch = "main" branch = 'main'
else: else:
url, branch = path_parts location, branch = path_parts
url = url.strip("/") location = location.strip('/')
# Reconstruct the URL # Reconstruct the URL
new_url = urlunsplit( download_url = urlunsplit(
( (
"https", 'https',
parts.netloc, parts.netloc,
f"/api/models/{url}/tree/{branch}", f'/api/models/{location}/tree/{branch}',
parts.query, parts.query,
parts.fragment, parts.fragment,
) )
) )
if not validators.url(new_url): response = requests.get(download_url)
# this may just be a local model location
return model
response = requests.get(new_url)
response.raise_for_status() # Raises stored HTTPError, if one occurred response.raise_for_status() # Raises stored HTTPError, if one occurred
json_response = response.json() json_response = response.json()
@ -53,15 +67,17 @@ def pull(model, models_home=".", *args, **kwargs):
download_url = None download_url = None
file_size = 0 file_size = 0
for file_info in json_response: for file_info in json_response:
if file_info.get("type") == "file" and file_info.get("path").endswith(".bin"): if file_info.get('type') == 'file' and file_info.get('path').endswith('.bin'):
f_path = file_info.get("path") f_path = file_info.get('path')
download_url = f"https://huggingface.co/{url}/resolve/{branch}/{f_path}" download_url = (
file_size = file_info.get("size") f'https://huggingface.co/{location}/resolve/{branch}/{f_path}'
)
file_size = file_info.get('size')
if download_url is None: if download_url is None:
raise Exception("No model found") raise Exception('No model found')
local_filename = os.path.join(models_home, os.path.basename(url)) + ".bin" local_filename = os.path.join(models_home, os.path.basename(url)) + '.bin'
# Check if file already exists # Check if file already exists
first_byte = 0 first_byte = 0
@ -72,27 +88,27 @@ def pull(model, models_home=".", *args, **kwargs):
if first_byte >= file_size: if first_byte >= file_size:
return local_filename return local_filename
print(f"Pulling {parts.netloc}/{model}...") print(f'Pulling {model}...')
# If file size is non-zero, resume download # If file size is non-zero, resume download
if first_byte != 0: if first_byte != 0:
header = {"Range": f"bytes={first_byte}-"} header = {'Range': f'bytes={first_byte}-'}
else: else:
header = {} header = {}
response = requests.get(download_url, headers=header, stream=True) response = requests.get(download_url, headers=header, stream=True)
response.raise_for_status() # Raises stored HTTPError, if one occurred response.raise_for_status() # Raises stored HTTPError, if one occurred
total_size = int(response.headers.get("content-length", 0)) total_size = int(response.headers.get('content-length', 0))
with open(local_filename, "ab" if first_byte else "wb") as file, tqdm( with open(local_filename, 'ab' if first_byte else 'wb') as file, tqdm(
total=total_size, total=total_size,
unit="iB", unit='iB',
unit_scale=True, unit_scale=True,
unit_divisor=1024, unit_divisor=1024,
initial=first_byte, initial=first_byte,
ascii=" ==", ascii=' ==',
bar_format="Downloading [{bar}] {percentage:3.2f}% {rate_fmt}{postfix}", bar_format='Downloading [{bar}] {percentage:3.2f}% {rate_fmt}{postfix}',
) as bar: ) as bar:
for data in response.iter_content(chunk_size=1024): for data in response.iter_content(chunk_size=1024):
size = file.write(data) size = file.write(data)