pull model from ollama directory

2023-06-29 12:15:13 -04:00 · 2023-06-29 12:15:13 -04:00 · d57903875e
commit d57903875e
parent 8523a54f50
1 changed file with 49 additions and 33 deletions
--- a/ollama/model.py
+++ b/ollama/model.py
@@ -5,46 +5,60 @@ from urllib.parse import urlsplit, urlunsplit
 from tqdm import tqdm


-def models(models_home=".", *args, **kwargs):
-    for root, _, files in os.walk(models_home):
+models_endpoint_url = 'https://ollama.ai/api/models'
+
+
+def models(models_home='.', *args, **kwargs):
+    for _, _, files in os.walk(models_home):
        for file in files:
            base, ext = os.path.splitext(file)
-            if ext == ".bin":
-                yield base, os.path.join(root, file)
+            if ext == '.bin':
+                yield base


-def pull(model, models_home=".", *args, **kwargs):
+def pull(model, models_home='.', *args, **kwargs):
    url = model
-    if not (url.startswith("http://") or url.startswith("https://")):
-        url = f"https://{url}"
+    if not validators.url(url) and not url.startswith('huggingface.co'):
+        # this may just be a local model location
+        if model in models(models_home):
+            return model
+        # see if we have this model in our directory
+        response = requests.get(models_endpoint_url)
+        response.raise_for_status()
+        directory = response.json()
+        for model_info in directory:
+            if model_info.get('name') == model:
+                url = f"https://{model_info.get('url')}"
+                break
+        if not validators.url(url):
+            raise Exception(f'Unknown model {model}')
+
+    if not (url.startswith('http://') or url.startswith('https://')):
+        url = f'https://{url}'

    parts = urlsplit(url)
-    path_parts = parts.path.split("/tree/")
+    path_parts = parts.path.split('/tree/')

    if len(path_parts) == 1:
-        url = path_parts[0]
-        branch = "main"
+        location = path_parts[0]
+        branch = 'main'
    else:
-        url, branch = path_parts
+        location, branch = path_parts

-    url = url.strip("/")
+    location = location.strip('/')

    # Reconstruct the URL
-    new_url = urlunsplit(
+    download_url = urlunsplit(
        (
-            "https",
+            'https',
            parts.netloc,
-            f"/api/models/{url}/tree/{branch}",
+            f'/api/models/{location}/tree/{branch}',
            parts.query,
            parts.fragment,
        )
    )

-    if not validators.url(new_url):
-        # this may just be a local model location
-        return model
-
-    response = requests.get(new_url)
+    response = requests.get(download_url)
    response.raise_for_status()  # Raises stored HTTPError, if one occurred

    json_response = response.json()
@@ -53,15 +67,17 @@ def pull(model, models_home=".", *args, **kwargs):
    download_url = None
    file_size = 0
    for file_info in json_response:
-        if file_info.get("type") == "file" and file_info.get("path").endswith(".bin"):
-            f_path = file_info.get("path")
-            download_url = f"https://huggingface.co/{url}/resolve/{branch}/{f_path}"
-            file_size = file_info.get("size")
+        if file_info.get('type') == 'file' and file_info.get('path').endswith('.bin'):
+            f_path = file_info.get('path')
+            download_url = (
+                f'https://huggingface.co/{location}/resolve/{branch}/{f_path}'
+            )
+            file_size = file_info.get('size')

    if download_url is None:
-        raise Exception("No model found")
+        raise Exception('No model found')

-    local_filename = os.path.join(models_home, os.path.basename(url)) + ".bin"
+    local_filename = os.path.join(models_home, os.path.basename(url)) + '.bin'

    # Check if file already exists
    first_byte = 0
@@ -72,27 +88,27 @@ def pull(model, models_home=".", *args, **kwargs):
    if first_byte >= file_size:
        return local_filename

-    print(f"Pulling {parts.netloc}/{model}...")
+    print(f'Pulling {model}...')

    # If file size is non-zero, resume download
    if first_byte != 0:
-        header = {"Range": f"bytes={first_byte}-"}
+        header = {'Range': f'bytes={first_byte}-'}
    else:
        header = {}

    response = requests.get(download_url, headers=header, stream=True)
    response.raise_for_status()  # Raises stored HTTPError, if one occurred

-    total_size = int(response.headers.get("content-length", 0))
+    total_size = int(response.headers.get('content-length', 0))

-    with open(local_filename, "ab" if first_byte else "wb") as file, tqdm(
+    with open(local_filename, 'ab' if first_byte else 'wb') as file, tqdm(
        total=total_size,
-        unit="iB",
+        unit='iB',
        unit_scale=True,
        unit_divisor=1024,
        initial=first_byte,
-        ascii=" ==",
-        bar_format="Downloading [{bar}] {percentage:3.2f}% {rate_fmt}{postfix}",
+        ascii=' ==',
+        bar_format='Downloading [{bar}] {percentage:3.2f}% {rate_fmt}{postfix}',
    ) as bar:
        for data in response.iter_content(chunk_size=1024):
            size = file.write(data)