In [None]:
# Download and run the Ollama Linux install script
!curl https://ollama.ai/install.sh | sh
!command -v systemctl >/dev/null && sudo systemctl stop ollama

In [None]:
# Install the third-party packages this notebook relies on: aiohttp (imported
# below) and pyngrok (presumably what provides the `ngrok` command that is
# launched later -- TODO confirm).
!pip install aiohttp pyngrok

import os
import asyncio
from aiohttp import ClientSession

# Point LD_LIBRARY_PATH at the system NVIDIA libraries so they take
# precedence over the built-in library. This matters most on Google Colab,
# which installs older drivers.
os.environ['LD_LIBRARY_PATH'] = '/usr/lib64-nvidia'

async def run(cmd):
    '''
    run is a helper function to run subcommands asynchronously.

    It starts `cmd` as a child process, prints every line the child writes
    to stdout and stderr as soon as it arrives, and returns (None) once both
    streams are closed and the child has exited.

    cmd is a sequence of strings: the program name followed by its
    arguments. It is executed directly, without a shell.

    If this coroutine is cancelled or fails while the child is still
    running, the child is terminated so it is not left behind holding
    its port.
    '''
    print('>>> starting', *cmd)
    p = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )

    async def pipe(lines):
        # Echo one stream line by line. Undecodable bytes are replaced
        # rather than raising, so a stray non-UTF-8 byte in a log line
        # cannot abort the whole run.
        async for line in lines:
            print(line.strip().decode('utf-8', errors='replace'))

    try:
        # Drain both pipes concurrently so neither can fill up and block
        # the child.
        await asyncio.gather(
            pipe(p.stdout),
            pipe(p.stderr),
        )
        # Both streams hit EOF; reap the child so it does not linger as a
        # zombie process.
        await p.wait()
    finally:
        # Reached with returncode still None only on cancellation or an
        # error above: stop the child instead of orphaning it.
        if p.returncode is None:
            try:
                p.terminate()
            except ProcessLookupError:
                # The child exited between the check and the signal.
                pass
            await p.wait()


await asyncio.gather(
 run(['ollama', 'serve']),
 run(['ngrok', 'http', '--log', 'stderr', '11434']),
)

The previous cell starts two processes, `ollama` and `ngrok`. The log output will include a line like the following, whose `url=` field is the external address.

```
t=2023-11-12T22:55:56+0000 lvl=info msg="started tunnel" obj=tunnels name=command_line addr=http://localhost:11434 url=https://8249-34-125-179-11.ngrok.io
```

The external address in this case is `https://8249-34-125-179-11.ngrok.io` which can be passed into `OLLAMA_HOST` to access this instance.

```bash
export OLLAMA_HOST=https://8249-34-125-179-11.ngrok.io
ollama list
ollama run mistral
```