Upgrade fuzzy search library: replace fuzzywuzzy with thefuzz

Use process.extract to produce a list of fuzzy matches instead of a single
process.extractOne result, so that each candidate runner can be tried in turn.
This commit is contained in:
Michael Yang 2023-07-05 11:13:44 -07:00
parent 27ead750c3
commit 6ce9a58c35
4 changed files with 28 additions and 31 deletions

View file

@ -1,9 +1,8 @@
import os
import sys
from os import path
from pathlib import Path
from contextlib import contextmanager
from fuzzywuzzy import process
from thefuzz import process
from llama_cpp import Llama
from ctransformers import AutoModelForCausalLM
@ -39,16 +38,14 @@ def load(model_name, models={}):
for model_type in cls.model_types()
}
while len(runners) > 0:
for match, _ in process.extract(model_path, runners.keys(), limit=len(runners)):
try:
best_match, _ = process.extractOne(model_path, runners.keys())
model = runners.get(best_match, LlamaCppRunner)
runner = model(model_path, best_match)
model = runners.get(match)
runner = model(model_path, match)
models.update({model_name: runner})
return models.get(model_name)
return runner
except Exception:
# try the next runner
runners.pop(best_match)
pass
raise Exception("failed to load model", model_path, model_name)

36
poetry.lock generated
View file

@ -443,23 +443,6 @@ smb = ["smbprotocol"]
ssh = ["paramiko"]
tqdm = ["tqdm"]
[[package]]
name = "fuzzywuzzy"
version = "0.18.0"
description = "Fuzzy string matching in python"
optional = false
python-versions = "*"
files = [
{file = "fuzzywuzzy-0.18.0-py2.py3-none-any.whl", hash = "sha256:928244b28db720d1e0ee7587acf660ea49d7e4c632569cad4f1cd7e68a5f0993"},
{file = "fuzzywuzzy-0.18.0.tar.gz", hash = "sha256:45016e92264780e58972dca1b3d939ac864b78437422beecebb3095f8efd00e8"},
]
[package.dependencies]
python-levenshtein = {version = ">=0.12", optional = true, markers = "extra == \"speedup\""}
[package.extras]
speedup = ["python-levenshtein (>=0.12)"]
[[package]]
name = "huggingface-hub"
version = "0.15.1"
@ -1043,6 +1026,23 @@ files = [
[package.extras]
tests = ["pytest", "pytest-cov"]
[[package]]
name = "thefuzz"
version = "0.19.0"
description = "Fuzzy string matching in python"
optional = false
python-versions = "*"
files = [
{file = "thefuzz-0.19.0-py2.py3-none-any.whl", hash = "sha256:4fcdde8e40f5ca5e8106bc7665181f9598a9c8b18b0a4d38c41a095ba6788972"},
{file = "thefuzz-0.19.0.tar.gz", hash = "sha256:6f7126db2f2c8a54212b05e3a740e45f4291c497d75d20751728f635bb74aa3d"},
]
[package.dependencies]
python-levenshtein = {version = ">=0.12", optional = true, markers = "extra == \"speedup\""}
[package.extras]
speedup = ["python-levenshtein (>=0.12)"]
[[package]]
name = "tqdm"
version = "4.65.0"
@ -1211,4 +1211,4 @@ termcolor = ">=2.2,<3.0"
[metadata]
lock-version = "2.0"
python-versions = "^3.8"
content-hash = "bd4b373e3903bd26b983163f3cc527a6f768f4280201fcbca4d4dc05dea66912"
content-hash = "9e9c14aae817d7863b4facda840e246f98b2d6b2517085b22eb5c5d919ae7784"

View file

@ -18,7 +18,7 @@ validators = "^0.20.0"
yaspin = "^2.3.0"
llama-cpp-python = "^0.1.67"
ctransformers = "^0.2.10"
fuzzywuzzy = {extras = ["speedup"], version = "^0.18.0"}
thefuzz = {version = "^0.19.0", extras = ["speedup"]}
[build-system]
requires = ["poetry-core"]

View file

@ -270,9 +270,6 @@ frozenlist==1.3.3 ; python_version >= "3.8" and python_version < "4.0" \
fsspec==2023.6.0 ; python_version >= "3.8" and python_version < "4.0" \
--hash=sha256:1cbad1faef3e391fba6dc005ae9b5bdcbf43005c9167ce78c915549c352c869a \
--hash=sha256:d0b2f935446169753e7a5c5c55681c54ea91996cc67be93c39a154fb3a2742af
fuzzywuzzy[speedup]==0.18.0 ; python_version >= "3.8" and python_version < "4.0" \
--hash=sha256:45016e92264780e58972dca1b3d939ac864b78437422beecebb3095f8efd00e8 \
--hash=sha256:928244b28db720d1e0ee7587acf660ea49d7e4c632569cad4f1cd7e68a5f0993
huggingface-hub==0.15.1 ; python_version >= "3.8" and python_version < "4.0" \
--hash=sha256:05b0fb0abbf1f625dfee864648ac3049fe225ac4371c7bafaca0c2d3a2f83445 \
--hash=sha256:a61b7d1a7769fe10119e730277c72ab99d95c48d86a3d6da3e9f3d0f632a4081
@ -688,6 +685,9 @@ requests==2.31.0 ; python_version >= "3.8" and python_version < "4.0" \
termcolor==2.3.0 ; python_version >= "3.8" and python_version < "4.0" \
--hash=sha256:3afb05607b89aed0ffe25202399ee0867ad4d3cb4180d98aaf8eefa6a5f7d475 \
--hash=sha256:b5b08f68937f138fe92f6c089b99f1e2da0ae56c52b78bf7075fd95420fd9a5a
thefuzz[speedup]==0.19.0 ; python_version >= "3.8" and python_version < "4.0" \
--hash=sha256:4fcdde8e40f5ca5e8106bc7665181f9598a9c8b18b0a4d38c41a095ba6788972 \
--hash=sha256:6f7126db2f2c8a54212b05e3a740e45f4291c497d75d20751728f635bb74aa3d
tqdm==4.65.0 ; python_version >= "3.8" and python_version < "4.0" \
--hash=sha256:1871fb68a86b8fb3b59ca4cdd3dcccbc7e6d613eeed31f4c332531977b89beb5 \
--hash=sha256:c4f53a17fe37e132815abceec022631be8ffe1b9381c2e6e30aa70edc99e9671