fix: private gpt example was broken due to changes in chroma

Signed-off-by: Matt Williams <m@technovangelist.com>
Matt Williams 2023-10-30 10:56:25 -07:00
parent 8d03bd7b54
commit f7856a57eb
4 changed files with 21 additions and 2005 deletions


@@ -6,7 +6,7 @@ PERSIST_DIRECTORY = os.environ.get('PERSIST_DIRECTORY', 'db')
 # Define the Chroma settings
 CHROMA_SETTINGS = Settings(
-        chroma_db_impl='duckdb+parquet',
+        # chroma_db_impl='duckdb+parquet',
         persist_directory=PERSIST_DIRECTORY,
         anonymized_telemetry=False
 )
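For reference, a minimal sketch of what the settings module ends up looking like after this change, assuming chromadb 0.4 or later (where the duckdb+parquet `chroma_db_impl` option no longer exists and persistence is driven by the persist directory alone):

```python
import os
from chromadb.config import Settings

# Directory the example persists its vector store into (default 'db').
PERSIST_DIRECTORY = os.environ.get('PERSIST_DIRECTORY', 'db')

# Sketch for chromadb >= 0.4: 'chroma_db_impl' is gone, so only the persist
# directory and the telemetry opt-out are configured here.
CHROMA_SETTINGS = Settings(
    persist_directory=PERSIST_DIRECTORY,
    anonymized_telemetry=False
)
```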


@@ -150,7 +150,7 @@ def main():
         print("Creating new vectorstore")
         texts = process_documents()
         print(f"Creating embeddings. May take some minutes...")
-        db = Chroma.from_documents(texts, embeddings, persist_directory=persist_directory, client_settings=CHROMA_SETTINGS)
+        db = Chroma.from_documents(texts, embeddings, persist_directory=persist_directory)
     db.persist()
     db = None
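A hedged sketch of the ingestion call after this change; the documents and the embedding model name here are stand-ins, not the example's actual `process_documents()` output:

```python
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

# Hypothetical stand-ins for the documents produced by process_documents().
texts = [
    Document(page_content="Ollama runs large language models locally."),
    Document(page_content="Chroma stores and searches embedding vectors."),
]

embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# With newer chromadb, persist_directory alone is enough; the removed
# client_settings=CHROMA_SETTINGS argument is what used to break here.
db = Chroma.from_documents(texts, embeddings, persist_directory="db")
db.persist()  # flush the collection to disk so the query script can reload it
```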


@@ -4,6 +4,7 @@ from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 from langchain.vectorstores import Chroma
 from langchain.llms import Ollama
+import chromadb
 import os
 import argparse
 import time
@@ -22,7 +23,10 @@ def main():
     # Parse the command line arguments
     args = parse_arguments()
     embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)
-    db = Chroma(persist_directory=persist_directory, embedding_function=embeddings, client_settings=CHROMA_SETTINGS)
+    # db = Chroma(persist_directory=persist_directory, embedding_function=embeddings, client_settings=CHROMA_SETTINGS)
+    db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
     retriever = db.as_retriever(search_kwargs={"k": target_source_chunks})
     # activate/deactivate the streaming StdOut callback for LLMs
     callbacks = [] if args.mute_stream else [StreamingStdOutCallbackHandler()]
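And a rough sketch of the query side wired up the same way; the model name, the `k` value, and the RetrievalQA chain are assumptions based on how the example is typically used, not part of this diff:

```python
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import Ollama
from langchain.vectorstores import Chroma

embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Reload the store persisted by the ingestion step; no client_settings needed.
db = Chroma(persist_directory="db", embedding_function=embeddings)
retriever = db.as_retriever(search_kwargs={"k": 4})

# Stream tokens to stdout while the local Ollama model answers.
llm = Ollama(model="llama2", callbacks=[StreamingStdOutCallbackHandler()])
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

print(qa("How does the example store its embeddings?")["result"])
```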

File diff suppressed because it is too large.