87 lines
2.6 KiB
Python
87 lines
2.6 KiB
Python
|
import curses
import json

import requests
from sentence_transformers import SentenceTransformer

from mattsollamatools import chunker
from utils import get_url_for_topic, topic_urls, menu, getUrls, get_summary, getArticleText, knn_search
|
if __name__ == "__main__":
    # Let the user pick a news topic through the curses-based menu.
    chosen_topic = curses.wrapper(menu)
    print("Here is your news summary:\n")

    urls = getUrls(chosen_topic, n=5)
    model = SentenceTransformer('all-MiniLM-L6-v2')
    allEmbeddings = []

    # Fetch each article, print its summary, and index its chunk embeddings
    # so user questions can be answered against the source text below.
    for url in urls:
        article = {}
        article['embeddings'] = []
        article['url'] = url
        text = getArticleText(url)
        summary = get_summary(text)
        chunks = chunker(text)  # Use the chunk_text function from web_utils
        embeddings = model.encode(chunks)
        for chunk, embedding in zip(chunks, embeddings):
            item = {}
            item['source'] = chunk
            item['embedding'] = embedding.tolist()  # Convert NumPy array to list
            item['sourcelength'] = len(chunk)
            article['embeddings'].append(item)

        allEmbeddings.append(article)
        print(f"{summary}\n")

    # Conversation context returned by Ollama. Initialized ONCE, before the
    # loop: re-initializing it per iteration (the previous behavior) silently
    # discarded the conversation after every turn, breaking follow-ups.
    context = []

    while True:
        # Input a question from the user
        question = input("Enter your question about the news, or type quit: ")
        if question.lower() == 'quit':
            break

        # Embed the user's question
        question_embedding = model.encode([question])

        # Perform KNN search to find the best matches (indices and source text)
        best_matches = knn_search(question_embedding, allEmbeddings, k=10)

        # Newline-separate the matches so consecutive chunks don't run
        # together in the prompt (the previous += had no separator).
        sourcetext = "\n".join(
            f"{i}. Index: {index}, Source Text: {source_text}"
            for i, (index, source_text) in enumerate(best_matches, start=1)
        )

        systemPrompt = f"Only use the following information to answer the question. Do not use anything else: {sourcetext}"

        payload = {
            "model": "mistral-openorca",
            "prompt": question,
            "system": systemPrompt,
            "stream": False,
            "context": context,
        }

        # requests serializes the payload and sets the JSON Content-Type
        # header itself via the json= parameter — no manual json.dumps needed.
        response = requests.post("http://localhost:11434/api/generate", json=payload)

        # Check the response
        if response.status_code == 200:
            output = response.json()
            context = output['context']  # carry the conversation forward
            print(output['response'] + "\n")
        else:
            print(f"Request failed with status code {response.status_code}")