import curses
import feedparser
import requests
import unicodedata
import json
from newspaper import Article
from bs4 import BeautifulSoup
from nltk.tokenize import sent_tokenize, word_tokenize
import numpy as np
from sklearn.neighbors import NearestNeighbors
from mattsollamatools import chunker

# Map of topic names to their RSS/Atom feed URLs.
topic_urls = {
    "Mac": "https://9to5mac.com/guides/mac/feed",
    "News": "http://www.npr.org/rss/rss.php?id=1001",
    "Nvidia": "https://nvidianews.nvidia.com/releases.xml",
    "Raspberry Pi": "https://www.raspberrypi.com/news/feed/",
    "Music": "https://www.billboard.com/c/music/music-news/feed/"
}


# Use curses to create a menu of topics
def menu(stdscr):
    """Curses entry point: let the user pick a topic, echo its feed URL.

    Returns the chosen topic name (a key of ``topic_urls``).
    """
    chosen_topic = get_url_for_topic(stdscr)
    # BUG FIX: get_url_for_topic now returns the topic *name*, so this lookup
    # succeeds instead of always falling through to "Topic not found"
    # (previously it returned the URL itself, which is never a dict key).
    url = topic_urls[chosen_topic] if chosen_topic in topic_urls else "Topic not found"
    stdscr.addstr(len(topic_urls) + 3, 0, f"Selected URL for {chosen_topic}: {url}")
    stdscr.refresh()
    return chosen_topic


# You have chosen a topic. Return the *name* of that topic; menu() resolves
# it to a URL via topic_urls.
def get_url_for_topic(stdscr):
    """Render an arrow-key menu of topics and return the selected topic name.

    NOTE: despite the historical name, this returns the topic key
    (e.g. "Mac"), not the URL — the caller looks the URL up itself.
    """
    curses.curs_set(0)  # Hide the cursor
    stdscr.clear()
    stdscr.addstr(0, 0, "Choose a topic using the arrow keys (Press Enter to select):")

    topics = list(topic_urls.keys())
    current_topic = 0

    while True:
        for i, topic in enumerate(topics):
            # "> " marks the highlighted row so the selection is visible.
            marker = "> " if i == current_topic else "  "
            stdscr.addstr(i + 2, 2, f"{marker}{topic}")
        stdscr.refresh()

        key = stdscr.getch()
        if key == curses.KEY_DOWN and current_topic < len(topics) - 1:
            current_topic += 1
        elif key == curses.KEY_UP and current_topic > 0:
            current_topic -= 1
        elif key == 10:  # Enter key
            # BUG FIX: return the topic name (menu() expects a topic_urls
            # key), not topic_urls[...] as before.
            return topics[current_topic]


# Get the last N URLs from an RSS feed
def getUrls(feed_url, n=20):
    """Return the links of the last *n* entries of an RSS/Atom feed."""
    feed = feedparser.parse(feed_url)
    return [entry.link for entry in feed.entries[-n:]]

# Often there are a bunch of ads and menus on pages for a news article.
# This uses newspaper3k to get just the text of just the article.
def getArticleText(url):
    """Download *url* and return just the article body text (via newspaper3k)."""
    article = Article(url)
    article.download()
    article.parse()
    return article.text


def get_summary(text):
    """Summarize *text* in 5 lines using a local Ollama model.

    Posts to the local Ollama /api/generate endpoint and returns the
    model's response string. Raises a requests error if the server is
    unreachable, times out, or returns an HTTP error status.
    """
    systemPrompt = "Write a concise summary of the text, return your responses with 5 lines that cover the key points of the text given."
    prompt = text

    url = "http://localhost:11434/api/generate"
    payload = {
        "model": "mistral-openorca",
        "prompt": prompt,
        "system": systemPrompt,
        "stream": False
    }
    headers = {"Content-Type": "application/json"}

    # BUG FIX: a timeout keeps a hung Ollama server from blocking forever,
    # and raise_for_status surfaces HTTP errors instead of a confusing
    # KeyError on the missing "response" field. json= serializes the payload
    # exactly as the old json.dumps + data= did.
    response = requests.post(url, json=payload, headers=headers, timeout=120)
    response.raise_for_status()
    return response.json()["response"]


# Perform K-nearest neighbors (KNN) search
def knn_search(question_embedding, embeddings, k=5):
    """Return the up-to-*k* stored chunks nearest to *question_embedding*.

    embeddings: list of articles, each a dict whose "embeddings" entry is a
    list of {"embedding": vector, "source": text} items — TODO confirm this
    shape against the caller that builds the store.

    Returns a list of (index, source_text) pairs, nearest first.
    """
    # Flatten all per-article chunk embeddings into one matrix, keeping the
    # source texts aligned by position.
    X = np.array([item['embedding'] for article in embeddings for item in article['embeddings']])
    source_texts = [item['source'] for article in embeddings for item in article['embeddings']]

    # BUG FIX: sklearn raises when n_neighbors exceeds the number of fitted
    # samples; clamp k so small stores still return their best matches.
    k = min(k, len(X))

    # Fit a KNN model on the embeddings
    knn = NearestNeighbors(n_neighbors=k, metric='cosine')
    knn.fit(X)

    # Find the indices and distances of the k-nearest neighbors
    distances, indices = knn.kneighbors(question_embedding, n_neighbors=k)

    # Pair each neighbor's index with its source text, nearest first.
    return [(idx, source_texts[idx]) for idx in indices[0]]