Spaces:
Running
Running
Taras Yaroshko
commited on
Commit
•
c46c365
1
Parent(s):
2c0b06c
Remove comments
Browse files- app/main.py +0 -55
- app/utils/llm_utils.py +0 -249
- app/utils/prompt_suggestions.py +0 -306
app/main.py
CHANGED
@@ -1,58 +1,3 @@
|
|
1 |
-
# from fastapi import FastAPI, HTTPException
|
2 |
-
# from fastapi.middleware.cors import CORSMiddleware
|
3 |
-
# from milvus import default_server
|
4 |
-
# from pymilvus import Collection, connections, utility
|
5 |
-
|
6 |
-
# from app.models.prompts import PromptRequest, PromptResponse, URLModel
|
7 |
-
# from app.utils.llama_index_utils import (
|
8 |
-
# answer_prompt_with_template,
|
9 |
-
# # connect_to_milvus,
|
10 |
-
# init_vector_store_index,
|
11 |
-
# load_documents,
|
12 |
-
# )
|
13 |
-
# from app.utils.prompt_suggestions import generate_suggestions
|
14 |
-
|
15 |
-
# origins = [
|
16 |
-
# "http://localhost",
|
17 |
-
# "http://localhost:3000",
|
18 |
-
# "http://localhost:5500",
|
19 |
-
# "http://chatwidget.ethoradev.com",
|
20 |
-
# "https://chatwidget.ethoradev.com/",
|
21 |
-
# ]
|
22 |
-
|
23 |
-
# app = FastAPI()
|
24 |
-
|
25 |
-
# index = init_vector_store_index()
|
26 |
-
|
27 |
-
# app.add_middleware(
|
28 |
-
# CORSMiddleware,
|
29 |
-
# allow_origins=["*"],
|
30 |
-
# allow_credentials=True,
|
31 |
-
# allow_methods=["*"],
|
32 |
-
# allow_headers=["*"],
|
33 |
-
# )
|
34 |
-
|
35 |
-
|
36 |
-
# @app.post("/answer")
|
37 |
-
# async def answer_prompt(request: PromptRequest):
|
38 |
-
# try:
|
39 |
-
# response = answer_prompt_with_template(request.prompt, index, request.url)
|
40 |
-
# # answer = response.response
|
41 |
-
# return {"answer": response}
|
42 |
-
# except Exception as e:
|
43 |
-
# raise HTTPException(status_code=500, detail=str(e))
|
44 |
-
|
45 |
-
|
46 |
-
# @app.post("/generate_suggestions")
|
47 |
-
# async def get_suggestions(url: URLModel):
|
48 |
-
# try:
|
49 |
-
# suggestions = generate_suggestions(url.url, url.history)
|
50 |
-
# # suggestions = response.suggestions
|
51 |
-
# return {"suggestions": suggestions}
|
52 |
-
# except Exception as e:
|
53 |
-
# raise HTTPException(status_code=500, detail=str(e))
|
54 |
-
|
55 |
-
|
56 |
from fastapi import FastAPI, HTTPException
|
57 |
from fastapi.middleware.cors import CORSMiddleware
|
58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
from fastapi import FastAPI, HTTPException
|
2 |
from fastapi.middleware.cors import CORSMiddleware
|
3 |
|
app/utils/llm_utils.py
CHANGED
@@ -1,252 +1,3 @@
|
|
1 |
-
# import logging
|
2 |
-
# import os
|
3 |
-
# import random
|
4 |
-
|
5 |
-
# import pandas as pd
|
6 |
-
# from dotenv import load_dotenv
|
7 |
-
# from llama_index.core import (
|
8 |
-
# PromptTemplate,
|
9 |
-
# ServiceContext,
|
10 |
-
# SimpleDirectoryReader,
|
11 |
-
# StorageContext,
|
12 |
-
# VectorStoreIndex,
|
13 |
-
# load_index_from_storage,
|
14 |
-
# )
|
15 |
-
# from llama_index.core.callbacks import CallbackManager
|
16 |
-
# from llama_index.core.node_parser import SentenceSplitter
|
17 |
-
# from llama_index.core.schema import TextNode
|
18 |
-
# from llama_index.llms.openai import OpenAI
|
19 |
-
# from llama_index.vector_stores.milvus import MilvusVectorStore
|
20 |
-
# from milvus import default_server
|
21 |
-
# from pymilvus import Collection, connections, utility
|
22 |
-
# import sys
|
23 |
-
# from llama_index.core.vector_stores.types import MetadataFilters, ExactMatchFilter
|
24 |
-
|
25 |
-
# load_dotenv()
|
26 |
-
# api_key = os.getenv("OPENAI_API_KEY")
|
27 |
-
|
28 |
-
# # CSV_PATH = "/code/app/data/steno_summit.csv"
|
29 |
-
# CSV_PATH = os.getenv("CSV_PATH")
|
30 |
-
# TRANSCRIPTS_DIR = os.getenv("TRANSCRIPTS_DIR")
|
31 |
-
# PERSIST_DIR = os.getenv("PERSIST_DIR")
|
32 |
-
|
33 |
-
# logging.basicConfig(level=logging.INFO)
|
34 |
-
# logger = logging.getLogger(__name__)
|
35 |
-
# logging.basicConfig(stream=sys.stdout, level=logging.INFO)
|
36 |
-
# logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
|
37 |
-
|
38 |
-
|
39 |
-
# def load_links_table(csv_path: str = CSV_PATH):
|
40 |
-
# """Load the links table from the given CSV file.
|
41 |
-
|
42 |
-
# Args:
|
43 |
-
# csv_path (str, optional): The path to the CSV file. Defaults to "./app/data/drtalks.csv".
|
44 |
-
|
45 |
-
# Returns:
|
46 |
-
# pd.DataFrame: The loaded links table.
|
47 |
-
# """
|
48 |
-
# print("Loading links table...")
|
49 |
-
# links_table = pd.read_csv(csv_path)
|
50 |
-
# print(f"Successfully loaded {len(links_table)} links.\n")
|
51 |
-
# return links_table
|
52 |
-
|
53 |
-
|
54 |
-
# def connect_to_milvus(host: str = "127.0.0.1", port: int = default_server.listen_port):
|
55 |
-
# """Connect to the Milvus server using the given host and port.
|
56 |
-
|
57 |
-
# Args:
|
58 |
-
# host (str, optional): The name of the host. Defaults to "127.0.0.1".
|
59 |
-
# port (int, optional): The name of the port. Defaults to default_server.listen_port.
|
60 |
-
|
61 |
-
# Returns:
|
62 |
-
# bool: If the connection was successful.
|
63 |
-
# """
|
64 |
-
# print("\nConnecting to Milvus server...")
|
65 |
-
# try:
|
66 |
-
# connections.connect(host=host, port=port)
|
67 |
-
# except Exception as e:
|
68 |
-
# logger.error(f"Failed to connect to Milvus server: {e}\n")
|
69 |
-
# print(f"Failed to connect to Milvus server: {e}\n")
|
70 |
-
# logger.info("Starting default Milvus server...")
|
71 |
-
# print("Starting default Milvus server...")
|
72 |
-
# default_server.start()
|
73 |
-
# connections.connect(host=host, port=default_server.listen_port)
|
74 |
-
# return False
|
75 |
-
# if utility.get_server_type() == "milvus":
|
76 |
-
# logger.info("Succesfully connected to Milvus server\n")
|
77 |
-
# print("Succesfully connected to Milvus server\n")
|
78 |
-
# return True
|
79 |
-
# logger.error("Failed to connect to Milvus server\n")
|
80 |
-
# print("Failed to connect to Milvus server\n")
|
81 |
-
# return False
|
82 |
-
|
83 |
-
|
84 |
-
# def load_documents(transcripts_dir: str = TRANSCRIPTS_DIR, links_table: str = CSV_PATH):
|
85 |
-
# """Load the documents from the given directory.
|
86 |
-
|
87 |
-
# Args:
|
88 |
-
# transcripts_dir (str, optional): Directory of the transcripts. Defaults to "./outputs".
|
89 |
-
|
90 |
-
# Returns:
|
91 |
-
# List[Document]: list of loaded documents using SimpleDirectoryReader
|
92 |
-
# """
|
93 |
-
# # print("Loading documents...")
|
94 |
-
# # df = load_links_table(links_table)
|
95 |
-
# # # video_link_fn = lambda x: {"video_link": df[df["filename"] == x]["links"].values[0]}
|
96 |
-
# # reader = SimpleDirectoryReader(input_dir=transcripts_dir, recursive=True)
|
97 |
-
# # documents = reader.load_data()
|
98 |
-
# # # get 30 random documents with replacement
|
99 |
-
# # documents = random.choices(documents, k=30)
|
100 |
-
# # for document in documents:
|
101 |
-
# # # randomly choose df index
|
102 |
-
# # df_index = random.randint(0, len(df) - 1)
|
103 |
-
# # document.metadata["title"] = df.iloc[df_index]["title"]
|
104 |
-
# # document.metadata["brightcove_video_link"] = df.iloc[df_index][
|
105 |
-
# # "brightcove_video_link"
|
106 |
-
# # ]
|
107 |
-
# # document.metadata["url"] = df.iloc[df_index]["url"]
|
108 |
-
# # # document.metadata["video_link"] = df[
|
109 |
-
# # # df["filename"] == document.metadata["file_name"]
|
110 |
-
# # # ]["links"].values[0]
|
111 |
-
# # # document.excluded_llm_metadata_keys.append("video_link")
|
112 |
-
# # # document.excluded_embed_metadata_keys.append("video_link")
|
113 |
-
# # # # for document in documents:
|
114 |
-
# # # # print(document.get_node_info)
|
115 |
-
# # # # print(documents[0].get_node_info)
|
116 |
-
# # print(f"Successfully loaded {len(documents)} transcripts.\n")
|
117 |
-
# # return documents
|
118 |
-
# print("Loading documents...")
|
119 |
-
# print(links_table)
|
120 |
-
# df = load_links_table(links_table)
|
121 |
-
# reader = SimpleDirectoryReader(input_dir=transcripts_dir, recursive=True)
|
122 |
-
# documents = reader.load_data()
|
123 |
-
# for document in documents:
|
124 |
-
# # df_index = random.randint(0, len(df) - 1)
|
125 |
-
# document_id = int(document.metadata["file_name"].split(".")[0].split("_")[-1])
|
126 |
-
# # find the corresponding row in the links table
|
127 |
-
# df_index = df[df["video_id"] == document_id]
|
128 |
-
# document.metadata["title"] = df_index["title"].values[0]
|
129 |
-
# document.metadata["brightcove_video_link"] = df_index[
|
130 |
-
# "brightcove_video_link"
|
131 |
-
# ].values[0]
|
132 |
-
# document.metadata["url"] = df_index["url"].values[0]
|
133 |
-
# # document.metadata["brightcove_video_link"] = df.iloc[df_index][
|
134 |
-
# # "brightcove_video_link"
|
135 |
-
# # ]
|
136 |
-
# # document.metadata["url"] = df.iloc[df_index]["url"]
|
137 |
-
# print(document.metadata)
|
138 |
-
# print(f"Successfully loaded {len(documents)} transcripts.\n")
|
139 |
-
# return documents
|
140 |
-
|
141 |
-
|
142 |
-
# def init_vector_store_index():
|
143 |
-
# """Initialize the vector store index.
|
144 |
-
|
145 |
-
# Args:
|
146 |
-
# documents (List[Document]): The documents to be indexed.
|
147 |
-
# collection_name (str, optional): The name of the Milvus Collection to use. Defaults to "llamalection".
|
148 |
-
# """
|
149 |
-
# llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)
|
150 |
-
# service_context = ServiceContext.from_defaults(llm=llm)
|
151 |
-
# storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
|
152 |
-
# print(PERSIST_DIR)
|
153 |
-
# index = load_index_from_storage(storage_context, service_context=service_context)
|
154 |
-
# return index
|
155 |
-
|
156 |
-
|
157 |
-
# def parse_url(url):
|
158 |
-
# """Parse the given URL to extract the video ID.
|
159 |
-
|
160 |
-
# Args:
|
161 |
-
# url (str): The URL to parse.
|
162 |
-
|
163 |
-
# Returns:
|
164 |
-
# str: The extracted video ID.
|
165 |
-
# """
|
166 |
-
# video_id = url.split("/")[4]
|
167 |
-
# return video_id
|
168 |
-
|
169 |
-
|
170 |
-
# def answer_prompt_with_template(query_str, index, url):
|
171 |
-
# """Answer the given prompt using the given vector store index.
|
172 |
-
|
173 |
-
# Args:
|
174 |
-
# query_str (str): The prompt to answer.
|
175 |
-
# index (VectorStoreIndex): The vector store index to use.
|
176 |
-
# """
|
177 |
-
# # template = """
|
178 |
-
# # Context information is below.
|
179 |
-
# # ---------------------
|
180 |
-
# # {context_str}
|
181 |
-
# # ---------------------
|
182 |
-
# # Given the context information and not prior knowledge, answer the query.
|
183 |
-
# # Please provide your answer in maximum 5 sentences. The first few sentences should be a direct answer to the question.
|
184 |
-
# # The second to last sentence should be in the format "For more information, refer to video |url| at |brightcove_video_link|".
|
185 |
-
# # You should the video_link metadata of the parent document to fill in the video link (for example, https://drtalks.com/videos/virtual-assistants).
|
186 |
-
# # If your answer has multiple sources, just write them one after the other.
|
187 |
-
# # In the end of every response, write "Thanks for asking!" or a similar phrase to thank the user for his request.
|
188 |
-
# # If the query is irrelevant (i.e. it contains no information or is not a question), respond with "I'm sorry, I cannot answer that question. Please, provide more context".
|
189 |
-
# # If the query has the word 'help' in it (i.e. "Help", "can you help me", "I need help"), respond with "Need help? Find guides and find quick solutions to common questions here: https://drtalks.zendesk.com/hc/en-us".
|
190 |
-
# # Some examples are given below.
|
191 |
-
|
192 |
-
# # Query: What do I need to know about fasting and aging?
|
193 |
-
# # Response: Fasting can have significant health benefits, including anti-aging effects by promoting autophagy and cellular renewal. It is essential to understand the balance between fasting and nourishment to support overall health and longevity. Incorporating fasting into your routine can help improve metabolic health, reduce inflammation, and potentially slow down the aging process. For more information, refer to the video https://drtalks.com/videos/turning-on-autophagy at https://players.brightcove.net/6314452011001/default_default/index.html?videoId=6342064815112&t=69s. Thanks for asking!
|
194 |
-
|
195 |
-
# # Query: Which lifestyle practices should I follow to ensure healthy aging?
|
196 |
-
# # Response: To ensure healthy aging, you should focus on incorporating nutrient-rich foods like liver, mushrooms, eggs, greens, and good fats into your diet. It is also essential to maintain a balanced and varied diet, including options for vegan/vegetarian eating and seasonal foods. Additionally, prioritize lifestyle factors such as stress management, adequate sleep, and regular exercise to impact gene expression positively and promote overall health. For more information, refer to the video https://drtalks.com/videos/younger-you-aging-is-optional at https://players.brightcove.net/6314452011001/default_default/index.html?videoId=6342076056112&t=242s. Thanks for asking!
|
197 |
-
|
198 |
-
# # Query: {query_str}
|
199 |
-
# # Answer:
|
200 |
-
# # """
|
201 |
-
# template = """
|
202 |
-
# Context information is below.
|
203 |
-
# ---------------------
|
204 |
-
# {context_str}
|
205 |
-
# ---------------------
|
206 |
-
# Given the context information and not prior knowledge, answer the query.
|
207 |
-
# Please provide your answer in maximum 5 sentences. The first few sentences should be a direct answer to the question.
|
208 |
-
# If the query is irrelevant (i.e. it contains no information or is not a question), respond with "I'm sorry, I cannot answer that question. Please, provide more context".
|
209 |
-
# If the query has the word 'help' in it (i.e. "Help", "can you help me", "I need help"), respond with "Need help? Find guides and find quick solutions to common questions here: https://drtalks.zendesk.com/hc/en-us".
|
210 |
-
# Some examples are given below.
|
211 |
-
|
212 |
-
# Query: What do I need to know about fasting and aging?
|
213 |
-
# Response: Fasting can have significant health benefits, including anti-aging effects by promoting autophagy and cellular renewal. It is essential to understand the balance between fasting and nourishment to support overall health and longevity. Incorporating fasting into your routine can help improve metabolic health, reduce inflammation, and potentially slow down the aging process.
|
214 |
-
|
215 |
-
# Query: Which lifestyle practices should I follow to ensure healthy aging?
|
216 |
-
# Response: To ensure healthy aging, you should focus on incorporating nutrient-rich foods like liver, mushrooms, eggs, greens, and good fats into your diet. It is also essential to maintain a balanced and varied diet, including options for vegan/vegetarian eating and seasonal foods. Additionally, prioritize lifestyle factors such as stress management, adequate sleep, and regular exercise to impact gene expression positively and promote overall health.
|
217 |
-
|
218 |
-
# Query: {query_str}
|
219 |
-
# Answer:
|
220 |
-
# """
|
221 |
-
# if "videos" in url:
|
222 |
-
# video_slug = parse_url(url)
|
223 |
-
# print(video_slug)
|
224 |
-
# filters = MetadataFilters(
|
225 |
-
# filters=[ExactMatchFilter(key="slug", value=video_slug)]
|
226 |
-
# )
|
227 |
-
# qa_prompt_tmpl = PromptTemplate(template)
|
228 |
-
# query_engine = index.as_query_engine(
|
229 |
-
# text_qa_template=qa_prompt_tmpl, similarity_top_k=4, filters=filters
|
230 |
-
# )
|
231 |
-
# response = query_engine.query(query_str)
|
232 |
-
# print(response.source_nodes)
|
233 |
-
# else:
|
234 |
-
# qa_prompt_tmpl = PromptTemplate(template)
|
235 |
-
# query_engine = index.as_query_engine(
|
236 |
-
# text_qa_template=qa_prompt_tmpl, similarity_top_k=4
|
237 |
-
# )
|
238 |
-
# response = query_engine.query(query_str)
|
239 |
-
# print("Answer: ")
|
240 |
-
# url_with_ts = response.source_nodes[0].metadata["url_with_timestamp"]
|
241 |
-
# if (
|
242 |
-
# "zendesk" not in response.response
|
243 |
-
# and "I cannot answer that question" not in response.response
|
244 |
-
# ):
|
245 |
-
# final_response = f"{response.response} For more information, refer to {url_with_ts}. Thanks for asking!"
|
246 |
-
# else:
|
247 |
-
# final_response = response.response
|
248 |
-
# return final_response
|
249 |
-
|
250 |
import logging
|
251 |
import os
|
252 |
import random
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import logging
|
2 |
import os
|
3 |
import random
|
app/utils/prompt_suggestions.py
CHANGED
@@ -1,309 +1,3 @@
|
|
1 |
-
# import os
|
2 |
-
# import re
|
3 |
-
# from typing import List
|
4 |
-
|
5 |
-
# import requests
|
6 |
-
# from bs4 import BeautifulSoup
|
7 |
-
# from openai import OpenAI
|
8 |
-
|
9 |
-
# HEADERS = {
|
10 |
-
# "User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7"
|
11 |
-
# }
|
12 |
-
|
13 |
-
# PAGE_TYPES = {
|
14 |
-
# # "topic_page": r"https://drtalks\.com/topics/\S+",
|
15 |
-
# # "video_page": r"https://drtalks\.com/videos/\S+",
|
16 |
-
# "topic_page": r"https://drtalks\S*/topics/",
|
17 |
-
# "video_page": r"https://drtalks\S*/videos/",
|
18 |
-
# }
|
19 |
-
|
20 |
-
# from dotenv import load_dotenv
|
21 |
-
|
22 |
-
# load_dotenv()
|
23 |
-
# api_key = os.getenv("OPENAI_API_KEY")
|
24 |
-
|
25 |
-
|
26 |
-
# def get_page(url: str, headers: dict = HEADERS):
|
27 |
-
# """Get the webpage using the given URL and BeautifulSoup.
|
28 |
-
|
29 |
-
# Args:
|
30 |
-
# url (str): The URL of the webpage to retrieve.
|
31 |
-
|
32 |
-
# Returns:
|
33 |
-
# BeautifulSoup: The BeautifulSoup object of the webpage.
|
34 |
-
# """
|
35 |
-
# response = requests.get(url, headers=headers)
|
36 |
-
# if response.status_code == 200:
|
37 |
-
# soup = BeautifulSoup(response.text, "html.parser")
|
38 |
-
# return soup
|
39 |
-
# else:
|
40 |
-
# print("Failed to retrieve the webpage.")
|
41 |
-
|
42 |
-
|
43 |
-
# def get_page_type(url: str):
|
44 |
-
# """Get the type of the webpage using the given URL.
|
45 |
-
|
46 |
-
# Args:
|
47 |
-
# url (str): The URL of the webpage to retrieve.
|
48 |
-
|
49 |
-
# Returns:
|
50 |
-
# str: The type of the webpage.
|
51 |
-
# """
|
52 |
-
# for key, pattern in PAGE_TYPES.items():
|
53 |
-
# if re.match(pattern, url):
|
54 |
-
# return key
|
55 |
-
# return None
|
56 |
-
|
57 |
-
|
58 |
-
# def get_topics_and_summary(soup: BeautifulSoup, page_type: str):
|
59 |
-
# """Get the topics and summary of the given webpage.
|
60 |
-
|
61 |
-
# Args:
|
62 |
-
# soup (BeautifulSoup): The BeautifulSoup object of the webpage.
|
63 |
-
|
64 |
-
# Returns:
|
65 |
-
# Tuple[List[str], str]: The list of topics and the summary of the webpage.
|
66 |
-
# """
|
67 |
-
# # topics = [topic.text for topic in soup.find_all('h2')]
|
68 |
-
# # summary = soup.find('div', class_='entry-content').text
|
69 |
-
# # return topics, summary
|
70 |
-
# if page_type == "video_page":
|
71 |
-
# video_topics = soup.find("div", class_="video-topics-block")
|
72 |
-
# video_summary = soup.find("div", class_="video-info-content")
|
73 |
-
# try:
|
74 |
-
# topics = [topic.text.strip() for topic in video_topics.find_all("a")]
|
75 |
-
# summary_list = [
|
76 |
-
# topic.text.strip()
|
77 |
-
# for topic in video_summary.find_all("li")
|
78 |
-
# if not "This video is part of the" in topic.text.strip()
|
79 |
-
# ]
|
80 |
-
# print(summary_list)
|
81 |
-
# except Exception as e:
|
82 |
-
# print(f"Failed to retrieve the topics and summary: {e}")
|
83 |
-
# return [], []
|
84 |
-
# elif page_type == "topic_page":
|
85 |
-
# topic_title = soup.find(
|
86 |
-
# "h1",
|
87 |
-
# class_="mb-6 text-white text-[48px] lg:text-[64px] uppercase font-bold",
|
88 |
-
# )
|
89 |
-
# topic_subcontent = soup.find(
|
90 |
-
# "p",
|
91 |
-
# class_="mb-6 pb-6 border-b border-drtalksSearchBorder text-white text-lg",
|
92 |
-
# )
|
93 |
-
# try:
|
94 |
-
# topics = [topic_title.text.strip()]
|
95 |
-
# summary_list = [topic_subcontent.text.strip()]
|
96 |
-
# except Exception as e:
|
97 |
-
# print(f"Failed to retrieve the title and subcontent: {e}")
|
98 |
-
# return [], []
|
99 |
-
# else:
|
100 |
-
# topics = []
|
101 |
-
# summary_list = []
|
102 |
-
# return topics, summary_list
|
103 |
-
|
104 |
-
|
105 |
-
# def parse_history(data):
|
106 |
-
# messages = []
|
107 |
-
|
108 |
-
# for item in data:
|
109 |
-
# if item["position"] == "right":
|
110 |
-
# messages.append(("user", item["text"]))
|
111 |
-
# elif item["position"] == "left":
|
112 |
-
# messages.append(("assistant", item["text"]))
|
113 |
-
|
114 |
-
# grouped_messages = [messages[i : i + 2] for i in range(0, len(messages), 2)]
|
115 |
-
|
116 |
-
# processed_messages = []
|
117 |
-
|
118 |
-
# for grouped_message in grouped_messages:
|
119 |
-
# if (
|
120 |
-
# grouped_message[1][1]
|
121 |
-
# == "Need help? Find guides and find quick solutions to common questions here: https://drtalks.zendesk.com/hc/en-us."
|
122 |
-
# ):
|
123 |
-
# continue
|
124 |
-
# elif (
|
125 |
-
# grouped_message[1][1]
|
126 |
-
# == "I'm sorry, I cannot answer that question. Please provide more context."
|
127 |
-
# ):
|
128 |
-
# continue
|
129 |
-
# else:
|
130 |
-
# processed_messages.append(grouped_message[0])
|
131 |
-
# processed_messages.append(grouped_message[1])
|
132 |
-
# return processed_messages
|
133 |
-
|
134 |
-
|
135 |
-
# def get_suggestions(topics: List[str] = [], summary: List[str] = [], history=[]):
|
136 |
-
# """Get the suggestions for the given topics and summary.
|
137 |
-
|
138 |
-
# Args:
|
139 |
-
# topics (List[str]): The list of topics for the video.
|
140 |
-
# summary (List[str]): The list of summary elements of the video.
|
141 |
-
|
142 |
-
# Returns:
|
143 |
-
# List[str]: The list of suggestions for the given topics and summary.
|
144 |
-
# """
|
145 |
-
# # suggestions = []
|
146 |
-
# # for topic in topics:
|
147 |
-
# # suggestions.append(f"Can you tell me more about {topic}?")
|
148 |
-
# # suggestions.append("Can you summarize the video?")
|
149 |
-
# # return suggestions
|
150 |
-
# client = OpenAI()
|
151 |
-
# prompt_with_summary_and_topics = f"""
|
152 |
-
# Generate a 3-5 short user request suggestions based on this chunk of text:
|
153 |
-
|
154 |
-
# ```
|
155 |
-
# {summary}
|
156 |
-
# ```
|
157 |
-
# and these keywords: {topics}.
|
158 |
-
|
159 |
-
# Here's the example of the user request suggestions:
|
160 |
-
# ```
|
161 |
-
# 1. How does nitric oxide keep your heart pumping?
|
162 |
-
# 2. How do I ensure healthy aging?
|
163 |
-
# 3. What is the long-term effect of sugar on my health?
|
164 |
-
# 4. Explain the relationship between trauma and addiction.
|
165 |
-
# 5. How does the brain process emotions?
|
166 |
-
# ```
|
167 |
-
|
168 |
-
# If the summary and keywords are not provided, just generate the user request suggestions based on the example.
|
169 |
-
# Do not output [] or None in any case – your response should only include the bullet points, no other commentaries.
|
170 |
-
|
171 |
-
# The output format should look like this:
|
172 |
-
# ```
|
173 |
-
# 1. What do I need to know about dementia?
|
174 |
-
# 2. How do treat COVID effectively?
|
175 |
-
# 3. How can CBD help with dementia treatment?
|
176 |
-
# 4. Can peptides help with treating cancer?
|
177 |
-
# 5. Can fasting help with longevity?
|
178 |
-
# ```
|
179 |
-
# """
|
180 |
-
|
181 |
-
# prompt_without_summary_and_topics = f"""
|
182 |
-
# Generate a 3-5 short user request suggestions.
|
183 |
-
|
184 |
-
# The questions you generate should be mostly based on these 5 topics:
|
185 |
-
# - Alzheimer's
|
186 |
-
# - Cancer
|
187 |
-
# - Diabetes
|
188 |
-
# - Hypertension
|
189 |
-
# - Heart Disease
|
190 |
-
|
191 |
-
# Here's the example of the user request suggestions:
|
192 |
-
# ```
|
193 |
-
# 1. How does nitric oxide keep your heart pumping?
|
194 |
-
# 2. How do I ensure healthy aging?
|
195 |
-
# 3. What is the long-term effect of sugar on my health?
|
196 |
-
# 4. Explain the relationship between trauma and addiction.
|
197 |
-
# 5. How does the brain process emotions?
|
198 |
-
# ```
|
199 |
-
# You should always focus on providing the user with the most relevant questions based on the 5 topics I told you (Alzheimer's, Cancer, Diabetes, Hypertension, Heart Disease)
|
200 |
-
# Do not output [] or None in any case – your response should only include the bullet points, no other commentaries.
|
201 |
-
|
202 |
-
# The output format should look like this:
|
203 |
-
# ```
|
204 |
-
# 1. What do I need to know about dementia?
|
205 |
-
# 2. How do treat COVID effectively?
|
206 |
-
# 3. How can CBD help with dementia treatment?
|
207 |
-
# 4. Can peptides help with treating cancer?
|
208 |
-
# 5. Can fasting help with longevity?
|
209 |
-
# ```
|
210 |
-
|
211 |
-
# """
|
212 |
-
# history = parse_history(history)
|
213 |
-
# prompt_history = f"""
|
214 |
-
# Generate 1-3 user request suggestions based on this history of conversation between the user and the assistant:
|
215 |
-
|
216 |
-
# ```
|
217 |
-
# {history}
|
218 |
-
# ```
|
219 |
-
|
220 |
-
# You should provide some questions that user might ask based on the previous messages he sent to the assistant.
|
221 |
-
# These questions should be related to the topics discussed in the conversation.
|
222 |
-
# The questions should be concise and on point.
|
223 |
-
|
224 |
-
# The questions may include:
|
225 |
-
# - Asking for more information on a specific topic
|
226 |
-
# - Asking for clarification on a point
|
227 |
-
# - Asking for a summary of the conversation
|
228 |
-
# - Asking for a recommendation or advice
|
229 |
-
# - Asking for a comparison between two topics
|
230 |
-
|
231 |
-
# Here's the example of the user request suggestions:
|
232 |
-
# ```
|
233 |
-
# 1. Can you tell me more about the benefits of meditation?
|
234 |
-
# 2. What is the difference between type 1 and type 2 diabetes?
|
235 |
-
# 3. Can you summarize the conversation we had about intermittent fasting?
|
236 |
-
# ```
|
237 |
-
|
238 |
-
# The output format should look like this:
|
239 |
-
# ```
|
240 |
-
# 1. <suggestion>
|
241 |
-
# 2. <suggestion>
|
242 |
-
# 3. <suggestion>
|
243 |
-
# ```
|
244 |
-
# """
|
245 |
-
# if history:
|
246 |
-
# response = client.chat.completions.create(
|
247 |
-
# model="gpt-3.5-turbo-0125",
|
248 |
-
# messages=[
|
249 |
-
# {
|
250 |
-
# "role": "system",
|
251 |
-
# "content": "You are a helpful assistant that possesses a great knowledge of user request recommendations.",
|
252 |
-
# },
|
253 |
-
# {"role": "user", "content": prompt_history},
|
254 |
-
# ],
|
255 |
-
# )
|
256 |
-
# elif topics and summary:
|
257 |
-
# response = client.chat.completions.create(
|
258 |
-
# model="gpt-3.5-turbo-0125",
|
259 |
-
# messages=[
|
260 |
-
# {
|
261 |
-
# "role": "system",
|
262 |
-
# "content": "You are a helpful assistant that possesses a great knowledge of user request recommendations.",
|
263 |
-
# },
|
264 |
-
# {"role": "user", "content": prompt_with_summary_and_topics},
|
265 |
-
# ],
|
266 |
-
# )
|
267 |
-
# else:
|
268 |
-
# response = client.chat.completions.create(
|
269 |
-
# model="gpt-3.5-turbo-0125",
|
270 |
-
# messages=[
|
271 |
-
# {
|
272 |
-
# "role": "system",
|
273 |
-
# "content": "You are a helpful assistant that possesses a great knowledge of user request recommendations.",
|
274 |
-
# },
|
275 |
-
# {"role": "user", "content": prompt_without_summary_and_topics},
|
276 |
-
# ],
|
277 |
-
# )
|
278 |
-
# content = response.choices[0].message.content
|
279 |
-
# content = content.replace("```", "")
|
280 |
-
# return content
|
281 |
-
|
282 |
-
|
283 |
-
# def generate_suggestions(url: str, history: List[dict]):
|
284 |
-
# """Generate the suggestions for the given URL.
|
285 |
-
|
286 |
-
# Args:
|
287 |
-
# url (str): The URL of the webpage.
|
288 |
-
|
289 |
-
# Returns:
|
290 |
-
# List[str]: The list of suggestions for the given URL.
|
291 |
-
# """
|
292 |
-
# url = url.strip()
|
293 |
-
# if history != []:
|
294 |
-
# suggestions = get_suggestions(history=history)
|
295 |
-
# else:
|
296 |
-
# soup = get_page(url)
|
297 |
-
# topics, summary = get_topics_and_summary(soup, get_page_type(url))
|
298 |
-
# print(topics, summary)
|
299 |
-
# suggestions = get_suggestions(topics, summary)
|
300 |
-
# return suggestions
|
301 |
-
|
302 |
-
|
303 |
-
# # if __name__ == "__main__":
|
304 |
-
# # url = "https://drtalks.com/videos/healing-your-heart-with-a-colorful-plate/"
|
305 |
-
# # print(generate_suggestions(url))
|
306 |
-
|
307 |
import os
|
308 |
import re
|
309 |
from typing import List
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
import re
|
3 |
from typing import List
|