# NOTE(review): the lines originally here were Hugging Face Spaces file-viewer
# chrome captured by the scraper (runtime status, file size, commit hashes, and
# the line-number gutter) — not Python source. Neutralized so the file parses.
import gradio as gr
from wordcloud import WordCloud
import pandas as pd
import requests
import json
import hopsworks
import matplotlib.pyplot as plt
import os
import time
# OpenAI Chat Completions configuration used by gpt_predict below.
MODEL = "gpt-3.5-turbo"
API_URL = "https://api.openai.com/v1/chat/completions"
# Read the API key from the environment; if unset, requests will fail with 401
# and gpt_predict falls back to returning an empty string.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# Connect to the Hopsworks feature store and load the two daily feature groups
# produced upstream: per-topic info (keywords/scores) and per-document info
# (titles/links/probabilities). Both reads pull the full feature group.
project = hopsworks.login(project="ServerlessIntroIris")
fs = project.get_feature_store()
dataset1 = fs.get_feature_group(name="daily_topic_info").read()
df = dataset1
dataset2 = fs.get_feature_group(name="daily_document_info").read()
df2 = dataset2
# Distinct topic ids assigned by the topic model; -1 appears to be the
# outlier/noise bucket (it is skipped everywhere below) — TODO confirm.
topics = df['topic'].unique()
def gpt_predict(inputs, request: gr.Request = gr.State([]), top_p=1, temperature=1, chat_counter=0, history=None):
    """Send *inputs* to the OpenAI chat-completions API and stream back the reply.

    Args:
        inputs: The new user message.
        request: Gradio request placeholder kept for UI wiring (unused here).
        top_p: Nucleus-sampling parameter, forwarded on follow-up turns only.
        temperature: Sampling temperature, forwarded on follow-up turns only.
        chat_counter: Number of previous turns; 0 means a fresh conversation.
        history: Alternating user/assistant messages from earlier turns.
            Mutated in place: the new input and the streamed reply are appended.

    Returns:
        The assistant's full reply as a string ("" if the request failed).
    """
    if history is None:
        # BUGFIX: the original used a mutable default (history=[]), which is
        # shared across all calls in Python; create a fresh list per call.
        history = []
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}",
    }
    if chat_counter == 0:
        messages = [{"role": "user", "content": f"{inputs}"}]
        # First turn intentionally uses fixed sampling values (matches the
        # original behaviour); follow-ups honour the caller's parameters.
        req_temperature, req_top_p = 1.0, 1.0
    else:
        # Rebuild the conversation: history alternates user/assistant turns.
        messages = []
        for i, past in enumerate(history):
            role = 'user' if i % 2 == 0 else 'assistant'
            messages.append({"role": role, "content": past})
        # BUGFIX: the original appended an undefined name `conversation_history`
        # here, raising NameError on every follow-up turn; append the new user
        # message instead.
        messages.append({"role": "user", "content": f"{inputs}"})
        req_temperature, req_top_p = temperature, top_p
    payload = {
        "model": MODEL,
        "messages": messages,
        "temperature": req_temperature,
        "top_p": req_top_p,
        "n": 1,
        "stream": True,
        "presence_penalty": 0,
        "frequency_penalty": 0,
    }
    chat_counter += 1
    history.append(inputs)
    token_counter = 0
    partial_words = ""
    try:
        # POST with stream=True so we can consume server-sent-event lines.
        response = requests.post(API_URL, headers=headers, json=payload, stream=True)
        # BUGFIX: the original string-matched repr(response) against
        # "<Response [200]>"; compare the HTTP status code directly.
        if response.status_code != 200:
            raise Exception(f"Sorry, hitting rate limit. Please try again later. {response}")
        first_line = True
        for chunk in response.iter_lines():
            if first_line:
                # The first SSE line carries no delta content; skip it.
                first_line = False
                continue
            decoded = chunk.decode()
            if not decoded:
                continue
            # SSE lines look like "data: {...}"; strip the 6-char prefix before
            # parsing. Short lines ("data: [DONE]" is exactly 12 chars) carry
            # no delta payload and are ignored.
            if len(decoded) > 12:
                delta = json.loads(decoded[6:])['choices'][0]['delta']
                if "content" in delta:
                    partial_words += delta["content"]
                    if token_counter == 0:
                        history.append(" " + partial_words)
                    else:
                        history[-1] = partial_words
                    token_counter += 1
    except Exception as e:
        # Best-effort: log and return whatever was streamed so far (possibly "").
        print(f'error found: {e}')
    return partial_words
# Build a mapping from a human-readable topic label to the numeric topic id.
readable_topics_dic = dict()
# BUGFIX(idiom): renamed from `input`, which shadowed the `input` builtin.
prompt = "I have lists of multiple words : "
mrk = []  # most representative keyword per topic (fallback if ChatGPT fails)
for t in topics.tolist():
    if t != -1:  # -1 appears to be the outlier/noise topic; skipped throughout
        selected_data = df[df['topic'] == t]
        keywords = selected_data['keywords'][selected_data.index[0]]
        freq = selected_data["scores"][selected_data.index[0]]
        # Keep the highest-scoring keyword as the fallback label for this topic.
        most_frequent_keyword = max(zip(keywords, freq), key=lambda x: x[1])
        print(most_frequent_keyword[0].capitalize())
        mrk.append(most_frequent_keyword[0].capitalize())
        prompt += ", [" + ", ".join(keywords) + "]"
prompt += " I want you to give me only one precise word that best describes the theme of this list. If I give you multiple lists, I want you to give me one word with a maj in front for each of those lists, and separate them by // (your answer should contain only one word for each, if I give you 100 lists, You give me 100 words)"
# BUGFIX(idiom): gpt_predict already returns a str; "".join(...) over it was a
# no-op character-by-character rebuild and has been removed.
new_topics = gpt_predict(prompt)
nt = new_topics.split("//")
# Fallback in case ChatGPT is overloaded or returned fewer labels than topics:
# one "Topic N: <keyword>" entry per non-outlier topic.
i = 0
if len(nt) < len(topics.tolist()):
    nt = [f"Topic {o+1}: {mrk[o]}" for o in range(len(topics.tolist())-1)]
for t in topics.tolist():
    if t != -1:
        # NOTE(review): if nt is still shorter than the topic list, the clamp
        # below reuses the last label, silently overwriting its dict entry —
        # preserved as-is, but worth confirming upstream guarantees.
        readable_topics_dic[nt[i]] = t
        if i < len(nt)-1:
            i += 1
def display_topics(topic):
    """Render the article list and keyword word cloud for a chosen topic.

    Args:
        topic: Human-readable label from the dropdown; mapped back to the
            numeric topic id via the module-level `readable_topics_dic`.

    Returns:
        tuple: (markdown string with up to 4 article links, matplotlib Figure
        containing the keyword word cloud).
    """
    topic_id = readable_topics_dic[topic]
    # Filter both feature groups down to the selected topic.
    selected_data = df[df['topic'] == topic_id]
    selected_data2 = df2[df2['topic'] == topic_id]
    # NOTE(review): ascending sort puts the LOWEST-probability documents first
    # in the "most relevant" list; if `probability` measures topic relevance
    # this likely wants ascending=False — confirm before changing.
    selected_data2 = selected_data2.sort_values(by='probability')
    # Display up to 4 articles as a markdown bullet list of links.
    articles = selected_data2['title']
    links = selected_data2['link']
    nb_art = min(4, len(links))
    articles_ret = """## Most relevant articles
"""
    for i in range(nb_art):
        ind = articles.index[i]
        articles_ret += f""" * [{articles[ind]}]({links[ind]})
"""
    # Generate the word cloud from keyword -> score frequencies.
    keywords = selected_data['keywords'][selected_data.index[0]]
    freq = selected_data["scores"][selected_data.index[0]]
    keywords_wordcloud = dict(zip(keywords, freq))
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate_from_frequencies(keywords_wordcloud)
    # BUGFIX: the original clobbered `ax` with the AxesImage returned by
    # plt.imshow and drew via the implicit pyplot state machine; draw on the
    # figure's own Axes instead.
    fig, ax = plt.subplots()
    ax.axis("off")
    ax.imshow(wordcloud, interpolation='bilinear')
    return articles_ret, fig
# Define Gradio interface: the dropdown choices are the GPT-generated (or
# fallback) topic labels in `nt`; display_topics maps the chosen label back to
# a topic id and returns (markdown article list, word-cloud figure).
# live=True re-runs display_topics on every dropdown change.
iface = gr.Interface(
    fn=display_topics,
    inputs=gr.Dropdown(nt, label="Topic"),
    outputs=[gr.Markdown(label="Most relevant articles"),gr.Plot(label="Main Keywords")],
    live=True,
    examples=[]
)
# Launch the app (blocks the process serving HTTP until interrupted).
iface.launch()
# NOTE(review): a stray "|" gutter character from the file-viewer scrape stood here.