seoAnalyzerGPT / ask_questions.py
pranked03's picture
new changes
997831e
raw
history blame contribute delete
No virus
2.87 kB
import pandas as pd
import openai
import numpy as np
from openai.embeddings_utils import distances_from_embeddings, cosine_similarity
import streamlit as st
openai.api_key = st.secrets["openai_api_key"]
df=pd.read_csv('processed/embeddings.csv', index_col=0)
df['embeddings'] = df['embeddings'].apply(eval).apply(np.array)
def create_context(
question, df, max_len=1800, size="ada"
):
# Create a context for a question by finding the most similar context from the dataframe
# Get the embeddings for the question
q_embeddings = openai.Embedding.create(input=question, engine='text-embedding-ada-002')['data'][0]['embedding']
# Get the distances from the embeddings
df['distances'] = distances_from_embeddings(q_embeddings, df['embeddings'].values, distance_metric='cosine')
returns = []
cur_len = 0
# Sort by distance and add the text to the context until the context is too long
for i, row in df.sort_values('distances', ascending=True).iterrows():
# Add the length of the text to the current length
cur_len += row['n_tokens'] + 4
# If the context is too long, break
if cur_len > max_len:
break
# Else add it to the text that is being returned
returns.append(row["text"])
# Return the context
return "\n\n###\n\n".join(returns)
def answer_question(
df,
model="text-davinci-003",
question="Am I allowed to publish model outputs to Twitter, without a human review?",
max_len=3000,
size="ada",
debug=False,
max_tokens=500,
stop_sequence=None
):
#Answer a question based on the most similar context from the dataframe texts
context = create_context(
question,
df,
max_len=max_len,
size=size,
)
# If debug, print the raw model response
if debug:
print("Context:\n" + context)
print("\n\n")
try:
# Create a completions using the questin and context
response = openai.Completion.create(
prompt=f"You are an SEO anaylzer. \nYou will be given:\n An issue, \nthe description of the issue\nthe items that cause the issue.\n Describe the Issue. Show the appropriate solution to the issue. Implement the solution to the issue and show the fix on the given items in the issue. Give only the fix to the item given in the issue. \nContext: {context}\n\n---\n\nIssue: {question}",
temperature=0,
max_tokens=max_tokens,
top_p=1,
frequency_penalty=0,
presence_penalty=0,
stop=stop_sequence,
model=model
)
return response["choices"][0]["text"].strip()
except Exception as e:
print(e)
return ""