# Spaces: Runtime error  (status banner captured from the hosting page; not part of the program)
# ** | |
# Number of top-ranked results to keep. Not referenced in the visible part of
# this file -- TODO confirm where it is consumed.
top_k = 3

# Marker string on which the corpus text is split into paragraphs.
splitter = '#--'
import json | |
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
import time | |
import os | |
import openai | |
import requests | |
from PIL import Image | |
from io import BytesIO | |
import openai, numpy as np | |
import re | |
openai.api_key = os.getenv("API_KEY") | |
from openai.error import RateLimitError | |
import backoff | |
def query(payload):
    """POST *payload* as a JSON body and return the decoded JSON reply.

    The request goes to ``API_URL`` with the module-level ``headers``.
    NOTE(review): neither ``API_URL`` nor ``headers`` is defined in the
    visible part of this file -- confirm they are set before this is called.
    """
    return requests.post(API_URL, headers=headers, json=payload).json()
def get_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding that the OpenAI Embedding endpoint yields for *text*.

    Args:
        text: The string to embed; it is sent as a single-element input list.
        model: Name of the embedding model to request.

    Returns:
        The ``'embedding'`` field of the first item in the response's
        ``'data'`` list.
    """
    response = openai.Embedding.create(input=[text], model=model)
    first_item = response['data'][0]
    return first_item['embedding']
def _merge_short_paragraphs(paragraphs, min_length=200):
    """Fold every fragment shorter than *min_length* into the text that follows it.

    Args:
        paragraphs: Iterable of text fragments in document order.
        min_length: Fragments with fewer characters than this are not kept on
            their own; they are prefixed (with a single space) to the next
            fragment instead.

    Returns:
        A new list of paragraphs. Empty fragments are dropped. A short
        fragment at the very end has no successor, so it is appended to the
        last kept paragraph (or kept alone when nothing else was kept) rather
        than being lost.
    """
    merged = []
    carry = ""
    for fragment in paragraphs:
        if carry:
            # A short predecessor is waiting: glue it in front of this one.
            fragment = carry + ' ' + fragment
            carry = ""
        if not fragment:
            continue
        if len(fragment) < min_length:
            carry = fragment
        else:
            merged.append(fragment)
    if carry:
        # The previous index-based loop wrote to position i + 1 here and
        # raised IndexError when the final fragment was short.
        if merged:
            merged[-1] = merged[-1] + ' ' + carry
        else:
            merged.append(carry)
    return merged


# Read the whole corpus; the context manager closes the handle afterwards.
with open("./finalreact.txt", "r") as f:
    text = f.read()

# Split on the paragraph marker, then merge short paragraphs forward.
paras = text.split(splitter)
paras_clean = _merge_short_paragraphs(paras)

# One row per cleaned paragraph. 'Prompt Token' and 'Total Tokens' are created
# but never filled here, so they are written out empty.
df = pd.DataFrame(columns=['Text', 'Embeddings', 'Prompt Token', 'Total Tokens'])
df['Text'] = paras_clean
# Series.apply keeps this a Series even when there are no rows.
df['Embeddings'] = df['Text'].apply(get_embedding)
df.to_csv("embeddings.csv", index=False)