nitrozen-gpt / create_embedding.py
saga24's picture
Upload create_embedding.py
5081502
# **
# Delimiter on which the input text is split into paragraphs.
splitter = "#--"

# NOTE(review): not referenced in the visible part of this script; presumably
# the number of nearest paragraphs to retrieve at query time -- confirm.
top_k = 3
import json
import streamlit as st
import pandas as pd
import numpy as np
import time
import os
import openai
import requests
from PIL import Image
from io import BytesIO
import openai, numpy as np
import re
# Configure the OpenAI client with the key taken from the API_KEY environment
# variable (os.getenv returns None when the variable is not set).
openai.api_key = os.getenv("API_KEY")
from openai.error import RateLimitError
import backoff
def query(payload):
    """POST ``payload`` as JSON to ``API_URL`` and return the decoded JSON reply.

    NOTE(review): ``API_URL`` and ``headers`` are read as module-level names
    but are not defined in the visible part of this file -- confirm they
    exist before calling this function.
    """
    return requests.post(API_URL, headers=headers, json=payload).json()
@backoff.on_exception(backoff.expo, RateLimitError, max_tries=6)
def get_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding vector for ``text`` from the OpenAI embeddings API.

    The request is retried with exponential backoff when the API reports a
    rate limit, so a single throttled call does not abort a long run that
    embeds many paragraphs one after another.

    Args:
        text: The string to embed.
        model: Name of the OpenAI embedding model to use.

    Returns:
        The ``embedding`` field of the first item in the response's ``data``
        list.

    Raises:
        RateLimitError: If the request is still rate limited after the final
            retry.
    """
    response = openai.Embedding.create(input=[text], model=model)
    return response['data'][0]['embedding']
# Read the whole source document; the context manager guarantees the file
# handle is closed, including when the read raises.
with open("./finalreact.txt", "r") as f:
    text = f.read()

# Break the document into paragraphs on the delimiter.
paras = text.split(splitter)
def _merge_short_paragraphs(paragraphs, min_length=200):
    """Fold paragraphs shorter than ``min_length`` into the one that follows.

    A short paragraph is joined (with a single space) to the next paragraph,
    and the combined text is evaluated again, so several short paragraphs in
    a row keep accumulating until the result reaches ``min_length``.
    Empty paragraphs are skipped when nothing is being carried.

    Args:
        paragraphs: Iterable of paragraph strings, in document order.
        min_length: Minimum number of characters for a paragraph to be kept
            on its own.

    Returns:
        A new list of paragraphs. Short text left over at the end of the
        document has no following paragraph to merge into; it is kept as a
        final entry (unless it is only whitespace) instead of being indexed
        past the end of the list.
    """
    merged = []
    pending = ''  # short text waiting to be joined with the next paragraph
    for para in paragraphs:
        candidate = pending + ' ' + para if pending else para
        if len(candidate) == 0:
            continue
        if len(candidate) < min_length:
            pending = candidate
        else:
            merged.append(candidate)
            pending = ''
    # Trailing short text: keep it rather than dropping it or raising.
    if pending.strip():
        merged.append(pending)
    return merged


# merge short paras
paras_clean = _merge_short_paragraphs(paras)
# Output table layout. Only 'Text' and 'Embeddings' are filled in below; the
# two token columns are created but left empty.
output_columns = ['Text', 'Embeddings', 'Prompt Token', 'Total Tokens']
df = pd.DataFrame(columns=output_columns)
df['Text'] = paras_clean


def _embed_row(row):
    """Return the embedding for the 'Text' value of one DataFrame row."""
    return get_embedding(row['Text'])


# One embedding request per row of the table.
df['Embeddings'] = df.apply(_embed_row, axis=1)

# Write the table to disk without the index column.
df.to_csv("embeddings.csv", index=False)