from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
import streamlit as st

# The OpenAI key has been redacted; if one is needed, read it from the
# environment instead of hard-coding it:
# openai_api_key = os.environ.get("OPENAI_API_KEY")
# Define the embedding function using HuggingFaceEmbeddings; it loads the
# sentence-transformers model internally, so no separate SentenceTransformer
# instance is needed.
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/multi-qa-MiniLM-L6-cos-v1')

# Load the persisted Chroma vector store (point persist_directory at your Chroma directory)
vectordb = Chroma(persist_directory=r"\vector db",
                  embedding_function=embeddings)
# Create a retriever from the Chroma object
retriever = vectordb.as_retriever()
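
# A minimal sketch of how the persisted Chroma store above could be built.
# The ingestion step is not part of this file, so the loader, chunk sizes,
# and "docs/" path below are illustrative assumptions.
def build_vector_db(doc_dir="docs/", persist_directory=r"\vector db"):
    from langchain_community.document_loaders import DirectoryLoader
    from langchain.text_splitter import RecursiveCharacterTextSplitter

    # Load raw documents and split them into overlapping chunks
    documents = DirectoryLoader(doc_dir).load()
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=100
    ).split_documents(documents)

    # Embed the chunks and persist them to disk for later retrieval
    return Chroma.from_documents(
        chunks, embedding=embeddings, persist_directory=persist_directory
    )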
def find_match(input_text):
    # Retrieve relevant documents based on the input query
    docs = retriever.get_relevant_documents(input_text)
    match_texts = [doc.page_content for doc in docs]
    # Return the concatenated texts of the relevant documents
    return "\n".join(match_texts)
from transformers import pipeline

# Load the text generation pipeline from Hugging Face
text_generator = pipeline("text-generation", model="gpt2")
def query_refiner(conversation, query):
    # Formulate the prompt for the model
    prompt = (
        "Given the following user query and conversation log, formulate a question "
        "that would be the most relevant to provide the user with an answer from a "
        f"knowledge base.\n\nCONVERSATION LOG: \n{conversation}\n\nQuery: {query}\n\nRefined Query:"
    )
    # Generate the response; max_new_tokens caps the generated text (max_length
    # would count the prompt too and can be exceeded by a long conversation log),
    # and do_sample=True is required for temperature/top_p to take effect
    response = text_generator(
        prompt,
        max_new_tokens=64,
        do_sample=True,
        temperature=0.7,
        top_p=1.0,
        pad_token_id=text_generator.tokenizer.eos_token_id,
    )
    # Extract the refined query from the generated text
    refined_query = response[0]['generated_text'].split('Refined Query:')[-1].strip()
    return refined_query
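
# Example usage (values are illustrative):
#   refined = query_refiner("Human: hi\nBot: Hello, how can I help?\n",
#                           "tell me more about pricing")
# GPT-2 is a small base model, so the refined query can be noisy; a larger
# instruction-tuned model would follow this prompt more reliably.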
def get_conversation_string():
    # Rebuild the running transcript from Streamlit session state;
    # responses[0] is the bot's opening message, so requests[i] pairs
    # with responses[i+1]
    conversation_string = ""
    for i in range(len(st.session_state['responses']) - 1):
        conversation_string += "Human: " + st.session_state['requests'][i] + "\n"
        conversation_string += "Bot: " + st.session_state['responses'][i+1] + "\n"
    return conversation_string
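
# A minimal sketch of how these helpers are typically wired together in the
# main Streamlit app. The loop below is an assumption for illustration (the
# main app is not part of this file); the session_state keys match those
# used by get_conversation_string above.
def run_chat_turn(user_query):
    # Session state must hold the greeting plus any prior turns
    if 'responses' not in st.session_state:
        st.session_state['responses'] = ["How can I assist you?"]
    if 'requests' not in st.session_state:
        st.session_state['requests'] = []

    # Refine the raw query against the conversation log, then retrieve context
    conversation = get_conversation_string()
    refined = query_refiner(conversation, user_query)
    context = find_match(refined)

    # A real app would pass `context` and `user_query` to an LLM here;
    # echoing the retrieved context stands in for that call.
    answer = f"Context:\n{context}\n\nQuestion: {user_query}"
    st.session_state['requests'].append(user_query)
    st.session_state['responses'].append(answer)
    return answer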
""" | |
from openai import OpenAI | |
from audio_recorder_streamlit import audio_recorder | |
client=OpenAI(api_key="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") | |
def speech_to_text(audio_data): | |
with open(audio_data, "rb") as audio_file: | |
transcript = client.audio.transcriptions.create( | |
model="whisper-1", | |
response_format="text", | |
file=audio_file | |
) | |
return transcript | |
""" | |