import os

import streamlit as st
import torch
from langchain.chains.question_answering import load_qa_chain
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.llms import Together
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec  # used in the one-time setup sketch below

# API keys are read from the environment; see the usage notes at the bottom.
pinecone_api_key = os.getenv('pinecone_api_key')
together_api_key = os.getenv('together_api_key')

# Run embedding inference on the GPU when one is available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
if device == 'cuda':
    print(torch.cuda.get_device_name(0))

# Connect to the existing Pinecone index that holds the transcript embeddings.
index_name = 'lex-index'
embeddings = SentenceTransformerEmbeddings(
    model_name="all-MiniLM-L6-v2",
    model_kwargs={'device': device},
)
vectorstore = PineconeVectorStore(
    index_name=index_name,
    embedding=embeddings,
    pinecone_api_key=pinecone_api_key,
    text_key='title',  # metadata field that holds the document text
)

st.title("🦜🔗 Lex Fridman Podcast QnA")
st.markdown(
    "This app was developed by [Viren Dhanwani](https://github.com/virendhanwani) "
    "by creating a vector database from Lex Fridman Podcast transcripts using this "
    "[dataset](https://huggingface.co/datasets/jamescalam/lex-transcripts/viewer/default/train?row=30) "
    "and by implementing the LangChain QA chain."
)


def generate_response(input_text):
    """Retrieve the most relevant transcript chunks and answer with the LLM."""
    llm = Together(
        model="mistralai/Mistral-7B-Instruct-v0.2",
        temperature=0.7,
        max_tokens=500,
        top_k=50,
        together_api_key=together_api_key,
    )
    # 'stuff' concatenates all retrieved documents into a single prompt.
    chain = load_qa_chain(llm, chain_type='stuff')
    docs = vectorstore.similarity_search(input_text, k=3)
    response = chain.run(input_documents=docs, question=input_text)
    return response


with st.container():
    messages = st.container(height=500)
    messages.chat_message("assistant").write('How may I help you?')
    if prompt := st.chat_input("Say something"):
        messages.chat_message("user").write(prompt)
        messages.chat_message("assistant").write(generate_response(prompt))
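# ---------------------------------------------------------------------------
# One-time index setup (a hedged sketch, not executed by the app). The code
# above assumes a Pinecone index named 'lex-index' already exists and is
# populated with embeddings of the transcript chunks. all-MiniLM-L6-v2
# produces 384-dimensional vectors, so the index must be created with
# dimension=384. The cloud/region values below are assumptions, not taken
# from the original script:
#
#   pc = Pinecone(api_key=pinecone_api_key)
#   if index_name not in pc.list_indexes().names():
#       pc.create_index(
#           name=index_name,
#           dimension=384,      # embedding size of all-MiniLM-L6-v2
#           metric='cosine',
#           spec=ServerlessSpec(cloud='aws', region='us-east-1'),
#       )
# ---------------------------------------------------------------------------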
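# Usage (assuming this file is saved as app.py; the lowercase environment
# variable names mirror the os.getenv() calls above):
#
#   export pinecone_api_key=<your Pinecone API key>
#   export together_api_key=<your Together API key>
#   streamlit run app.py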