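"""Streamlit app for chatting with uploaded PDFs.

Questions are answered with Gemini over a FAISS index built from the PDF
text; when the answer is not found in the context, the app falls back to a
local quantized LLaMA-2 model served through CTransformers.
"""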
import os
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
import google.generativeai as genai
from langchain_community.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
from nltk.corpus import stopwords
from langchain_community.llms import CTransformers
from googletrans import Translator  # required by translate_text below
# Load environment variables
load_dotenv()
google_api_key = os.getenv("GOOGLE_API_KEY")
if not google_api_key:
raise ValueError("Google API key not found. Please check your environment variables.")
genai.configure(api_key=google_api_key)
# Download NLTK stopwords and extend them with chat-style filler words
nltk.download('stopwords')
stop_words = stopwords.words('english')
custom_stopwords = ["what", "is", "how", "who", "explain", "about", "?", "please", "hey", "whatsup", "can u explain"]
stop_words.extend(custom_stopwords)
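# Extract raw text from the uploaded PDFs; pages with no extractable text
# contribute an empty string instead of raising an error.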
def get_pdf_text(pdf_docs):
text = ""
for pdf in pdf_docs:
pdf_reader = PdfReader(pdf)
for page in pdf_reader.pages:
text += page.extract_text() or ""
return text
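# Split the extracted text into large overlapping chunks for embedding.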
def get_text_chunks(text):
text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
return text_splitter.split_text(text)
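# Embed the chunks with Gemini embeddings and persist a FAISS index locally.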
def get_vector_store(text_chunks):
try:
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
vector_store.save_local("faiss_index")
except Exception as e:
st.error(f"Error during embedding: {e}")
def get_conversational_chain():
prompt_template = """
Please provide a detailed answer based on the provided context. If the necessary information to answer the question is not present in the context, respond with 'The answer is not available in the context'
Context:
{context}
Question:
{question}
Answer:
"""
model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
return load_qa_chain(model, chain_type="stuff", prompt=prompt)
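# Generate a response with a local quantized LLaMA-2 model via CTransformers;
# the GGML model file must be available locally.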
def get_llama_response(input_text, no_words, blog_style, response_language):
llm = CTransformers(
model='llama-2-7b-chat.ggmlv3.q8_0.bin',
model_type='llama',
config={'max_new_tokens': 500, 'temperature': 0.01}
)
template = """
Given some information of '{input_text}', provide a concise summary suitable for a {blog_style} blog post in approximately {no_words} words. The total response should be in {response_language} language. Focus on key aspects and provide accurate information.
"""
prompt = PromptTemplate(input_variables=["blog_style", "input_text", 'no_words', 'response_language'],
template=template)
response = llm(prompt.format(input_text=input_text, no_words=no_words, blog_style=blog_style, response_language=response_language))
return response
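# TF-IDF cosine similarity between the full PDF text and the question, used as
# a cheap relevance check. Note that multi-word entries in the custom stopword
# list (e.g. "can u explain") never match the vectorizer's single-word tokens.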
def calculate_cosine_similarity(text, user_question):
vectorizer = TfidfVectorizer(stop_words=list(stop_words))
tfidf_matrix = vectorizer.fit_transform([text, user_question])
cos_similarity = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
return cos_similarity
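# Translate text with googletrans (defined but not called elsewhere in the app).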
def translate_text(text, dest_language):
translator = Translator()
translation = translator.translate(text, dest=dest_language)
return translation.text
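# Answer a question: try Gemini over the FAISS index first, then fall back to
# LLaMA-2 when Gemini reports the answer is not in the context.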
def user_input(user_question, raw_text, response_language):
try:
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
docs = new_db.similarity_search(user_question)
gemini_chain = get_conversational_chain()
gemini_response = gemini_chain({"input_documents": docs, "question": user_question}, return_only_outputs=True)
initial_response = gemini_response["output_text"]
except Exception as e:
# st.error(f"Error during question answering: {e}")
initial_response = "The provided context does not contain any information"
similarity_score = calculate_cosine_similarity(raw_text, user_question)
st.write("Cosine similarity score: ", similarity_score)
if "The answer is not available in the context" in initial_response or "The provided context does not contain any information" in initial_response:
        if similarity_score > 0.00125:
            refined_response = get_llama_response(user_question, no_words=500, blog_style="detailed", response_language=response_language)
        else:
            refined_response = "I'm sorry, I cannot answer this question based on the provided context."
    else:
        refined_response = get_llama_response(initial_response, no_words=500, blog_style="detailed", response_language=response_language)
    st.write("Generated Response:", refined_response)
def main():
st.set_page_config(page_title="Chat With AUTHOR", page_icon="πŸ“š", layout='centered')
st.header("Enhance Understanding with Gemini and LLaMA-2 models πŸ€–")
user_question = st.text_input("Ask a Question from the PDF Files uploaded")
    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader("Upload your PDF Files", accept_multiple_files=True)
        # The original code referenced `response_language` without defining it;
        # a selector is assumed here (the language list is illustrative).
        response_language = st.selectbox("Response language", ["English", "Hindi", "Spanish"])
        if st.button("Submit & Process"):
            if not pdf_docs:
                st.warning("Please upload at least one PDF file first.")
            else:
                with st.spinner("Processing..."):
                    raw_text = get_pdf_text(pdf_docs)
                    text_chunks = get_text_chunks(raw_text)
                    get_vector_store(text_chunks)
                    st.success("Done")
    if user_question:
        if not pdf_docs:
            st.warning("Please upload your PDF files before asking a question.")
        else:
            raw_text = get_pdf_text(pdf_docs)
            text_chunks = get_text_chunks(raw_text)
            get_vector_store(text_chunks)
            user_input(user_question, raw_text, response_language)
if __name__ == "__main__":
main()
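# To run locally (assuming the dependencies above are installed and
# GOOGLE_API_KEY is set in a .env file): streamlit run app.py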