Spaces:

shubhampal
/

newtry2

Runtime error

App Files Files Community

newtry2 / app.py

shubhampal

Update app.py

ceb805c verified 3 months ago

raw

history blame contribute delete

No virus

5.94 kB


	import os

	# Read the Hugging Face access token from the KEY environment variable.
	# The value is a credential, so it is never printed; only whether it is
	# present is reported.
	KEY = os.getenv('KEY')
	print('KEY environment variable set:', KEY is not None)

	# os.environ only accepts strings: assigning None raises TypeError, so the
	# token is exported under both variable names only when KEY is configured.
	if KEY is not None:
	    os.environ['HF_TOKEN'] = KEY
	    os.environ['HUGGINGFACEHUB_API_TOKEN'] = KEY

	# from langchain.embeddings.huggingface import HuggingFaceEmbeddings
	# from langchain import HuggingFaceHub
	# from langchain.vectorstores import Chroma
	# from langchain.chains import ConversationalRetrievalChain
	# from langchain.text_splitter import CharacterTextSplitter
	# from langchain.docstore.document import Document
	# import pandas as pd


	# # Load the CSV file
	# df = pd.read_csv("web_data.csv")

	# # Load the HTML and TS files
	# with open("reports.component.html", "r", encoding="utf-8") as f:
	# reports_component_html = f.read()

	# with open("reports.module.ts", "r", encoding="utf-8") as f:
	# reports_module_ts = f.read()

	# # Create the embeddings
	# embeddings = HuggingFaceEmbeddings()

	# print(embeddings)

	# # Combine questions, answers, and file contents into a list of strings
	# texts = [f"Question: {q}\nAnswer: {a}" for q, a in zip(df['query'], df['responses'])]
	# texts.append(f"File: reports.component.html\nContent:\n{reports_component_html}")
	# texts.append(f"File: reports.module.ts\nContent:\n{reports_module_ts}")

	# # Split the texts into chunks
	# text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
	# docs = []
	# for text in texts:
	# chunks = text_splitter.split_text(text)
	# for chunk in chunks:
	# doc = Document(page_content=chunk, metadata={})
	# docs.append(doc)

	# # Create the vector store
	# db = Chroma.from_documents(docs, embeddings)

	# # Load the language model
	# model = HuggingFaceHub(repo_id="tiiuae/falcon-7b-instruct", model_kwargs={"temperature": 0.7, "max_length": 512})
	# # model = HuggingFaceHub(repo_id="meta-llama/Meta-Llama-3-8B", model_kwargs={"temperature": 0.7, "max_length": 512})
	# # model = HuggingFaceHub(repo_id="mlabonne/AlphaMonarch-7B", model_kwargs={"temperature": 0.7, "max_length": 512})

	# # Create the conversational retrieval chain
	# qa = ConversationalRetrievalChain.from_llm(model, db.as_retriever())


	# query = '''what all is present in reports module '''
	# result = qa({"question": query, "chat_history": []})
	# print(result['answer'])

	# def get_helpful_answer(context, query):
	# import re
	# pattern = re.compile(r"Helpful Answer:\s(.?)(?:Question:\|\Z)", re.DOTALL)
	# match = pattern.search(context)
	# if match:
	# return match.group(1).strip()
	# else:
	# return "No helpful answer found."


	# # print the helpful answer
	# print(get_helpful_answer(result['answer'], query))



	# CLAUDE IMPROVEMENT TRY


	import pandas as pd
	from langchain.text_splitter import CharacterTextSplitter
	from langchain.embeddings import HuggingFaceEmbeddings
	from langchain.vectorstores import Chroma
	from langchain.chains import ConversationalRetrievalChain
	from langchain.llms import HuggingFaceHub
	from langchain.chains import LLMChain
	from langchain.prompts import PromptTemplate
	from langchain.chains.combine_documents.stuff import StuffDocumentsChain
	from langchain.schema import Document

	# Load the question/answer pairs; the 'query' and 'responses' columns of
	# web_data.csv are read below.
	df = pd.read_csv("web_data.csv")

	# NOTE: the variable names still say "reports" (as in the commented-out
	# earlier version above), but they hold the contents of the accounting
	# component's HTML and TypeScript files.
	with open("accounting.component.html", "r", encoding="utf-8") as f:
	    reports_component_html = f.read()

	with open("accounting.component.ts", "r", encoding="utf-8") as f:
	    reports_module_ts = f.read()

	# Build the corpus: one text per Q&A row, plus one text per source file
	# prefixed with its file name.
	texts = [f"Question: {q}\nAnswer: {a}" for q, a in zip(df['query'], df['responses'])]
	texts.append(f"File: accounting.component.html\nContent:\n{reports_component_html}")
	texts.append(f"File: accounting.component.ts\nContent:\n{reports_module_ts}")

	# Split every text into chunks and wrap each chunk in a Document with
	# empty metadata.
	text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
	docs = [
	    Document(page_content=chunk, metadata={})
	    for text in texts
	    for chunk in text_splitter.split_text(text)
	]

	# Create embeddings and vector store.
	# HuggingFaceEmbeddings loads its model through sentence-transformers, so it
	# needs a sentence-embedding checkpoint. The model previously named here,
	# "meta-llama/Meta-Llama-3-8B-Instruct", is an 8B text-generation model and
	# belongs only in the HuggingFaceHub LLM configuration below.
	embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
	db = Chroma.from_documents(docs, embeddings)

	# LLM accessed through HuggingFaceHub; the same instance is shared by the
	# answering chain and the question-rewriting chain defined further down.
	generation_kwargs = {"temperature": 0.3, "max_length": 512, "top_p": 0.95}
	model = HuggingFaceHub(repo_id="meta-llama/Meta-Llama-3-8B-Instruct", model_kwargs=generation_kwargs)

	# Answer prompt (raw template text). {context} is the slot for the supporting
	# text and {question} for the user's question. The wording tells the model to
	# say so when it lacks information and to keep answers within 500 characters.
	prompt_template = """
	Use the following pieces of context to answer the question at the end. If you don't know the answer, say "I don't have enough information to answer this question accurately."
	Aim to provide a concise yet informative answer within 500 characters.

	Context:
	{context}

	Question: {question}

	Confident and Accurate Answer:
	"""

	# Answering stage: the documents are inserted into the prompt's 'context'
	# variable and the LLM chain produces the answer from that prompt.
	answer_prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])
	answer_chain = LLMChain(llm=model, prompt=answer_prompt)
	combine_docs_chain = StuffDocumentsChain(llm_chain=answer_chain, document_variable_name='context')

	# Question-rewriting stage: turns a follow-up plus the chat history into a
	# standalone question. The pieces below concatenate to a single template.
	condense_template = (
	    'Given the following conversation and a follow-up question, '
	    'rephrase the follow-up question to be a standalone question '
	    'focused on Angular and TypeScript concepts.\n\n'
	    'Chat History:\n{chat_history}\n'
	    'Follow Up Input: {question}\n'
	    'Standalone question:'
	)
	condense_prompt = PromptTemplate(template=condense_template, input_variables=['chat_history', 'question'])
	question_generator = LLMChain(llm=model, prompt=condense_prompt)

	# Assemble the conversational retrieval chain from the question rewriter,
	# the answering chain and a retriever over the vector store (k=3). Source
	# documents are returned with each result and verbose output is enabled.
	retriever = db.as_retriever(search_kwargs={"k": 3})
	qa = ConversationalRetrievalChain(
	    question_generator=question_generator,
	    combine_docs_chain=combine_docs_chain,
	    retriever=retriever,
	    verbose=True,
	    return_source_documents=True,
	)

	# Function to run a query
	def run_query(query, chat_history=None):
	    """Ask the retrieval chain a question and print the answer with its sources.

	    Args:
	        query: The question text passed to the chain as "question".
	        chat_history: Prior conversation passed to the chain as
	            "chat_history". Defaults to a new empty list on every call.

	    Returns:
	        The result object returned by ``qa``; its 'answer' and
	        'source_documents' entries are printed before returning.
	    """
	    # A fresh list per call avoids sharing one mutable default between calls.
	    history = [] if chat_history is None else chat_history
	    result = qa({"question": query, "chat_history": history})
	    print("Question:", query)
	    print("Answer:", result['answer'])
	    # Show only the first 50 characters of each retrieved document.
	    previews = [doc.page_content[:50] + "..." for doc in result['source_documents']]
	    print("Sources:", previews)
	    return result

	# Demo run: ask one question with no prior chat history.
	query = (
	    "Explain the code in summary "
	    "in the accounting components TypeScript file."
	)
	result = run_query(query=query)