# Page header captured with this file — Hugging Face Spaces status: Runtime error
import os
import sys


def export_hf_token(token, environ):
    """Store *token* under ``HF_TOKEN`` and ``HUGGINGFACEHUB_API_TOKEN``.

    Args:
        token: Access token string, or ``None`` when no token is configured.
        environ: Mutable mapping to update (normally ``os.environ``).

    Returns:
        ``True`` if both entries were written; ``False`` if *token* was
        ``None``, in which case *environ* is left untouched.
    """
    if token is None:
        return False
    environ['HF_TOKEN'] = token
    environ['HUGGINGFACEHUB_API_TOKEN'] = token
    return True


# The token comes from the ``KEY`` environment variable. Its value is
# deliberately never printed, so the secret does not end up in the logs.
KEY = os.getenv('KEY')
if not export_hf_token(KEY, os.environ):
    # os.environ only accepts str values; assigning a missing (None) token
    # would raise TypeError, so warn instead and leave the environment as-is.
    print(
        "Warning: environment variable 'KEY' is not set; "
        "HF_TOKEN and HUGGINGFACEHUB_API_TOKEN were left unchanged.",
        file=sys.stderr,
    )
# from langchain.embeddings.huggingface import HuggingFaceEmbeddings | |
# from langchain import HuggingFaceHub | |
# from langchain.vectorstores import Chroma | |
# from langchain.chains import ConversationalRetrievalChain | |
# from langchain.text_splitter import CharacterTextSplitter | |
# from langchain.docstore.document import Document | |
# import pandas as pd | |
# # Load the CSV file | |
# df = pd.read_csv("web_data.csv") | |
# # Load the HTML and TS files | |
# with open("reports.component.html", "r", encoding="utf-8") as f: | |
# reports_component_html = f.read() | |
# with open("reports.module.ts", "r", encoding="utf-8") as f: | |
# reports_module_ts = f.read() | |
# # Create the embeddings | |
# embeddings = HuggingFaceEmbeddings() | |
# print(embeddings) | |
# # Combine questions, answers, and file contents into a list of strings | |
# texts = [f"Question: {q}\nAnswer: {a}" for q, a in zip(df['query'], df['responses'])] | |
# texts.append(f"File: reports.component.html\nContent:\n{reports_component_html}") | |
# texts.append(f"File: reports.module.ts\nContent:\n{reports_module_ts}") | |
# # Split the texts into chunks | |
# text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) | |
# docs = [] | |
# for text in texts: | |
# chunks = text_splitter.split_text(text) | |
# for chunk in chunks: | |
# doc = Document(page_content=chunk, metadata={}) | |
# docs.append(doc) | |
# # Create the vector store | |
# db = Chroma.from_documents(docs, embeddings) | |
# # Load the language model | |
# model = HuggingFaceHub(repo_id="tiiuae/falcon-7b-instruct", model_kwargs={"temperature": 0.7, "max_length": 512}) | |
# # model = HuggingFaceHub(repo_id="meta-llama/Meta-Llama-3-8B", model_kwargs={"temperature": 0.7, "max_length": 512}) | |
# # model = HuggingFaceHub(repo_id="mlabonne/AlphaMonarch-7B", model_kwargs={"temperature": 0.7, "max_length": 512}) | |
# # Create the conversational retrieval chain | |
# qa = ConversationalRetrievalChain.from_llm(model, db.as_retriever()) | |
# query = '''what all is present in reports module ''' | |
# result = qa({"question": query, "chat_history": []}) | |
# print(result['answer']) | |
# def get_helpful_answer(context, query): | |
# import re | |
# pattern = re.compile(r"Helpful Answer:\s*(.*?)(?:Question:|\Z)", re.DOTALL) | |
# match = pattern.search(context) | |
# if match: | |
# return match.group(1).strip() | |
# else: | |
# return "No helpful answer found." | |
# # print the helpful answer | |
# print(get_helpful_answer(result['answer'], query)) | |
# CLAUDE IMPROVEMENT TRY | |
import pandas as pd
from langchain.chains import ConversationalRetrievalChain
from langchain.chains import LLMChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.prompts import PromptTemplate
from langchain.schema import Document
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
# Load the Q&A table and the two Angular source files that form the corpus.
# The reports_* variable names date from the earlier (commented-out) version
# that read reports.component.html / reports.module.ts; they now hold the
# accounting component files.
df = pd.read_csv("web_data.csv")
with open("accounting.component.html", "r", encoding="utf-8") as f:
    reports_component_html = f.read()
with open("accounting.component.ts", "r", encoding="utf-8") as f:
    reports_module_ts = f.read()

# One text per CSV row (query/responses columns), then one text per file.
texts = [
    f"Question: {question}\nAnswer: {answer}"
    for question, answer in zip(df['query'], df['responses'])
]
texts.extend([
    f"File: accounting.component.html\nContent:\n{reports_component_html}",
    f"File: accounting.component.ts\nContent:\n{reports_module_ts}",
])
# Split every text with chunk_size=500 / chunk_overlap=50 and wrap each
# resulting chunk in a Document that carries empty metadata.
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
docs = []
for text in texts:
    for chunk in text_splitter.split_text(text):
        docs.append(Document(page_content=chunk, metadata={}))
# Create embeddings and vector store.
# The embedding model must be a sentence-embedding checkpoint. The previous
# value, "meta-llama/Meta-Llama-3-8B-Instruct", is the chat LLM used for
# generation below, not an embedding model. This checkpoint is the documented
# default of HuggingFaceEmbeddings, which the earlier commented-out version of
# this script relied on by calling HuggingFaceEmbeddings() without arguments.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)
# Index every chunk in a Chroma vector store built from those embeddings.
db = Chroma.from_documents(docs, embeddings)
# Language model used for both question rewriting and answering.
# NOTE(review): confirm the hosted text-generation endpoint honours
# "max_length"; it may expect "max_new_tokens" instead.
generation_kwargs = {"temperature": 0.3, "max_length": 512, "top_p": 0.95}
model = HuggingFaceHub(
    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
    model_kwargs=generation_kwargs,
)
# Prompt for the answering step; it exposes {context} and {question}.
prompt_template = """
Use the following pieces of context to answer the question at the end. If you don't know the answer, say "I don't have enough information to answer this question accurately."
Aim to provide a concise yet informative answer within 500 characters.
Context:
{context}
Question: {question}
Confident and Accurate Answer:
"""

# Answering chain: the documents handed to StuffDocumentsChain are supplied
# to the prompt through its 'context' variable.
answer_prompt = PromptTemplate(
    template=prompt_template,
    input_variables=['context', 'question'],
)
answer_chain = LLMChain(llm=model, prompt=answer_prompt)
combine_docs_chain = StuffDocumentsChain(
    llm_chain=answer_chain,
    document_variable_name='context',
)
# Chain that rewrites a follow-up question, given the chat history, into a
# standalone question. Adjacent literals below form one template string.
standalone_question_prompt = PromptTemplate(
    template=(
        'Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question focused on Angular and TypeScript concepts.\n\n'
        'Chat History:\n{chat_history}\n'
        'Follow Up Input: {question}\n'
        'Standalone question:'
    ),
    input_variables=['chat_history', 'question'],
)
question_generator = LLMChain(llm=model, prompt=standalone_question_prompt)
# Assemble the conversational retrieval chain from the vector-store retriever
# (search_kwargs k=3), the question rewriter and the answering chain. Source
# documents are returned with each result and verbose output is enabled.
retriever = db.as_retriever(search_kwargs={"k": 3})
qa = ConversationalRetrievalChain(
    question_generator=question_generator,
    combine_docs_chain=combine_docs_chain,
    retriever=retriever,
    return_source_documents=True,
    verbose=True,
)
# Function to run a query
def run_query(query, chat_history=None, *, chain=None):
    """Send one question through the retrieval chain and print the outcome.

    Args:
        query: Question text, passed to the chain under the "question" key.
        chat_history: Earlier conversation turns, passed under the
            "chat_history" key. When omitted, a new empty list is created
            for each call, so no list object is shared between calls.
        chain: Callable that takes the input dict and returns a mapping with
            "answer" and "source_documents" entries. Defaults to the
            module-level ``qa`` chain.

    Returns:
        The mapping returned by the chain.
    """
    if chat_history is None:
        chat_history = []
    if chain is None:
        chain = qa
    result = chain({"question": query, "chat_history": chat_history})
    print("Question:", query)
    print("Answer:", result['answer'])
    # Show only the first 50 characters of each source document.
    print("Sources:", [doc.page_content[:50] + "..." for doc in result['source_documents']])
    return result
# Example usage: ask for a summary of the accounting component's TypeScript
# file, starting from an empty chat history.
query = (
    "Explain the code in summary in the accounting components TypeScript file."
)
result = run_query(query)