shubhampal committed on
Commit ceb805c
1 parent: 353952a

Update app.py

Files changed (1): app.py +147 -52
app.py CHANGED
@@ -5,69 +5,164 @@ KEY = os.getenv('KEY')
 os.environ['HF_TOKEN']=KEY
 os.environ['HUGGINGFACEHUB_API_TOKEN']=KEY
 
-from langchain.embeddings.huggingface import HuggingFaceEmbeddings
-from langchain import HuggingFaceHub
-from langchain.vectorstores import Chroma
-from langchain.chains import ConversationalRetrievalChain
-from langchain.text_splitter import CharacterTextSplitter
-from langchain.docstore.document import Document
-import pandas as pd
-
-
-# Load the CSV file
-df = pd.read_csv("web_data.csv")
-
-# Load the HTML and TS files
-with open("reports.component.html", "r", encoding="utf-8") as f:
-    reports_component_html = f.read()
-
-with open("reports.module.ts", "r", encoding="utf-8") as f:
-    reports_module_ts = f.read()
-
-# Create the embeddings
-embeddings = HuggingFaceEmbeddings()
-
-print(embeddings)
-
-# Combine questions, answers, and file contents into a list of strings
-texts = [f"Question: {q}\nAnswer: {a}" for q, a in zip(df['query'], df['responses'])]
-texts.append(f"File: reports.component.html\nContent:\n{reports_component_html}")
-texts.append(f"File: reports.module.ts\nContent:\n{reports_module_ts}")
-
-# Split the texts into chunks
-text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
-docs = []
-for text in texts:
-    chunks = text_splitter.split_text(text)
-    for chunk in chunks:
-        doc = Document(page_content=chunk, metadata={})
-        docs.append(doc)
-
-# Create the vector store
-db = Chroma.from_documents(docs, embeddings)
-
-# Load the language model
-model = HuggingFaceHub(repo_id="tiiuae/falcon-7b-instruct", model_kwargs={"temperature": 0.7, "max_length": 512})
-# model = HuggingFaceHub(repo_id="meta-llama/Meta-Llama-3-8B", model_kwargs={"temperature": 0.7, "max_length": 512})
-# model = HuggingFaceHub(repo_id="mlabonne/AlphaMonarch-7B", model_kwargs={"temperature": 0.7, "max_length": 512})
-
-# Create the conversational retrieval chain
-qa = ConversationalRetrievalChain.from_llm(model, db.as_retriever())
-
-
-query = '''what all is present in reports module '''
-result = qa({"question": query, "chat_history": []})
-print(result['answer'])
-
-def get_helpful_answer(context, query):
-    import re
-    pattern = re.compile(r"Helpful Answer:\s*(.*?)(?:Question:|\Z)", re.DOTALL)
-    match = pattern.search(context)
-    if match:
-        return match.group(1).strip()
-    else:
-        return "No helpful answer found."
-
-
-# print the helpful answer
-print(get_helpful_answer(result['answer'], query))
+# from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+# from langchain import HuggingFaceHub
+# from langchain.vectorstores import Chroma
+# from langchain.chains import ConversationalRetrievalChain
+# from langchain.text_splitter import CharacterTextSplitter
+# from langchain.docstore.document import Document
+# import pandas as pd
+
+
+# # Load the CSV file
+# df = pd.read_csv("web_data.csv")
+
+# # Load the HTML and TS files
+# with open("reports.component.html", "r", encoding="utf-8") as f:
+#     reports_component_html = f.read()
+
+# with open("reports.module.ts", "r", encoding="utf-8") as f:
+#     reports_module_ts = f.read()
+
+# # Create the embeddings
+# embeddings = HuggingFaceEmbeddings()
+
+# print(embeddings)
+
+# # Combine questions, answers, and file contents into a list of strings
+# texts = [f"Question: {q}\nAnswer: {a}" for q, a in zip(df['query'], df['responses'])]
+# texts.append(f"File: reports.component.html\nContent:\n{reports_component_html}")
+# texts.append(f"File: reports.module.ts\nContent:\n{reports_module_ts}")
+
+# # Split the texts into chunks
+# text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+# docs = []
+# for text in texts:
+#     chunks = text_splitter.split_text(text)
+#     for chunk in chunks:
+#         doc = Document(page_content=chunk, metadata={})
+#         docs.append(doc)
+
+# # Create the vector store
+# db = Chroma.from_documents(docs, embeddings)
+
+# # Load the language model
+# model = HuggingFaceHub(repo_id="tiiuae/falcon-7b-instruct", model_kwargs={"temperature": 0.7, "max_length": 512})
+# # model = HuggingFaceHub(repo_id="meta-llama/Meta-Llama-3-8B", model_kwargs={"temperature": 0.7, "max_length": 512})
+# # model = HuggingFaceHub(repo_id="mlabonne/AlphaMonarch-7B", model_kwargs={"temperature": 0.7, "max_length": 512})
+
+# # Create the conversational retrieval chain
+# qa = ConversationalRetrievalChain.from_llm(model, db.as_retriever())
+
+
+# query = '''what all is present in reports module '''
+# result = qa({"question": query, "chat_history": []})
+# print(result['answer'])
+
+# def get_helpful_answer(context, query):
+#     import re
+#     pattern = re.compile(r"Helpful Answer:\s*(.*?)(?:Question:|\Z)", re.DOTALL)
+#     match = pattern.search(context)
+#     if match:
+#         return match.group(1).strip()
+#     else:
+#         return "No helpful answer found."
+
+# # print the helpful answer
+# print(get_helpful_answer(result['answer'], query))
+
+# CLAUDE IMPROVEMENT TRY
+
+import pandas as pd
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import Chroma
+from langchain.chains import ConversationalRetrievalChain
+from langchain.llms import HuggingFaceHub
+from langchain.chains import LLMChain
+from langchain.prompts import PromptTemplate
+from langchain.chains.combine_documents.stuff import StuffDocumentsChain
+from langchain.schema import Document
+
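+# NB: these import paths match the pre-0.1 langchain package layout; in newer
+# releases most of these classes live in langchain_community instead.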
+# Load and process data (same flow as before, now pointing at the accounting component files)
+df = pd.read_csv("web_data.csv")
+
+with open("accounting.component.html", "r", encoding="utf-8") as f:
+    reports_component_html = f.read()
+
+with open("accounting.component.ts", "r", encoding="utf-8") as f:
+    reports_module_ts = f.read()
+
+# Improved text processing
+texts = [f"Question: {q}\nAnswer: {a}" for q, a in zip(df['query'], df['responses'])]
+texts.append(f"File: accounting.component.html\nContent:\n{reports_component_html}")
+texts.append(f"File: accounting.component.ts\nContent:\n{reports_module_ts}")
+
+# More granular text splitting
+text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+docs = [Document(page_content=chunk, metadata={}) for text in texts for chunk in text_splitter.split_text(text)]
+
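+# (CharacterTextSplitter splits on its separator, "\n\n" by default, then merges
+# pieces up to chunk_size, so 500 characters is a soft cap: a single paragraph
+# longer than that is kept whole.)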
+# Create embeddings and vector store
+embeddings = HuggingFaceEmbeddings(model_name="meta-llama/Meta-Llama-3-8B-Instruct")
+db = Chroma.from_documents(docs, embeddings)
+
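+# NB: HuggingFaceEmbeddings wraps sentence-transformers, which expects an
+# encoder-style embedding model; a decoder-only chat model like
+# Meta-Llama-3-8B-Instruct is unlikely to load here, so something like
+# sentence-transformers/all-MiniLM-L6-v2 is the safer choice.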
+# Improved language model configuration
+model = HuggingFaceHub(
+    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
+    model_kwargs={"temperature": 0.3, "max_length": 512, "top_p": 0.95}
+)
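+# NB: HuggingFaceHub calls the hosted Inference API, so this needs a valid
+# HUGGINGFACEHUB_API_TOKEN and, for gated repos like Meta-Llama-3, approved
+# access on the Hub.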
+
+# Enhanced prompt template
+prompt_template = """
+Use the following pieces of context to answer the question at the end. If you don't know the answer, say "I don't have enough information to answer this question accurately."
+Aim to provide a concise yet informative answer within 500 characters.
+
+Context:
+{context}
+
+Question: {question}
+
+Confident and Accurate Answer:
+"""
+
+# Updated chains
+combine_docs_chain = StuffDocumentsChain(
+    llm_chain=LLMChain(
+        prompt=PromptTemplate(input_variables=['context', 'question'], template=prompt_template),
+        llm=model
+    ),
+    document_variable_name='context'
+)
+
+question_generator = LLMChain(
+    prompt=PromptTemplate(
+        input_variables=['chat_history', 'question'],
+        template='Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question focused on Angular and TypeScript concepts.\n\nChat History:\n{chat_history}\nFollow Up Input: {question}\nStandalone question:'
+    ),
+    llm=model
+)
+
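+# Building the chain from its parts below (instead of using
+# ConversationalRetrievalChain.from_llm) is what lets the two custom prompts
+# above be wired in explicitly.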
+# Create the improved conversational retrieval chain
+qa = ConversationalRetrievalChain(
+    retriever=db.as_retriever(search_kwargs={"k": 3}),
+    combine_docs_chain=combine_docs_chain,
+    question_generator=question_generator,
+    return_source_documents=True,
+    verbose=True
+)
+
+# Function to run a query
+def run_query(query, chat_history=[]):
+    result = qa({"question": query, "chat_history": chat_history})
+    print("Question:", query)
+    print("Answer:", result['answer'])
+    print("Sources:", [doc.page_content[:50] + "..." for doc in result['source_documents']])
+    return result
+
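+# (result is a dict; 'source_documents' is present because the chain was built
+# with return_source_documents=True. The mutable [] default for chat_history is
+# safe only because the function never mutates it.)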
+# Example usage
+query = "Explain the code in summary in the accounting components TypeScript file."
+result = run_query(query)