# newtry2/app.py
# Dependencies (install via requirements.txt in the Space, or pip):
#   transformers pandas langchain langchain-community
#   sentence-transformers chromadb
import os

# Read the Hugging Face token from the KEY environment variable
# (configured as a secret in the Space settings). Note: do not print
# the raw token, or it ends up in the Space logs.
KEY = os.getenv('KEY')
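# Fail fast if the secret is missing (an assumed guard; without it the
# os.environ assignments below would raise a less obvious TypeError on None).
if KEY is None:
    raise RuntimeError("KEY environment variable is not set")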
# Expose the token under both names the LangChain / HF Hub clients look for.
os.environ['HF_TOKEN'] = KEY
os.environ['HUGGINGFACEHUB_API_TOKEN'] = KEY
# The legacy `from langchain import ...` paths were removed in newer LangChain
# releases; these classes now live in langchain-community (installed above).
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceHub
from langchain_community.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.docstore.document import Document
import pandas as pd
# Load the CSV file
df = pd.read_csv("web_data.csv")
# Load the HTML and TS files
with open("reports.component.html", "r", encoding="utf-8") as f:
reports_component_html = f.read()
with open("reports.module.ts", "r", encoding="utf-8") as f:
reports_module_ts = f.read()
# Create the embedding model (HuggingFaceEmbeddings defaults to
# sentence-transformers/all-mpnet-base-v2).
embeddings = HuggingFaceEmbeddings()
# Combine questions, answers, and file contents into a list of strings
texts = [f"Question: {q}\nAnswer: {a}" for q, a in zip(df['query'], df['responses'])]
texts.append(f"File: reports.component.html\nContent:\n{reports_component_html}")
texts.append(f"File: reports.module.ts\nContent:\n{reports_module_ts}")
# Split the texts into chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
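# CharacterTextSplitter splits on its default "\n\n" separator and then merges
# the pieces into chunks of up to ~1000 characters, with no overlap here.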
docs = []
for text in texts:
    chunks = text_splitter.split_text(text)
    for chunk in chunks:
        doc = Document(page_content=chunk, metadata={})
        docs.append(doc)
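# Optional sanity check on the corpus size.
print(f"Split {len(texts)} source texts into {len(docs)} chunks")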
# Create the vector store
db = Chroma.from_documents(docs, embeddings)
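# Optional retrieval sanity check (a sketch; similarity_search is the standard
# LangChain vector-store call for top-k lookups):
# hits = db.similarity_search("reports module", k=2)
# print(hits[0].page_content[:200])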
# Load the language model
# model = HuggingFaceHub(repo_id="tiiuae/falcon-7b-instruct", model_kwargs={"temperature": 0.7, "max_length": 512})
model = HuggingFaceHub(repo_id="meta-llama/Meta-Llama-3-8B", model_kwargs={"temperature": 0.7, "max_length": 512})
# model = HuggingFaceHub(repo_id="mlabonne/AlphaMonarch-7B", model_kwargs={"temperature": 0.7, "max_length": 512})
# Create the conversational retrieval chain
qa = ConversationalRetrievalChain.from_llm(model, db.as_retriever())
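# Multi-turn usage sketch (an assumed example, not part of the original flow):
# pass accumulated (question, answer) tuples as chat_history so follow-up
# questions are resolved against earlier turns.
# history = []
# first = qa({"question": "What does reports.module.ts declare?", "chat_history": history})
# history.append(("What does reports.module.ts declare?", first["answer"]))
# follow_up = qa({"question": "And which components does it import?", "chat_history": history})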
query = "What is present in the reports module?"
result = qa({"question": query, "chat_history": []})
print(result['answer'])
def get_helpful_answer(context, query):
    """Extract the text following 'Helpful Answer:' from the raw model output."""
    import re
    pattern = re.compile(r"Helpful Answer:\s*(.*?)(?:Question:|\Z)", re.DOTALL)
    match = pattern.search(context)
    if match:
        return match.group(1).strip()
    return "No helpful answer found."
# print the helpful answer
print(get_helpful_answer(result['answer'], query))