tahirsher committed on
Commit
5eacffe
1 Parent(s): d426168

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -28
app.py CHANGED
@@ -6,21 +6,21 @@ from PyPDF2 import PdfReader
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
  from langchain.embeddings import HuggingFaceEmbeddings
8
  from langchain.vectorstores import FAISS
9
- from transformers import AutoModel, AutoTokenizer
10
  import torch
11
 
# Set up Groq API key.
# os.getenv() takes the *name* of an environment variable, not the secret
# itself. The key must be provided through the environment (for example as a
# deployment secret) and must never be committed to source control.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 
 
 
14
 
# Initialize embedding model (using sentence-transformers model).
# Both the tokenizer and the model are loaded from the same checkpoint name.
tokenizer, embedding_model = (
    AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
    AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
)
18
 
# List of Hugging Face PDF URLs.
# These are "blob" page links, not direct download links.
PDF_URLS = [
    "https://huggingface.co/spaces/tahirsher/GenAI_Lawyers_Guide/blob/main/administrator92ada0936848e501425591b4ad0cd417.pdf",
    "https://huggingface.co/spaces/tahirsher/GenAI_Lawyers_Guide/blob/main/Pakistan%20Penal%20Code.pdf",
    # Add more document links as needed
]
25
 
26
  # Helper function to convert Hugging Face blob URLs to direct download URLs
@@ -33,7 +33,7 @@ def get_huggingface_raw_url(url):
33
  def fetch_pdf_text_from_huggingface(urls):
34
  text = ""
35
  for url in urls:
36
- raw_url = get_huggingface_raw_url(url) # Convert to direct download link
37
  response = requests.get(raw_url)
38
  if response.status_code == 200:
39
  pdf_file = BytesIO(response.content)
@@ -65,31 +65,17 @@ def load_or_create_vector_store(text_chunks):
65
  vector_store = FAISS.from_texts(text_chunks, embedding=embedding_function)
66
  return vector_store
67
 
# Call Groq API for generating summary based on the query and retrieved text
def generate_summary_with_groq(query, retrieved_text):
    """Ask the Groq chat-completions API to answer *query* using *retrieved_text*.

    Args:
        query: The user's question.
        retrieved_text: Context text to send along with the question.

    Returns:
        The content of the first choice's message on success, or the string
        "Error in Groq API response" when the request fails. On failure an
        error is also reported through ``st.error``.
    """
    # Groq serves its OpenAI-compatible API under the /openai/v1 prefix.
    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "messages": [
            {"role": "user", "content": f"{query}\n\nRelated information:\n{retrieved_text}"}
        ],
        "model": "llama3-8b-8192",
    }

    try:
        # A timeout keeps a stalled connection from hanging the app forever.
        response = requests.post(url, headers=headers, json=payload, timeout=60)
    except requests.RequestException:
        # Network-level failures get the same treatment as a non-200 reply.
        st.error("Failed to generate summary with Groq API")
        return "Error in Groq API response"

    if response.status_code != 200:
        st.error("Failed to generate summary with Groq API")
        return "Error in Groq API response"

    return response.json()["choices"][0]["message"]["content"]
87
 
# Generate response for user query
def user_input(user_question, vector_store):
    """Answer *user_question* from the documents held in *vector_store*.

    Runs ``vector_store.similarity_search`` on the question, joins the
    ``page_content`` of the returned documents with single spaces, and passes
    the question plus that context to ``generate_summary_with_groq``.

    Returns:
        Whatever ``generate_summary_with_groq`` returns.
    """
    matches = vector_store.similarity_search(user_question)
    passages = [match.page_content for match in matches]
    return generate_summary_with_groq(user_question, " ".join(passages))
93
 
94
  # Main function to run the Streamlit app
95
  def main():
 
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
  from langchain.embeddings import HuggingFaceEmbeddings
8
  from langchain.vectorstores import FAISS
9
+ from transformers import pipeline, AutoModel, AutoTokenizer
10
  import torch
11
 
# Load the summarization pipeline model
@st.cache_resource
def load_summarization_pipeline():
    """Create and return the "summarization" pipeline for facebook/bart-large-cnn.

    The function is wrapped with ``st.cache_resource``.
    """
    # Use a summarization model
    return pipeline("summarization", model="facebook/bart-large-cnn")


# Module-level summarizer used by the summary-generation code below.
summarizer = load_summarization_pipeline()
 
 
19
 
# List of Hugging Face PDF URLs.
# These are "blob" page links, not direct download links.
PDF_URLS = [
    "https://huggingface.co/spaces/tahirsher/GenAI_Lawyers_Guide/blob/main/administrator92ada0936848e501425591b4ad0cd417.pdf",
    "https://huggingface.co/spaces/tahirsher/GenAI_Lawyers_Guide/blob/main/Pakistan%20Penal%20Code.pdf",
]
25
 
26
  # Helper function to convert Hugging Face blob URLs to direct download URLs
 
33
  def fetch_pdf_text_from_huggingface(urls):
34
  text = ""
35
  for url in urls:
36
+ raw_url = get_huggingface_raw_url(url)
37
  response = requests.get(raw_url)
38
  if response.status_code == 200:
39
  pdf_file = BytesIO(response.content)
 
65
  vector_store = FAISS.from_texts(text_chunks, embedding=embedding_function)
66
  return vector_store
67
 
# Generate summary based on the retrieved text
def generate_summary_with_huggingface(query, retrieved_text):
    """Summarize *retrieved_text* in the context of *query*.

    The query and the retrieved text are combined into one prompt and passed
    to the module-level ``summarizer`` pipeline.

    Args:
        query: The user's question.
        retrieved_text: Context text to summarize.

    Returns:
        The ``summary_text`` of the first result returned by the pipeline.
    """
    summarization_input = f"{query}\n\nRelated information:\n{retrieved_text}"
    summary = summarizer(
        summarization_input,
        max_length=200,
        min_length=50,
        do_sample=False,
        # The joined context can be longer than the model's maximum input
        # length; let the tokenizer clip it instead of failing in the model.
        truncation=True,
    )
    return summary[0]["summary_text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
# Generate response for user query
def user_input(user_question, vector_store):
    """Answer *user_question* from the documents held in *vector_store*.

    Runs ``vector_store.similarity_search`` on the question, joins the
    ``page_content`` of the returned documents with single spaces, and passes
    the question plus that context to ``generate_summary_with_huggingface``.

    Returns:
        Whatever ``generate_summary_with_huggingface`` returns.
    """
    matches = vector_store.similarity_search(user_question)
    passages = [match.page_content for match in matches]
    return generate_summary_with_huggingface(user_question, " ".join(passages))
79
 
80
  # Main function to run the Streamlit app
81
  def main():