tahirsher committed on
Commit
ed56b0d
1 Parent(s): 930887e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -12
app.py CHANGED
@@ -4,7 +4,8 @@ import streamlit as st
4
  from io import BytesIO
5
  from PyPDF2 import PdfReader
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
- from langchain_community.vectorstores import FAISS
 
8
  from transformers import AutoModel, AutoTokenizer
9
  import torch
10
 
@@ -55,21 +56,13 @@ def get_text_chunks(text):
55
  chunks = text_splitter.split_text(text)
56
  return chunks
57
 
58
- # Compute embeddings for text chunks
59
- def compute_embeddings(text_chunks):
60
- embeddings = []
61
- for text in text_chunks:
62
- inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
63
- with torch.no_grad():
64
- model_output = embedding_model(**inputs)
65
- embeddings.append(model_output.last_hidden_state.mean(dim=1).squeeze().numpy())
66
- return embeddings
67
 
68
  # Create a FAISS vector store with embeddings
69
  @st.cache_resource
70
  def load_or_create_vector_store(text_chunks):
71
- embeddings = compute_embeddings(text_chunks)
72
- vector_store = FAISS.from_texts(text_chunks, embeddings)
73
  return vector_store
74
 
75
  # Call Groq API for generating summary based on the query and retrieved text
 
4
  from io import BytesIO
5
  from PyPDF2 import PdfReader
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain.embeddings import HuggingFaceEmbeddings
8
+ from langchain.vectorstores import FAISS
9
  from transformers import AutoModel, AutoTokenizer
10
  import torch
11
 
 
56
  chunks = text_splitter.split_text(text)
57
  return chunks
58
 
59
+ # Initialize embedding function
60
+ embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 
 
 
 
 
 
 
61
 
62
  # Create a FAISS vector store with embeddings
63
  @st.cache_resource
64
  def load_or_create_vector_store(text_chunks):
65
+ vector_store = FAISS.from_texts(text_chunks, embedding=embedding_function)
 
66
  return vector_store
67
 
68
  # Call Groq API for generating summary based on the query and retrieved text