Spaces:

tahirsher
/

GenAI_Lawyers_Guide

Sleeping

tahirsher committed 19 days ago

Commit

ed56b0d

•

1 Parent(s): 930887e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,7 +4,8 @@ import streamlit as st
 from io import BytesIO
 from PyPDF2 import PdfReader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.vectorstores import FAISS
 from transformers import AutoModel, AutoTokenizer
 import torch
@@ -55,21 +56,13 @@ def get_text_chunks(text):
     chunks = text_splitter.split_text(text)
     return chunks
-# Compute embeddings for text chunks
-def compute_embeddings(text_chunks):
-    embeddings = []
-    for text in text_chunks:
-        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
-        with torch.no_grad():
-            model_output = embedding_model(**inputs)
-        embeddings.append(model_output.last_hidden_state.mean(dim=1).squeeze().numpy())
-    return embeddings
 # Create a FAISS vector store with embeddings
 @st.cache_resource
 def load_or_create_vector_store(text_chunks):
-    embeddings = compute_embeddings(text_chunks)
-    vector_store = FAISS.from_texts(text_chunks, embeddings)
     return vector_store
 # Call Groq API for generating summary based on the query and retrieved text

 from io import BytesIO
 from PyPDF2 import PdfReader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
 from transformers import AutoModel, AutoTokenizer
 import torch
     chunks = text_splitter.split_text(text)
     return chunks
+# Initialize embedding function
+embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 # Create a FAISS vector store with embeddings
 @st.cache_resource
 def load_or_create_vector_store(text_chunks):
+    vector_store = FAISS.from_texts(text_chunks, embedding=embedding_function)
     return vector_store
 # Call Groq API for generating summary based on the query and retrieved text