Update app.py
app.py CHANGED
@@ -1,6 +1,6 @@
 import streamlit as st
 import os
-from transformers import pipeline
+from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
 
 # --- LANGCHAIN IMPORTS ---
 from langchain_community.document_loaders import PyPDFLoader
@@ -51,14 +51,14 @@ else:
 # 3) CREATE RETRIEVER
 retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})
 
-# 4)
-# Use
-
-
-
-
-
-)
+# 4) LOAD HF TRANSFORMERS MODEL AND PIPELINE
+# Use the AutoModelForSeq2SeqLM for T5-based models
+model_name = "UBC-NLP/AraT5-base"  # Replace with any model suitable for Arabic tasks
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+
+# Define a text2text-generation pipeline for T5
+hf_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer, device=-1)
 
 # Define a function to generate responses
 def generate_response(question, retriever):
@@ -70,7 +70,7 @@ def generate_response(question, retriever):
     context = " ".join([doc.page_content for doc in retrieved_docs])
 
     # Generate the response using the HF pipeline
-    input_text = f"{question}
+    input_text = f"سؤال: {question}\nالنص: {context}\nالإجابة:"
     response = hf_pipeline(input_text, max_length=200, num_return_sequences=1)
     return response[0]["generated_text"]
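
For readers who don't read Arabic, the new prompt template labels its fields سؤال (Question), النص (Text, i.e. the retrieved context), and الإجابة (Answer). Below is a minimal sketch of what the new pipeline call returns, assuming transformers and torch are installed; the AraT5 weights download from the Hub on first run, and the sample question is illustrative, not taken from the app.

# Sketch of step 4 in isolation; assumes `pip install transformers torch`.
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM

model_name = "UBC-NLP/AraT5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
hf_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer, device=-1)  # device=-1 pins CPU; use device=0 for a GPU

# Prompt fields: سؤال = Question, النص = Text/Context, الإجابة = Answer.
# Sample reads: "Question: What is the capital of France?\nText: Paris is the capital of France.\nAnswer:"
prompt = "سؤال: ما هي عاصمة فرنسا؟\nالنص: باريس هي عاصمة فرنسا.\nالإجابة:"
result = hf_pipeline(prompt, max_length=200, num_return_sequences=1)

# The pipeline returns a list of dicts, e.g. [{"generated_text": "..."}],
# which is why the app reads response[0]["generated_text"].
print(result[0]["generated_text"])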
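The hunk shows only part of generate_response; how retrieved_docs is obtained falls outside it. The following self-contained sketch shows the retrieve-then-generate flow the function implements. The retriever.invoke call, the StubRetriever, and the offline hf_pipeline stub are assumptions for illustration, not code from the Space.

# Hypothetical end-to-end sketch of generate_response; runs offline via stubs.
from dataclasses import dataclass

@dataclass
class Doc:
    page_content: str  # stand-in for langchain_core.documents.Document

class StubRetriever:
    """Stand-in for vectorstore.as_retriever(...); returns canned documents."""
    def invoke(self, query):
        return [Doc("باريس هي عاصمة فرنسا.")]  # "Paris is the capital of France."

def hf_pipeline(text, max_length=200, num_return_sequences=1):
    # Offline stub for the real text2text-generation pipeline loaded in step 4.
    return [{"generated_text": "باريس"}]  # "Paris"

def generate_response(question, retriever):
    retrieved_docs = retriever.invoke(question)  # assumed retrieval step (outside the hunk)
    context = " ".join(doc.page_content for doc in retrieved_docs)
    # Prompt template from the diff: "Question: ...\nText: ...\nAnswer:"
    input_text = f"سؤال: {question}\nالنص: {context}\nالإجابة:"
    response = hf_pipeline(input_text, max_length=200, num_return_sequences=1)
    return response[0]["generated_text"]

print(generate_response("ما هي عاصمة فرنسا؟", StubRetriever()))  # -> باريس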