MOHAMMED-N committed on
Commit ce4aabc · verified · 1 Parent(s): 4fefb82

Update app.py

Files changed (1)
  1. app.py +10 -10
app.py CHANGED
@@ -1,6 +1,6 @@
 import streamlit as st
 import os
-from transformers import pipeline
+from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
 
 # --- LANGCHAIN IMPORTS ---
 from langchain_community.document_loaders import PyPDFLoader
@@ -51,14 +51,14 @@ else:
 # 3) CREATE RETRIEVER
 retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})
 
-# 4) SET UP HF TRANSFORMERS PIPELINE
-# Use Hugging Face's transformers pipeline for text generation
-hf_pipeline = pipeline(
-    "text-generation",
-    model="UBC-NLP/AraT5-base", # Arabic-friendly model
-    tokenizer="UBC-NLP/AraT5-base",
-    device=-1
-)
+# 4) LOAD HF TRANSFORMERS MODEL AND PIPELINE
+# Use AutoModelForSeq2SeqLM for T5-based models
+model_name = "UBC-NLP/AraT5-base" # Replace with any model suitable for Arabic tasks
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+
+# Define a text2text-generation pipeline for T5
+hf_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer, device=-1)
 
 # Define a function to generate responses
 def generate_response(question, retriever):
@@ -70,7 +70,7 @@ def generate_response(question, retriever):
     context = " ".join([doc.page_content for doc in retrieved_docs])
 
     # Generate the response using the HF pipeline
-    input_text = f"{question} Context: {context}"
+    input_text = f"سؤال: {question}\nالنص: {context}\nالإجابة:"
     response = hf_pipeline(input_text, max_length=200, num_return_sequences=1)
     return response[0]["generated_text"]
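
Why the task string changed: AraT5 is a T5-style encoder-decoder checkpoint, which transformers serves through the "text2text-generation" task; the previous "text-generation" task targets causal (decoder-only) language models and is the wrong task class for a seq2seq model. A minimal, self-contained sketch of the new loading path follows; the toy prompt is illustrative only, and it assumes transformers with a PyTorch backend is installed:

# Minimal sketch of the loading path this commit switches to.
# Assumes `transformers` and a PyTorch backend; the checkpoint
# downloads from the Hugging Face Hub on first run.
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM

model_name = "UBC-NLP/AraT5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Seq2seq checkpoints use the "text2text-generation" task;
# "text-generation" is reserved for causal (decoder-only) LMs.
hf_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer, device=-1)  # device=-1 pins to CPU

out = hf_pipeline("مرحبا", max_length=32, num_return_sequences=1)  # toy prompt ("hello")
print(out[0]["generated_text"])

device=-1 keeps inference on CPU, which presumably matches CPU-only Space hardware.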
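The prompt change replaces the mixed English/Arabic prompt with a fully Arabic template, literally "Question: {question}\nText: {context}\nAnswer:" (سؤال = question, النص = text, الإجابة = answer). The hunk elides the lines that fetch the documents; the sketch below reconstructs them under the assumption that they use the standard LangChain retriever call get_relevant_documents, so that part is hypothetical:

# Sketch of generate_response after this commit. The retrieval call is
# an assumption: the elided hunk lines are filled in here with the
# standard LangChain retriever API.
def generate_response(question, retriever):
    # Fetch the k=5 most similar chunks configured on the retriever (assumed)
    retrieved_docs = retriever.get_relevant_documents(question)
    context = " ".join([doc.page_content for doc in retrieved_docs])

    # Arabic prompt template: "Question: ...\nText: ...\nAnswer:"
    input_text = f"سؤال: {question}\nالنص: {context}\nالإجابة:"
    response = hf_pipeline(input_text, max_length=200, num_return_sequences=1)
    return response[0]["generated_text"]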