import gradio as gr
from langchain.chains import RetrievalQA
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

# Hugging Face checkpoint used for both the tokenizer and the QA model.
model_name = "HooshvareLab/bert-fa-base-uncased"

# NOTE(review): the checkpoint name suggests a base (not QA-fine-tuned) BERT.
# If so, AutoModelForQuestionAnswering attaches a freshly initialised
# span-prediction head and answers will be unreliable until the model is
# fine-tuned or swapped for a QA checkpoint -- confirm.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained(model_name)

# Sample knowledge-base text that gets split, embedded and searched.
# Stray "|" table residue was removed from inside the literal so it no
# longer ends up in the indexed text.
document_text = """
این یک متن نمونه است که به عنوان پایهای برای پاسخ به سؤالات استفاده میشود.
"""

# Split the document into chunks of at most 1000 characters, with 200
# characters of overlap between neighbouring chunks.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_text(document_text)

# Embed every chunk with a multilingual sentence-transformer and index the
# vectors in a FAISS store for similarity search.
embeddings = SentenceTransformerEmbeddings(
    model_name="paraphrase-multilingual-MiniLM-L12-v2"
)
vectorstore = FAISS.from_texts(texts, embeddings)

class _ExtractiveRetrievalQA:
    """Retrieve relevant chunks, then extract an answer span from them.

    ``RetrievalQA.from_chain_type`` requires a LangChain language model for
    ``llm``. ``model`` is a Hugging Face ``AutoModelForQuestionAnswering``
    (an extractive span-prediction model), which is not one, so building the
    chain with it fails validation. This helper keeps the ``run(query)``
    entry point but feeds the retrieved text to a Transformers
    question-answering pipeline instead.
    """

    def __init__(self, reader, retriever):
        """Store the QA pipeline (``reader``) and the document ``retriever``."""
        self._reader = reader
        self._retriever = retriever

    def run(self, query):
        """Return the answer text for ``query``.

        Returns an empty string when the retriever yields no documents.
        """
        documents = self._retriever.get_relevant_documents(query)
        if not documents:
            return ""
        # Concatenate the retrieved chunks into one context for the reader.
        context = "\n\n".join(doc.page_content for doc in documents)
        return self._reader(question=query, context=context)["answer"]


# Exposes the same ``qa_chain.run(query)`` call the rest of the script uses.
qa_chain = _ExtractiveRetrievalQA(
    reader=pipeline("question-answering", model=model, tokenizer=tokenizer),
    retriever=vectorstore.as_retriever(),
)

def chatbot_response(query: str) -> str:
    """Answer a user question via the module-level ``qa_chain``.

    Args:
        query: Question text entered by the user.

    Returns:
        The result of ``qa_chain.run(query)``, or an empty string when
        ``query`` is empty or whitespace-only (the chain is not invoked
        in that case).
    """
    # Nothing to answer: skip retrieval and model inference entirely.
    if not query or not query.strip():
        return ""
    return qa_chain.run(query)

# Minimal text-in / text-out web UI around chatbot_response.
# NOTE(review): the description mentions Word files, while this script
# indexes the hard-coded document_text -- confirm which is intended.
iface = gr.Interface(
    fn=chatbot_response,
    inputs="text",
    outputs="text",
    title="چتبات فارسی",
    description="یک چتبات تعاملی که از محتوای فایلهای Word استفاده میکند.",
)

# Start the Gradio server when the script runs.
iface.launch()