Spaces:
Sleeping
Sleeping
import os | |
import tempfile | |
import shutil | |
import gradio as gr | |
from langchain.embeddings import SentenceTransformerEmbeddings | |
from langchain.vectorstores import Chroma | |
from langchain.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.chains import RetrievalQA | |
from langchain.llms import CTransformers | |
# Local model setup
def load_llm():
    """Create the local LLM wrapper used to answer questions.

    Returns:
        A ``CTransformers`` instance configured for the quantized
        Mistral-7B-Instruct GGUF model, generating at most 1024 new
        tokens at temperature 0.1.
    """
    # Generation settings are kept in one place so they are easy to tune.
    generation_config = {"max_new_tokens": 1024, "temperature": 0.1}
    llm = CTransformers(
        model="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
        model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
        model_type="mistral",
        config=generation_config,
    )
    return llm
# Load documents from a file path
def load_documents(file_path):
    """Load a PDF, TXT or DOCX file into LangChain documents.

    The loader is chosen from the file extension. The extension is matched
    case-insensitively so that uploads such as ``REPORT.PDF`` are accepted.

    Args:
        file_path: Path of the file to load, as a string.

    Returns:
        Whatever the selected loader's ``load()`` returns.

    Raises:
        ValueError: If the extension is not ``.pdf``, ``.txt`` or ``.docx``.
    """
    # Only the comparison is lower-cased; the loader gets the original path.
    lowered = file_path.lower()
    if lowered.endswith(".pdf"):
        loader = PyPDFLoader(file_path)
    elif lowered.endswith(".txt"):
        loader = TextLoader(file_path, encoding='utf-8')
    elif lowered.endswith(".docx"):
        loader = Docx2txtLoader(file_path)
    else:
        raise ValueError("نوع الملف غير مدعوم.")
    return loader.load()
# Process the file and build the question-answering chain
def process_file(file_path):
    """Index a document and return a retrieval QA chain over it.

    The file is loaded, split into 500-character chunks with a
    100-character overlap, embedded with a multilingual MiniLM
    sentence-transformer and stored in a Chroma vector store. The returned
    chain retrieves the 3 most similar chunks and passes them to the local
    LLM using the "stuff" chain type.

    Args:
        file_path: Path of the PDF, TXT or DOCX file to index.

    Returns:
        A ``RetrievalQA`` chain that does not return source documents.

    Raises:
        ValueError: If the file type is unsupported or the file yields no
            text chunks to index.
    """
    import uuid

    documents = load_documents(file_path)
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    texts = text_splitter.split_documents(documents)
    if not texts:
        # E.g. a scanned PDF with no text layer: fail with a clear message
        # instead of handing an empty batch to the vector store.
        raise ValueError("لم يتم العثور على نص قابل للقراءة في الملف.")

    embeddings = SentenceTransformerEmbeddings(
        model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )
    # Use a fresh collection per call. With the default collection name,
    # repeated calls in one process can add to the same in-memory
    # collection, so chunks of earlier files would be retrieved too.
    db = Chroma.from_documents(
        texts,
        embeddings,
        collection_name=f"upload-{uuid.uuid4().hex}",
    )
    retriever = db.as_retriever(search_kwargs={"k": 3})
    llm = load_llm()
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=False
    )
    return qa
# Global chain, reused across questions about the same uploaded file
qa_chain = None
# Path of the upload that `qa_chain` was built from (None until first build)
_qa_chain_source = None


def _build_chain_from_upload(source_path):
    """Build a QA chain from a private temporary copy of the uploaded file."""
    suffix = os.path.splitext(source_path)[-1]
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp_path = tmp.name
    try:
        # Copy by path so this works whether the upload arrived as a path
        # string or as an object that only exposes `.name`.
        shutil.copyfile(source_path, tmp_path)
        return process_file(tmp_path)
    finally:
        # delete=False means nobody else removes the copy; do it here so
        # temp files do not pile up with every question.
        try:
            os.remove(tmp_path)
        except OSError:
            pass


# Question handler
def ask_question(file, question):
    """Answer a question about an uploaded file and return HTML.

    Args:
        file: The uploaded file, either a path string or an object whose
            ``name`` attribute is the path; ``None`` if nothing is uploaded.
        question: The user's question; may be empty or ``None``.

    Returns:
        An RTL ``<div>`` HTML string containing the answer, or a red error
        message if input is missing or processing fails. Exceptions are
        reported in the returned HTML rather than raised.
    """
    global qa_chain, _qa_chain_source
    import html

    if file is None or not question or not question.strip():
        return "<div dir='rtl' style='color:red;'>الرجاء تحميل ملف وكتابة سؤال.</div>"

    source_path = file if isinstance(file, str) else file.name
    try:
        # Rebuilding the index and reloading the model is expensive, so only
        # do it when the uploaded file changes.
        if qa_chain is None or _qa_chain_source != source_path:
            qa_chain = _build_chain_from_upload(source_path)
            _qa_chain_source = source_path
        answer = qa_chain.run(question)
        # The answer is derived from untrusted document text; escape it
        # before embedding it in HTML.
        return f"<div dir='rtl' style='text-align: right;'>{html.escape(str(answer))}</div>"
    except Exception as e:
        return f"<div dir='rtl' style='color:red;'>حدث خطأ أثناء المعالجة: {html.escape(str(e))}</div>"
# Gradio interface
# NOTE(review): the original indentation was lost; this assumes the row holds
# only the two inputs, with the answer area and button below it. Confirm.
demo = gr.Blocks(title="Smart PDF Assistant", theme=gr.themes.Soft())

with demo:
    # Page heading and usage instructions (right-to-left Arabic text).
    gr.Markdown("<h2 style='text-align: right;'>🧠📚 مساعد الوثائق الذكي</h2>")
    gr.Markdown("<div dir='rtl'>قم برفع ملف PDF أو DOCX أو TXT، ثم اطرح أي سؤال حول محتواه.</div>")

    # Inputs sit side by side: the document upload and the question box.
    with gr.Row():
        file_input = gr.File(
            label="📎 ارفع ملفك",
            file_types=[".pdf", ".docx", ".txt"],
        )
        question_input = gr.Textbox(
            label="❓ اكتب سؤالك هنا",
            placeholder="ما هو ملخص هذا الملف؟",
        )

    answer_output = gr.HTML(label="💬 الإجابة")
    ask_button = gr.Button("🔍 استعلم")

    # Clicking the button sends the file and question to ask_question and
    # renders the HTML it returns.
    ask_button.click(
        fn=ask_question,
        inputs=[file_input, question_input],
        outputs=answer_output,
    )

demo.launch()