Spaces:

M17idd
/

army

Sleeping

File size: 7,029 Bytes

02a2d80
c24bc78
02a2d80
80d2c6b
 
 
 
 
 
 
 
 
cdbe11c
611728f
0ba7f20
04209f5
 
 
c28d5e0
1650168
80d2c6b
c28d5e0
02a2d80
abfcbe7
c28d5e0
abfcbe7
 
 
c28d5e0
 
1650168
abfcbe7
 
c28d5e0
 
abfcbe7
 
02a2d80
abfcbe7
 
 
 
c28d5e0
 
abfcbe7
 
21bf972
 
abfcbe7
c28d5e0
 
abfcbe7
 
 
 
 
 
c28d5e0
 
abfcbe7
c28d5e0
 
abfcbe7
 
 
 
 
 
 
c28d5e0
 
abfcbe7
408d87c
 
abfcbe7
c28d5e0
 
abfcbe7
 
 
c28d5e0
 
 
 
 
02a2d80
 
 
c28d5e0
 
1650168
abfcbe7
408d87c
abfcbe7
1650168
abfcbe7
408d87c
 
3962050
 
1650168
 
 
 
02a2d80
 
1650168
 
02a2d80
 
 
 
c24bc78
 
 
 
 
02a2d80
1650168
 
 
40b0e5e
 
 
1650168
d2055dc
 
40b0e5e
 
d2055dc
 
40b0e5e
 
d2055dc
 
40b0e5e
d2055dc
 
 
52ceade
d2055dc
 
c24bc78
 
 
52ceade
c24bc78
 
 
 
d2055dc
 
 
 
1650168
40b0e5e
 
 
1650168
 
611728f
 
 
 
40b0e5e
02a2d80
80d2c6b
38908cd
 
 
f9ee1b6
02a2d80
80d2c6b
02a2d80
 
 
 
 
 
 
 
 
 
 
 
 
80d2c6b
 
 
02a2d80
80d2c6b
02a2d80
 
 
 
 
 
 
 
80d2c6b
 
02a2d80
1650168
80d2c6b
 
 
02a2d80
80d2c6b
02a2d80
 
80d2c6b
 
02a2d80

import os
import time
from typing import List

import streamlit as st
import tiktoken
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.base import Embeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.schema import Document as LangchainDocument
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from together import Together

# from langchain.embeddings import TogetherEmbeddings



# ---------------------------------------------------------------------------
# Page chrome: browser-tab settings, global right-to-left styling, the centred
# logo and the title banner. Everything here only renders static UI.
# ---------------------------------------------------------------------------

# Global stylesheet injected as raw HTML: Vazirmatn font, RTL layout, chat
# bubble / input / button styling, the header banner and a fade-in animation.
# NOTE(review): the .stApp background uses a relative url(); confirm that
# "./military_bg.jpeg" is actually reachable by the browser in deployment.
PAGE_STYLE_HTML = """
    <style>
    @import url('https://fonts.googleapis.com/css2?family=Vazirmatn:wght@400;700&display=swap');
    html, body, [class*="css"] {
        font-family: 'Vazirmatn', Tahoma, sans-serif;
        direction: rtl;
        text-align: right;
    }
    .stApp {
        background: url("./military_bg.jpeg") no-repeat center center fixed;
        background-size: cover;
        backdrop-filter: blur(2px);
    }
    .stChatMessage {
        background-color: rgba(255,255,255,0.8);
        border: 1px solid #4e8a3e;
        border-radius: 12px;
        padding: 16px;
        margin-bottom: 15px;
        box-shadow: 0 4px 10px rgba(0,0,0,0.2);
        animation: fadeIn 0.4s ease-in-out;
    }
    .stTextInput > div > input, .stTextArea textarea {
        background-color: rgba(255,255,255,0.9) !important;
        border-radius: 8px !important;
        direction: rtl;
        text-align: right;
        font-family: 'Vazirmatn', Tahoma;
    }
    .stButton>button {
        background-color: #4e8a3e !important;
        color: white !important;
        font-weight: bold;
        border-radius: 10px;
        padding: 8px 20px;
        transition: 0.3s;
    }
    .stButton>button:hover {
        background-color: #3c6d30 !important;
    }
    .header-text {
        text-align: center;
        margin-top: 20px;
        margin-bottom: 40px;
        background-color: rgba(255, 255, 255, 0.75);
        padding: 20px;
        border-radius: 20px;
        box-shadow: 0 4px 12px rgba(0,0,0,0.2);
    }
    .header-text h1 {
        font-size: 42px;
        color: #2c3e50;
        margin: 0;
        font-weight: bold;
    }
    .subtitle {
        font-size: 18px;
        color: #34495e;
        margin-top: 8px;
    }
    @keyframes fadeIn {
        from { opacity: 0; transform: translateY(10px); }
        to { opacity: 1; transform: translateY(0); }
    }
    </style>
"""

# Title banner (app name plus subtitle), styled by the .header-text rules above.
HEADER_HTML = """
    <div class="header-text">
        <h1>چت‌ بات ارتش</h1>
        <div class="subtitle">دستیار هوشمند برای تصمیم‌گیری در میدان نبرد</div>
    </div>
"""

st.set_page_config(page_title="چت‌ بات ارتش", page_icon="🪖", layout="wide")
st.markdown(PAGE_STYLE_HTML, unsafe_allow_html=True)

# Three equal-width columns; only the middle one is filled, which centres the logo.
_left_col, logo_col, _right_col = st.columns([1, 1, 1])
logo_col.image("army.png", width=240)

st.markdown(HEADER_HTML, unsafe_allow_html=True)


class TogetherEmbeddings(Embeddings):
    """LangChain ``Embeddings`` adapter that delegates to a Together client.

    Each call forwards the texts to ``client.embeddings.create`` for the
    configured model and returns the ``embedding`` field of every item in
    the response's ``data`` list.
    """

    def __init__(self, model_name: str, api_key: str):
        """Store the model name and create the Together client.

        Args:
            model_name: Embedding model identifier passed on every request.
            api_key: Together API key used to construct the client.
        """
        self.model_name = model_name
        self.client = Together(api_key=api_key)

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a batch of texts.

        Args:
            texts: The strings to embed.

        Returns:
            One embedding vector per item in the service response; an empty
            list when ``texts`` is empty.
        """
        # Materialise once so any iterable is accepted and can be truth-tested.
        texts = list(texts)
        if not texts:
            # Nothing to embed: skip the network round-trip entirely instead
            # of sending a request with an empty input list.
            return []
        response = self.client.embeddings.create(model=self.model_name, input=texts)
        return [item.embedding for item in response.data]

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query string by reusing the batch path."""
        return self.embed_documents([text])[0]

        
def count_tokens(text, model_name="gpt-3.5-turbo"):
    """Return the number of tokens ``text`` encodes to.

    The encoding is the one tiktoken associates with ``model_name``.
    """
    encoding = tiktoken.encoding_for_model(model_name)
    token_ids = encoding.encode(text)
    return len(token_ids)
    
@st.cache_resource
def get_pdf_index():
    """Load ``test1.pdf``, chunk it, embed the chunks and return a FAISS store.

    Steps:
      1. Load every page of the PDF.
      2. Split page text longer than 124 characters into ~124-character
         chunks with a 25-character overlap; shorter pages are kept whole.
      3. Drop blank chunks, and re-split any chunk whose token count (as
         measured by ``count_tokens``) exceeds 512 using a splitter that
         measures length in tokens.
      4. Wrap the chunks in LangChain documents and index them with FAISS
         using ``TogetherEmbeddings``.

    The function is decorated with ``st.cache_resource``, and a spinner is
    shown while the work runs.

    Returns:
        The FAISS vector store built from the chunk documents.

    Raises:
        ValueError: If no non-blank text could be extracted from the PDF.
    """
    pdf_path = 'test1.pdf'
    initial_chunk_chars = 124
    max_tokens = 512

    with st.spinner('📄 در حال پردازش فایل PDF...'):
        pages = PyPDFLoader(pdf_path).load()

        # First pass: small character-based chunks.
        char_splitter = RecursiveCharacterTextSplitter(
            chunk_size=initial_chunk_chars,
            chunk_overlap=25,
        )
        # Safety pass, built once instead of once per chunk. Passing
        # length_function=count_tokens makes chunk_size / chunk_overlap be
        # measured in tokens; measured in characters, a 512 limit could never
        # shrink a chunk that is already ~124 characters long.
        # NOTE(review): count_tokens uses the gpt-3.5-turbo encoding, which is
        # presumably only an approximation of the embedding model's tokenizer.
        token_splitter = RecursiveCharacterTextSplitter(
            chunk_size=max_tokens,
            chunk_overlap=100,
            length_function=count_tokens,
        )

        small_chunks = []
        for page in pages:
            text = page.page_content
            if len(text) > initial_chunk_chars:
                small_chunks.extend(char_splitter.split_text(text))
            else:
                small_chunks.append(text)

        final_chunks = []
        for chunk in small_chunks:
            if not chunk.strip():
                # Blank pages / whitespace-only chunks carry no content worth
                # embedding.
                continue
            if count_tokens(chunk, model_name="gpt-3.5-turbo") > max_tokens:
                final_chunks.extend(token_splitter.split_text(chunk))
            else:
                final_chunks.append(chunk)

        if not final_chunks:
            # Fail with a clear message rather than handing an empty document
            # list to the embedding / indexing step.
            raise ValueError(
                f"No text could be extracted from {pdf_path!r}; nothing to index."
            )

        documents = [LangchainDocument(page_content=text) for text in final_chunks]

        # SECURITY: prefer the TOGETHER_API_KEY environment variable. The
        # literal fallback is kept only so existing deployments keep working;
        # it is a credential committed to source and should be rotated and
        # removed.
        api_key = os.environ.get(
            "TOGETHER_API_KEY",
            "0291f33aee03412a47fa5d8e562e515182dcc5d9aac5a7fb5eefdd1759005979",
        )
        embeddings = TogetherEmbeddings(
            model_name="togethercomputer/m2-bert-80M-32k-retrieval",
            api_key=api_key
        )

        # Build the vector store directly with FAISS (no index-creator wrapper).
        vectordb = FAISS.from_documents(documents, embedding=embeddings)

        return vectordb

# ---------------------------------------------------------------------------
# Chat flow: build the retrieval-QA chain, replay the stored conversation,
# accept a new prompt, then answer the pending prompt on the following rerun.
# ---------------------------------------------------------------------------

# FAISS vector store over the PDF.
index = get_pdf_index()

# SECURITY: prefer the TOGETHER_API_KEY environment variable. The literal
# fallback is kept only so existing deployments keep working; it is a
# credential committed to source and should be rotated and removed.
llm = ChatOpenAI(
    base_url="https://api.together.xyz/v1",
    api_key=os.environ.get(
        'TOGETHER_API_KEY',
        '0291f33aee03412a47fa5d8e562e515182dcc5d9aac5a7fb5eefdd1759005979',
    ),
    model="meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
)

chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    # get_pdf_index() returns the FAISS store itself (not an index wrapper),
    # so the retriever is taken from it directly; `.vectorstore` does not
    # exist on it and raised AttributeError.
    retriever=index.as_retriever(),
    input_key='question'
)

if 'messages' not in st.session_state:
    st.session_state.messages = []

if 'pending_prompt' not in st.session_state:
    st.session_state.pending_prompt = None

# Replay the conversation so far. Message content comes from the user and the
# model, so it is rendered as plain markdown without raw-HTML passthrough.
for msg in st.session_state.messages:
    with st.chat_message(msg['role']):
        st.markdown(f"🗨️ {msg['content']}")

prompt = st.chat_input("چطور می‌تونم کمک کنم؟")

if prompt:
    # Store the question and rerun so it is drawn by the history loop above
    # before the (slow) answer is generated.
    st.session_state.messages.append({'role': 'user', 'content': prompt})
    st.session_state.pending_prompt = prompt
    st.rerun()

if st.session_state.pending_prompt:
    with st.chat_message('ai'):
        thinking = st.empty()
        thinking.markdown("🤖 در حال فکر کردن...")

        try:
            response = chain.run(f'question:پاسخ را فقط به زبان فارسی جواب بده {st.session_state.pending_prompt}')
        except Exception:
            # Clear the pending prompt before propagating, otherwise the same
            # failing request would be re-sent on every subsequent rerun.
            st.session_state.pending_prompt = None
            thinking.empty()
            raise

        # Keep only the text after the last "Helpful Answer:" marker, if any.
        answer = response.split("Helpful Answer:")[-1].strip()
        if not answer:
            answer = "متأسفم، اطلاعات دقیقی در این مورد ندارم."

        thinking.empty()
        shown = ""
        placeholder = st.empty()
        # Typewriter effect. Splitting on single spaces (not all whitespace)
        # keeps newlines inside the pieces, so lists and paragraphs in the
        # answer survive the animation.
        for piece in answer.split(" "):
            shown += piece + " "
            if not piece:
                # Consecutive spaces: extend the text but skip the redraw/delay.
                continue
            placeholder.markdown(shown + "▌")
            time.sleep(0.03)

        # Render and store the answer exactly as produced, formatting intact.
        placeholder.markdown(answer)
        st.session_state.messages.append({'role': 'ai', 'content': answer})
        st.session_state.pending_prompt = None