File size: 7,029 Bytes
02a2d80
c24bc78
02a2d80
80d2c6b
 
 
 
 
 
 
 
 
cdbe11c
611728f
0ba7f20
04209f5
 
 
c28d5e0
1650168
80d2c6b
c28d5e0
02a2d80
abfcbe7
c28d5e0
abfcbe7
 
 
c28d5e0
 
1650168
abfcbe7
 
c28d5e0
 
abfcbe7
 
02a2d80
abfcbe7
 
 
 
c28d5e0
 
abfcbe7
 
21bf972
 
abfcbe7
c28d5e0
 
abfcbe7
 
 
 
 
 
c28d5e0
 
abfcbe7
c28d5e0
 
abfcbe7
 
 
 
 
 
 
c28d5e0
 
abfcbe7
408d87c
 
abfcbe7
c28d5e0
 
abfcbe7
 
 
c28d5e0
 
 
 
 
02a2d80
 
 
c28d5e0
 
1650168
abfcbe7
408d87c
abfcbe7
1650168
abfcbe7
408d87c
 
3962050
 
1650168
 
 
 
02a2d80
 
1650168
 
02a2d80
 
 
 
c24bc78
 
 
 
 
02a2d80
1650168
 
 
40b0e5e
 
 
1650168
d2055dc
 
40b0e5e
 
d2055dc
 
40b0e5e
 
d2055dc
 
40b0e5e
d2055dc
 
 
52ceade
d2055dc
 
c24bc78
 
 
52ceade
c24bc78
 
 
 
d2055dc
 
 
 
1650168
40b0e5e
 
 
1650168
 
611728f
 
 
 
40b0e5e
02a2d80
80d2c6b
38908cd
 
 
f9ee1b6
02a2d80
80d2c6b
02a2d80
 
 
 
 
 
 
 
 
 
 
 
 
80d2c6b
 
 
02a2d80
80d2c6b
02a2d80
 
 
 
 
 
 
 
80d2c6b
 
02a2d80
1650168
80d2c6b
 
 
02a2d80
80d2c6b
02a2d80
 
80d2c6b
 
02a2d80
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
import os
import time
from typing import List

import streamlit as st
import tiktoken
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.base import Embeddings
# from langchain.embeddings import TogetherEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.schema import Document as LangchainDocument
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from together import Together



# Page-level configuration: browser tab title/icon and wide layout.
# NOTE(review): Streamlit has historically required set_page_config to run
# before any other st.* call — keep it above the rest of the UI code.
st.set_page_config(page_title="چت‌ بات ارتش", page_icon="🪖", layout="wide")

# Inject global CSS: Vazirmatn web font, right-to-left text direction, and
# styling for chat messages, text inputs, buttons and the header card, plus a
# short fade-in animation for chat messages.
# NOTE(review): the background uses a relative url("./military_bg.jpeg"); the
# browser resolves that against the page URL, not the script directory —
# confirm the image is actually served at that path.
st.markdown("""
    <style>
    @import url('https://fonts.googleapis.com/css2?family=Vazirmatn:wght@400;700&display=swap');
    html, body, [class*="css"] {
        font-family: 'Vazirmatn', Tahoma, sans-serif;
        direction: rtl;
        text-align: right;
    }
    .stApp {
        background: url("./military_bg.jpeg") no-repeat center center fixed;
        background-size: cover;
        backdrop-filter: blur(2px);
    }
    .stChatMessage {
        background-color: rgba(255,255,255,0.8);
        border: 1px solid #4e8a3e;
        border-radius: 12px;
        padding: 16px;
        margin-bottom: 15px;
        box-shadow: 0 4px 10px rgba(0,0,0,0.2);
        animation: fadeIn 0.4s ease-in-out;
    }
    .stTextInput > div > input, .stTextArea textarea {
        background-color: rgba(255,255,255,0.9) !important;
        border-radius: 8px !important;
        direction: rtl;
        text-align: right;
        font-family: 'Vazirmatn', Tahoma;
    }
    .stButton>button {
        background-color: #4e8a3e !important;
        color: white !important;
        font-weight: bold;
        border-radius: 10px;
        padding: 8px 20px;
        transition: 0.3s;
    }
    .stButton>button:hover {
        background-color: #3c6d30 !important;
    }
    .header-text {
        text-align: center;
        margin-top: 20px;
        margin-bottom: 40px;
        background-color: rgba(255, 255, 255, 0.75);
        padding: 20px;
        border-radius: 20px;
        box-shadow: 0 4px 12px rgba(0,0,0,0.2);
    }
    .header-text h1 {
        font-size: 42px;
        color: #2c3e50;
        margin: 0;
        font-weight: bold;
    }
    .subtitle {
        font-size: 18px;
        color: #34495e;
        margin-top: 8px;
    }
    @keyframes fadeIn {
        from { opacity: 0; transform: translateY(10px); }
        to { opacity: 1; transform: translateY(0); }
    }
    </style>
""", unsafe_allow_html=True)

# Three equal-width columns; only the middle one is populated, which places
# the logo in the horizontal center of the page.
col1, col2, col3 = st.columns([1, 1, 1])
with col2:
    st.image("army.png", width=240)

# Header card (title + subtitle), styled by the .header-text / .subtitle rules
# injected above.
st.markdown("""
    <div class="header-text">
        <h1>چت‌ بات ارتش</h1>
        <div class="subtitle">دستیار هوشمند برای تصمیم‌گیری در میدان نبرد</div>
    </div>
""", unsafe_allow_html=True)


class TogetherEmbeddings(Embeddings):
    """LangChain ``Embeddings`` adapter that delegates to the Together client.

    Args:
        model_name: Identifier of the Together embedding model to query.
        api_key: API key passed to the ``Together`` client constructor.
    """

    def __init__(self, model_name: str, api_key: str):
        self.model_name = model_name
        self.client = Together(api_key=api_key)

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a list of texts with a single embeddings request.

        Args:
            texts: Strings to embed.

        Returns:
            One embedding vector per item in the API response, in the order
            the response lists them. An empty ``texts`` yields ``[]`` without
            contacting the API.
        """
        # Nothing to embed: avoid a pointless (and potentially rejected)
        # network request with an empty payload.
        if not texts:
            return []
        response = self.client.embeddings.create(model=self.model_name, input=texts)
        return [item.embedding for item in response.data]

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query string by reusing the batch code path."""
        return self.embed_documents([text])[0]

        
def count_tokens(text: str, model_name: str = "gpt-3.5-turbo") -> int:
    """Return the number of tiktoken tokens in *text*.

    Args:
        text: String to tokenize.
        model_name: Model name used to select the tiktoken encoding. Names
            tiktoken has no mapping for fall back to ``cl100k_base``.

    Returns:
        Length of the encoded token sequence.
    """
    try:
        enc = tiktoken.encoding_for_model(model_name)
    except KeyError:
        # encoding_for_model raises KeyError for unmapped model names; use a
        # general-purpose encoding instead of crashing the caller.
        enc = tiktoken.get_encoding("cl100k_base")
    # disallowed_special=() makes literal strings such as "<|endoftext|>" in
    # the input count as ordinary text instead of raising ValueError.
    return len(enc.encode(text, disallowed_special=()))
    
@st.cache_resource
def get_pdf_index():
    """Build the FAISS vector store for ``test1.pdf``.

    The function is wrapped in ``st.cache_resource`` so the result is reused
    instead of being rebuilt on every script run.

    Pipeline:
        1. Load every page of the PDF with ``PyPDFLoader``.
        2. Split page text into chunks of at most 124 characters
           (25 characters of overlap).
        3. Re-split any chunk whose token count exceeds 512, measuring length
           in tokens via ``count_tokens``.
        4. Embed the chunks with ``TogetherEmbeddings`` and index them in FAISS.

    Returns:
        The ``FAISS`` vector store built from the PDF chunks.

    Raises:
        ValueError: If the PDF yields no non-blank text to index.
    """
    initial_chunk_chars = 124
    max_tokens = 512

    with st.spinner('📄 در حال پردازش فایل PDF...'):
        loaders = [PyPDFLoader('test1.pdf')]
        pages = []
        for pdf_loader in loaders:
            pages.extend(pdf_loader.load())

        splitter_initial = RecursiveCharacterTextSplitter(
            chunk_size=initial_chunk_chars,
            chunk_overlap=25
        )

        small_chunks = []
        for page in pages:
            text = page.page_content
            if len(text) > initial_chunk_chars:
                small_chunks.extend(splitter_initial.split_text(text))
            else:
                small_chunks.append(text)

        # The safety splitter must measure length in tokens: a character-based
        # splitter with chunk_size=512 can never shrink a chunk that is already
        # at most 124 characters long. Built once, outside the loop.
        splitter_token_safe = RecursiveCharacterTextSplitter(
            chunk_size=max_tokens,
            chunk_overlap=100,
            length_function=count_tokens
        )

        final_chunks = []
        for chunk in small_chunks:
            # Blank pages produce empty chunks; they carry no retrievable
            # content, so they are not sent to the embedding API.
            if not chunk.strip():
                continue
            if count_tokens(chunk, model_name="gpt-3.5-turbo") > max_tokens:
                final_chunks.extend(splitter_token_safe.split_text(chunk))
            else:
                final_chunks.append(chunk)

        if not final_chunks:
            raise ValueError("No text could be extracted from 'test1.pdf'.")

        documents = [LangchainDocument(page_content=text) for text in final_chunks]

        # SECURITY: prefer the TOGETHER_API_KEY environment variable. The
        # literal fallback keeps existing deployments working, but a key
        # committed to source should be revoked and removed.
        embeddings = TogetherEmbeddings(
            model_name="togethercomputer/m2-bert-80M-32k-retrieval",
            api_key=os.environ.get(
                "TOGETHER_API_KEY",
                "0291f33aee03412a47fa5d8e562e515182dcc5d9aac5a7fb5eefdd1759005979"
            )
        )

        # Build the vector store directly with FAISS.
        vectordb = FAISS.from_documents(documents, embedding=embeddings)

        return vectordb

# Vector store for the PDF knowledge base.
index = get_pdf_index()

# Chat model served through Together's OpenAI-compatible endpoint.
# SECURITY: prefer the TOGETHER_API_KEY environment variable. The literal
# fallback keeps existing deployments working, but a key committed to source
# should be revoked and removed.
llm = ChatOpenAI(
    base_url="https://api.together.xyz/v1",
    api_key=os.environ.get(
        "TOGETHER_API_KEY",
        '0291f33aee03412a47fa5d8e562e515182dcc5d9aac5a7fb5eefdd1759005979'
    ),
    model="meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
)

# get_pdf_index() returns the vector store itself, which has no `.vectorstore`
# attribute; accept either the bare store or a wrapper that exposes one.
retriever = getattr(index, "vectorstore", index).as_retriever()

chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=retriever,
    input_key='question'
)

# Per-session chat history and the prompt still waiting for an answer.
if 'messages' not in st.session_state:
    st.session_state.messages = []

if 'pending_prompt' not in st.session_state:
    st.session_state.pending_prompt = None

# Replay the conversation so far. Message text comes from the user and the
# model, so it is rendered as plain markdown — no raw HTML.
for msg in st.session_state.messages:
    with st.chat_message(msg['role']):
        st.markdown(f"🗨️ {msg['content']}")

prompt = st.chat_input("چطور می‌تونم کمک کنم؟")

if prompt:
    # Store the user message and rerun so it is drawn by the history loop
    # before the answer is generated.
    st.session_state.messages.append({'role': 'user', 'content': prompt})
    st.session_state.pending_prompt = prompt
    st.rerun()

if st.session_state.pending_prompt:
    try:
        with st.chat_message('ai'):
            thinking = st.empty()
            thinking.markdown("🤖 در حال فکر کردن...")

            response = chain.run(f'question:پاسخ را فقط به زبان فارسی جواب بده {st.session_state.pending_prompt}')
            # Keep only the text after the prompt template's answer marker,
            # in case the model echoes it.
            answer = response.split("Helpful Answer:")[-1].strip()
            if not answer:
                answer = "متأسفم، اطلاعات دقیقی در این مورد ندارم."

            thinking.empty()
            full_response = ""
            placeholder = st.empty()
            # Split on single spaces only, so line breaks inside the answer
            # (markdown lists, paragraphs) survive the typing animation.
            for word in answer.split(" "):
                full_response += word + " "
                placeholder.markdown(full_response + "▌")
                time.sleep(0.03)

            placeholder.markdown(full_response)
            st.session_state.messages.append({'role': 'ai', 'content': full_response})
    finally:
        # Always clear the pending prompt; otherwise a failed request would be
        # retried (and fail again) on every subsequent rerun.
        st.session_state.pending_prompt = None