# Streamlit front-end for a Persian document question-answering assistant.
#
# Flow of one script run:
#   1. Gate the page behind a simple username/password form.
#   2. Draw the sidebar menu and the page header.
#   3. Read a chat query, find the lines of the .docx corpus that best match
#      it, condense them, and ask the chat model for a polished Persian answer.
#   4. Ask the model to review its own answer; if the review rejects it (or
#      no matching lines exist) fall back to a direct answer.
#
# NOTE(review): many st.markdown() calls below carry an almost empty string
# together with unsafe_allow_html=True. They look like placeholders whose
# HTML/CSS payload is missing from this copy of the file; they are reproduced
# unchanged so the widget order stays the same.

# The star import is kept for backward compatibility and placed first so that
# none of the explicit imports below can be shadowed by it.
from hazm import *  # noqa: F401,F403

import concurrent.futures
import heapq
import html
import os
import re
import time
from collections import Counter

import docx
import nltk
import numpy as np
import streamlit as st
from hazm import Normalizer, SentenceTokenizer
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage
from rapidfuzz import fuzz

# Text the reviewing model is instructed to return when it accepts an answer.
APPROVAL_MARKER = "تأیید شد"

# Everything that is NOT a Persian letter, a digit, basic punctuation or a
# space is removed by clean_text(). Compiled once instead of on every call.
_DISALLOWED_CHARS = re.compile(r'[^آ-ی۰-۹0-9،.؟!؛+\-* ]+')

# SECURITY: credentials and the API key should come from the environment.
# The literals are kept only as fallbacks so existing deployments keep
# working; they are exposed in source control and should be rotated/removed.
_LOGIN_USERNAME = os.environ.get("APP_USERNAME", "admin")
_LOGIN_PASSWORD = os.environ.get("APP_PASSWORD", "123")
_TOGETHER_API_KEY = os.environ.get(
    "TOGETHER_API_KEY",
    '0291f33aee03412a47fa5d8e562e515182dcc5d9aac5a7fb5eefdd1759005979',
)


@st.cache_resource(show_spinner=False)
def _ensure_nltk_data() -> None:
    """Download the NLTK 'punkt' data once per server process.

    Streamlit re-executes the whole script on every interaction; without the
    cache the download check would run on each rerun.
    """
    nltk.download('punkt')


@st.cache_data(show_spinner="در حال پردازش اسناد... لطفاً صبور باشید.")
def load_and_process_documents(path):
    """Read and normalize every ``.docx`` file directly inside *path*.

    Args:
        path: Directory that holds the Word documents.

    Returns:
        dict mapping each file name to its normalized text. A file that
        cannot be read maps to an empty string (the error is printed).
    """

    def process_docx(filename):
        # Best-effort per file: one broken document must not abort the load.
        try:
            document = docx.Document(os.path.join(path, filename))
            raw_text = "\n".join(para.text for para in document.paragraphs)
            return filename, normalizer.normalize(raw_text)
        except Exception as e:
            print(f"Error processing {filename}: {e}")
            return filename, ""

    filenames = [f for f in os.listdir(path) if f.endswith(".docx")]
    # Files are processed on a thread pool; map() yields results in input order.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        return dict(executor.map(process_docx, filenames))


def remove_stop_words(text, stop_words):
    """Return *text* with every whitespace-separated stop word removed."""
    return " ".join(word for word in text.split() if word not in stop_words)


def extract_keywords_from_text(text, query_words):
    """Return the lines of *text* that contain any of *query_words*.

    Matching is by substring, so a query word also matches inside a longer
    word on the line.
    """
    return [
        line
        for line in text.split("\n")
        if any(query_word in line for query_word in query_words)
    ]


def clean_text(text):
    """Strip every character outside the allowed Persian/digit/punctuation set."""
    return _DISALLOWED_CHARS.sub('', text)


def summarize_text_by_frequency(text, num_sentences=1):
    """Pick the highest-scoring lines of *text* by summed word frequency.

    Each line is treated as a sentence. Words are counted across the whole
    text (ignoring the module-level ``stop_words``) and a sentence's score
    is the sum of the counts of its counted words.

    Args:
        text: Newline-separated sentences.
        num_sentences: How many top sentences to keep.

    Returns:
        The selected sentences joined with newlines; an empty string when no
        sentence contains a counted word.
    """
    sentences = text.split('\n')

    word_freq = Counter(
        word
        for sentence in sentences
        for word in sentence.split()
        if word not in stop_words
    )

    sentence_scores = {}
    for sentence in sentences:
        for word in sentence.split():
            # Only counted (non stop) words contribute; a sentence made up
            # solely of stop words never gets an entry.
            if word in word_freq:
                sentence_scores[sentence] = (
                    sentence_scores.get(sentence, 0) + word_freq[word]
                )

    best = heapq.nlargest(num_sentences, sentence_scores, key=sentence_scores.get)
    return "\n".join(best)


def find_closest_lines(query, doc_texts, stop_words, top_n=5):
    """Return up to *top_n* corpus lines most similar to *query*.

    Candidate lines are those containing at least one non-stop word of the
    query; they are ranked by ``rapidfuzz.fuzz.partial_ratio`` against the
    full query.

    Args:
        query: The user's question.
        doc_texts: Mapping of file name to document text.
        stop_words: Words to ignore when choosing candidate lines.
        top_n: Maximum number of lines to return.
    """
    query_words = remove_stop_words(query, stop_words).split()

    scored_lines = [
        (line, fuzz.partial_ratio(query, line))
        for text in doc_texts.values()
        for line in extract_keywords_from_text(text, query_words)
    ]
    # Stable sort: equally similar lines keep their document order.
    scored_lines.sort(key=lambda pair: pair[1], reverse=True)
    return [line for line, _ in scored_lines[:top_n]]


def remove_stop_words_from_lines(lines, stop_words):
    """Apply :func:`remove_stop_words` to every line and return the new list."""
    return [remove_stop_words(line, stop_words) for line in lines]


def _ask_llm(prompt):
    """Send *prompt* to the chat model and return the stripped reply text."""
    response = llm.invoke([
        SystemMessage(content="You are a helpful assistant."),
        HumanMessage(content=prompt),
    ])
    return response.content.strip()


def _build_answer_prompt(query, summarized_text):
    """Prompt asking the model to answer *query* from the extracted sentences."""
    return (
        " تعدادی جمله مرتبط با سوال زیر استخراج شده است. "
        "لطفاً ابتدا این جملات را بررسی کن، سپس با در نظر گرفتن محتوای سوال و لحن آن، یک پاسخ نهایی حرفه‌ای، دقیق و روان ارائه کن که هم به سوال پاسخ دهد و هم از نظر نگارشی و ساختاری در سطح بالایی باشد. "
        "پاسخ نهایی باید حداکثر 1024 کاراکتر و حداقل 512 کاراکتر باشد، خلاصه و واضح نوشته شود و فقط به زبان فارسی باشد. "
        "از تکرار اضافی پرهیز کن و در صورت نیاز، محتوای جملات را ترکیب کن. \n"
        f"سوال: {query} جملات مرتبط: {summarized_text} پاسخ نهایی حرفه‌ای بازنویسی‌شده: "
    )


def _build_review_prompt(query, rewritten):
    """Prompt asking the model to approve or reject a drafted answer."""
    return (
        " لطفاً بررسی کن که آیا پاسخ زیر به سوال داده‌شده مرتبط، صحیح، معتبر و قابل قبول است یا نه. "
        "اگر پاسخ مرتبط و صحیح است، فقط بنویس: 'تأیید شد'. "
        "اگر پاسخ اشتباه است یا ربطی به سوال ندارد یا معتبر نیست، فقط بنویس: 'نیاز به اصلاح دارد'. "
        f"سوال: {query} پاسخ: {rewritten} "
    )


def _build_fallback_prompt(query):
    """Prompt used when no usable (or approved) document-based answer exists."""
    return (
        " هیچ جمله مرتبط مستقیمی با سوال یافت نشد. "
        "لطفاً با توجه به سوال زیر، یک پاسخ حرفه‌ای، مرتبط، معنادار و معتبر تولید کن. "
        "فقط از جملات موجود در اسناد معتبر استفاده کن و از ساخت جملات ساختگی یا استفاده از زبان غیر فارسی خودداری کن. "
        f"سوال: {query} "
    )


# --------------------------------------------------------------------------
# Page start
# --------------------------------------------------------------------------
_ensure_nltk_data()

st.markdown(""" """, unsafe_allow_html=True)
st.markdown(""" """, unsafe_allow_html=True)

# --------------------------------------------------------------------------
# Login gate
# --------------------------------------------------------------------------
if "authenticated" not in st.session_state:
    st.session_state.authenticated = False

if not st.session_state.authenticated:
    st.markdown('', unsafe_allow_html=True)
    st.markdown(""" """, unsafe_allow_html=True)
    st.markdown(""" """, unsafe_allow_html=True)

    username = st.text_input(
        "نام کاربری:",
        placeholder="شناسه خود را وارد کنید",
        label_visibility="visible",
    )
    password = st.text_input(
        "رمز عبور:",
        placeholder="رمز عبور ",
        type="password",
        label_visibility="visible",
    )
    st.markdown(""" """, unsafe_allow_html=True)

    if st.button("ورود"):
        if username == _LOGIN_USERNAME and password == _LOGIN_PASSWORD:
            st.session_state.authenticated = True
            st.rerun()
        else:
            st.markdown(
                "\n\nنام کاربری یا رمز عبور اشتباه است.\n\n",
                unsafe_allow_html=True,
            )

    # Nothing below may render until the user has logged in.
    st.stop()

# --------------------------------------------------------------------------
# Sidebar
# --------------------------------------------------------------------------
with st.sidebar:
    st.image("log.png", use_container_width=True)

    # (label, icon URL, target URL or None)
    menu_items = [
        ("گزارش عملیاتی", "https://cdn-icons-png.flaticon.com/512/3596/3596165.png", "https://m17idd-reporting.hf.space"),
        ("تاریخچه ماموریت‌ها", "https://cdn-icons-png.flaticon.com/512/709/709496.png", None),
        ("تحلیل داده‌های نظامی", "https://cdn-icons-png.flaticon.com/512/1828/1828932.png", "https://m17idd-test.hf.space"),
        ("مدیریت منابع", "https://cdn-icons-png.flaticon.com/512/681/681494.png", None),
        ("دستیار فرماندهی", "https://cdn-icons-png.flaticon.com/512/3601/3601646.png", None),
        ("تنظیمات امنیتی", "https://cdn-icons-png.flaticon.com/512/2099/2099058.png", None),
        ("پشتیبانی فنی", "https://cdn-icons-png.flaticon.com/512/597/597177.png", None),
    ]

    st.markdown(""" """, unsafe_allow_html=True)

    for idx, (text, icon, link) in enumerate(menu_items):
        # NOTE(review): the entry template is blank in this copy of the file,
        # so text/icon/link are currently not rendered. Restore the markup
        # here (escaping `text`) when it is available.
        content = f""" """
        if link:
            content = f'{content}'
        st.markdown(content, unsafe_allow_html=True)

        # Spacer after the 2nd, 4th and 6th entries.
        if idx in [1, 3, 5]:
            st.markdown("\n\n", unsafe_allow_html=True)

# --------------------------------------------------------------------------
# Header and model
# --------------------------------------------------------------------------
st.markdown(
    "\n\nرزم‌‌یار‌ارتش\n\nدستیارهوشمندارتش جمهوری اسلامی ایران\n\n",
    unsafe_allow_html=True,
)

llm = ChatOpenAI(
    base_url="https://api.together.xyz/v1",
    api_key=_TOGETHER_API_KEY,
    model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
    max_tokens=1024,  # specify the max tokens as per your requirement
)

st.markdown(""" """, unsafe_allow_html=True)
st.markdown(""" """, unsafe_allow_html=True)
st.markdown(""" """, unsafe_allow_html=True)
st.markdown(""" """, unsafe_allow_html=True)
st.markdown(""" """, unsafe_allow_html=True)

# --------------------------------------------------------------------------
# Chat input
# --------------------------------------------------------------------------
query = st.chat_input("چطور می‌تونم کمک کنم؟")

if query:
    # The query is user input rendered with unsafe_allow_html=True, so it is
    # HTML-escaped to prevent markup/script injection.
    st.markdown(f"\n\n{html.escape(query)}\n\n", unsafe_allow_html=True)
    # Placeholder that is cleared once an answer has been produced.
    think = st.empty()
    think.markdown("\n\nدر حال فکر کردن...\n\n", unsafe_allow_html=True)
else:
    st.markdown("")

st.markdown(""" """, unsafe_allow_html=True)

# --------------------------------------------------------------------------
# Corpus and stop words
# --------------------------------------------------------------------------
folder_path = '46'
normalizer = Normalizer()

doc_texts = load_and_process_documents(folder_path)

with open('stopwords.txt', 'r', encoding='utf-8') as file:
    # Strip stray whitespace so a padded entry still matches split() tokens.
    stop_words = {
        line.strip() for line in file.read().splitlines() if line.strip()
    }

# --------------------------------------------------------------------------
# Answer the query
# --------------------------------------------------------------------------
if query:
    try:
        closest_lines = find_closest_lines(query, doc_texts, stop_words, top_n=5)
        cleaned_closest_lines = [
            clean_text(remove_stop_words(line, stop_words))
            for line in closest_lines
        ]
        summarized_text = summarize_text_by_frequency(
            "\n".join(cleaned_closest_lines), num_sentences=1
        )

        rewritten = None
        if summarized_text:
            draft = _ask_llm(_build_answer_prompt(query, summarized_text))
            review_result = _ask_llm(_build_review_prompt(query, draft))
            if APPROVAL_MARKER in review_result:
                rewritten = draft

        # Fall back both when no related sentence was found and when the
        # review rejected the draft, so the user always receives an answer.
        if rewritten is None:
            rewritten = _ask_llm(_build_fallback_prompt(query))

        # clean_text() also removes '<' and '>', so the model output cannot
        # inject HTML even though unsafe_allow_html is enabled.
        rewritten = clean_text(rewritten)
        st.markdown(f"\n\n{rewritten}\n\n", unsafe_allow_html=True)
    finally:
        # Always remove the "thinking" indicator, including on errors.
        think.empty()