Spaces:

M17idd
/

army

Running

App Files Files Community

M17idd committed 13 days ago

Commit

50b8f6f

1 Parent(s): d3b344a

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -3

app.py CHANGED Viewed

@@ -559,6 +559,32 @@ def extract_keywords_from_text(text, query_words):
 def clean_text(text):
     """Return *text* with every disallowed character removed.

     Characters that survive are those in the ranges ``آ-ی``, ``۰-۹`` and
     ``0-9``, the punctuation marks ``، . ؟ ! ؛ + - *`` and the space
     character. Every run of anything else is replaced by the empty string.
     """
     disallowed_run = re.compile(r'[^آ-ی۰-۹0-9،.؟!؛+\-* ]+')
     return disallowed_run.sub('', text)
 def find_closest_lines(query, doc_texts, stop_words, top_n=10):
     cleaned_query = remove_stop_words(query, stop_words)
     query_words = cleaned_query.split()
@@ -593,16 +619,18 @@ if query:
         clean_text(" ".join([word for word in line.split() if word not in stop_words]))
         for line in closest_lines
     ]
-    if cleaned_closest_lines:
-        cleaned_text = "\n".join(cleaned_closest_lines[:1])
         prompt = f"""
         لطفاً با توجه به سؤال زیر و محتوای خطوط مرتبط، یک پاسخ نهایی حرفه‌ای، دقیق و روان تولید کن. فقط از متن خطوط مرتبط استفاده کن. اگر اطلاعات کافی در متن وجود ندارد، صادقانه اعلام کن.
         سوال:
         {query}
         خطوط مرتبط:
-        {cleaned_text}
         پاسخ نهایی:
         """

 def clean_text(text):
     """Return *text* with every disallowed character removed.

     Characters that survive are those in the ranges ``آ-ی``, ``۰-۹`` and
     ``0-9``, the punctuation marks ``، . ؟ ! ؛ + - *`` and the space
     character. Every run of anything else is replaced by the empty string.
     """
     disallowed_run = re.compile(r'[^آ-ی۰-۹0-9،.؟!؛+\-* ]+')
     return disallowed_run.sub('', text)
+from collections import Counter
+import heapq
def summarize_text_by_frequency(text, num_sentences=3, stop_words=None):
    """Select the highest-scoring lines of *text* by word-frequency scoring.

    The text is split on newlines and each line is treated as one sentence.
    Every whitespace-separated word that is not a stop word is counted across
    the whole text; a line's score is the sum of those counts over the words
    it contains (repeated words count each time they appear in the line).

    Args:
        text: Newline-separated lines to summarise.
        num_sentences: Maximum number of lines to return.
        stop_words: Container of words to ignore when counting and scoring.
            When omitted, the module-level ``stop_words`` is used if it
            exists (the behaviour before this parameter was added);
            otherwise no word is treated as a stop word.

    Returns:
        The selected lines joined with newlines, ordered from highest to
        lowest score. An empty string when no line contains a scorable word
        (empty text, or only stop words).
    """
    if stop_words is None:
        # Backward compatibility: earlier callers relied on a module-level
        # ``stop_words`` instead of passing one in.
        stop_words = globals().get("stop_words", ())

    # Tokenise each line once; both the counting and scoring passes reuse it.
    tokenized = [(line, line.split()) for line in text.split('\n')]

    word_freq = Counter(
        word
        for _, words in tokenized
        for word in words
        if word not in stop_words
    )

    sentence_scores = {}
    for line, words in tokenized:
        # Scores are keyed by line text, so a repeated line must be scored
        # only once; otherwise its score grows with every duplicate.
        if line in sentence_scores:
            continue
        counted = [word_freq[word] for word in words if word in word_freq]
        # Lines with no scorable word are left out entirely, so they can
        # never be selected.
        if counted:
            sentence_scores[line] = sum(counted)

    best_lines = heapq.nlargest(
        num_sentences, sentence_scores, key=sentence_scores.get
    )
    return "\n".join(best_lines)
 def find_closest_lines(query, doc_texts, stop_words, top_n=10):
     cleaned_query = remove_stop_words(query, stop_words)
     query_words = cleaned_query.split()
         clean_text(" ".join([word for word in line.split() if word not in stop_words]))
         for line in closest_lines
     ]
+    summarized_text = summarize_text_by_frequency("\n".join(cleaned_closest_lines), num_sentences=1)
+    if summarized_text:
+        # cleaned_text = "\n".join(cleaned_closest_lines[:1])
         prompt = f"""
         لطفاً با توجه به سؤال زیر و محتوای خطوط مرتبط، یک پاسخ نهایی حرفه‌ای، دقیق و روان تولید کن. فقط از متن خطوط مرتبط استفاده کن. اگر اطلاعات کافی در متن وجود ندارد، صادقانه اعلام کن.
         سوال:
         {query}
         خطوط مرتبط:
+        {summarized_text}
         پاسخ نهایی:
         """