File size: 3,639 Bytes
1d969fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import html
import re
from collections import defaultdict

import gradio as gr

def create_inverted_index(dataframe):
    """
    Create an inverted index from the Quran dataset for efficient word searching.

    Each whitespace-separated token of the ``Text`` column is normalised by
    removing non-word characters (``\\W``) and lower-casing it. A verse is
    recorded at most once per word, even when the word is repeated inside
    that verse, so iterating over a word's locations never yields the same
    verse twice.

    Args:
        dataframe (pd.DataFrame): The Quran dataset with ``Surah``, ``Ayah``
            and ``Text`` columns. Rows whose ``Text`` is not a string
            (for example NaN) are skipped.

    Returns:
        defaultdict: A dictionary mapping each normalised word to the list of
            ``(surah, ayah)`` tuples of the verses containing it, in row order.
    """
    non_word = re.compile(r'\W+')
    inverted_index = defaultdict(list)
    for _, row in dataframe.iterrows():
        text = row['Text']
        if not isinstance(text, str):
            # A missing cell has no words to index; calling .split() on it
            # would raise AttributeError.
            continue
        location = (row['Surah'], row['Ayah'])
        # Words already recorded for this verse, so a repeated word does not
        # produce a duplicate location.
        words_in_verse = set()
        for token in text.split():
            word = non_word.sub('', token).lower()
            if word and word not in words_in_verse:
                words_in_verse.add(word)
                inverted_index[word].append(location)
    return inverted_index

def search_quran(keyword, quran_data, inverted_index):
    """
    Search for a keyword in the Quran and return formatted results.

    The keyword is stripped of surrounding whitespace and looked up
    case-insensitively in ``inverted_index``. Every distinct verse listed
    for it is rendered once, with whole-word, case-insensitive matches of the
    keyword highlighted and counted.

    Args:
        keyword (str): The word to search for
        quran_data (pd.DataFrame): The Quran dataset with ``Surah``, ``Ayah``,
            ``Surah Name`` and ``Text`` columns
        inverted_index (defaultdict): The inverted index mapping lower-cased
            words to ``(surah, ayah)`` pairs

    Returns:
        str: HTML formatted search results, or a "No occurrences found"
            message when the keyword is empty, is not in the index, or none
            of its indexed verses exist in ``quran_data``.
    """
    import html  # local import keeps this block self-contained

    term = (keyword or "").strip()
    lookup_key = term.lower()
    # The keyword is user input that ends up inside HTML, so it is escaped
    # before being echoed back.
    not_found = f"No occurrences found for '{html.escape(term)}'"
    if not term or lookup_key not in inverted_index:
        return not_found

    # Whole-word, case-insensitive match. The index is word based, so a
    # substring match would highlight and count "he" inside "the".
    # re.escape keeps any regex metacharacter in the keyword literal.
    pattern = re.compile(rf"(?<!\w)({re.escape(term)})(?!\w)", re.IGNORECASE)
    mark = r'<mark style="background-color: yellow; color: black;">\1</mark>'

    results = []
    rendered = set()
    for surah_num, verse_num in inverted_index[lookup_key]:
        verse_id = (surah_num, verse_num)
        if verse_id in rendered:
            # The index may list a verse more than once; show it only once.
            continue
        rendered.add(verse_id)

        verse_data = quran_data[(quran_data['Surah'] == surah_num) &
                                (quran_data['Ayah'] == verse_num)]
        if verse_data.empty:
            # Index entry without a matching row: skip it rather than let
            # .iloc[0] raise IndexError.
            continue
        surah_name = verse_data['Surah Name'].iloc[0]
        verse_text = verse_data['Text'].iloc[0]
        # subn returns the highlighted text and the number of replacements,
        # so the displayed count always equals the number of highlights.
        highlighted_text, occurrences = pattern.subn(mark, verse_text)
        results.append(
            f"<div style='padding: 10px; border-bottom: 1px solid #ccc;'>"
            f"<strong>Surah:</strong> {surah_name}<br>"
            f"<strong>Reference:</strong> {surah_num}:{verse_num}<br>"
            f"<strong>Occurrences in verse:</strong> {occurrences}<br>"
            f"<strong>Text:</strong> {highlighted_text}</div>"
        )

    if not results:
        return not_found
    return '<div style="max-height: 500px; overflow-y: auto; width: 100%; font-size: 18px;">' + ''.join(results) + '</div>'

def create_search_tab(quran_data):
    """
    Build the "Search" tab: heading, description, keyword box, results area
    and search button.

    The inverted index is computed once when the tab is created and is shared,
    through a closure, by every search triggered from the button.

    Args:
        quran_data (pd.DataFrame): The Quran dataset

    Returns:
        gr.Tab: The configured search tab
    """
    word_index = create_inverted_index(quran_data)

    def search_wrapper(keyword):
        # Bind the dataset and the index so the click handler only needs
        # the keyword typed by the user.
        return search_quran(keyword, quran_data, word_index)

    with gr.Tab("Search") as search_tab:
        gr.Markdown("# Quran Keyword Search")
        gr.Markdown("""
        <div class='tab-description'>
        Welcome to the Quran Search tool! This feature allows you to:
        - Search for any word or phrase in the English translation
        - See highlighted matches in their original context
        - View Surah name and verse references for each result
        - Track the number of occurrences in each verse
        </div>
        """)

        # Components are created in display order: input, results, button.
        query_box = gr.Textbox(
            placeholder="Type a word to search in the Quran",
            label="Enter a keyword",
        )
        results_pane = gr.HTML(label="Search Results")
        run_button = gr.Button("Search")

        run_button.click(
            fn=search_wrapper,
            inputs=query_box,
            outputs=results_pane,
        )

    return search_tab