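"""WikiTrail: a Streamlit app that searches Wikipedia in a chosen language,
shows the main article summary and a handful of related topics, and builds a
combined bullet summary that can be downloaded as plain text."""
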
import os
os.environ["HOME"] = "/tmp"  # ✅ Fix streamlit write permission on Hugging Face

import streamlit as st
import requests
import urllib.parse

st.set_page_config(page_title="WikiTrail", layout="wide")

st.title("📚 WikiTrail")
st.markdown("Explore Wikipedia topics visually and get a summarized journey.")

# 🌐 Language options
languages = {
    "English": "en",
    "Hindi (हिन्दी)": "hi",
    "Telugu (తెలుగు)": "te",
    "Tamil (தமிழ்)": "ta"
}
lang_name = st.selectbox("🌐 Select Language", list(languages.keys()))
lang_code = languages[lang_name]

# 🔍 Topic input
topic_input = st.text_input("🔍 Enter a topic (in English)", placeholder="e.g., India, Telangana, Gandhi")
topic_input = topic_input.strip()

# 🔧 Resolve the best-matching page title (falls back to the query if search fails)
def get_translated_title(query, lang):
    try:
        search_url = f"https://{lang}.wikipedia.org/w/api.php"
        params = {
            "action": "query",
            "list": "search",
            "srsearch": query,
            "format": "json",
            "origin": "*"
        }
        res = requests.get(search_url, params=params, timeout=10)
        res.raise_for_status()
        data = res.json()
        results = data.get("query", {}).get("search", [])
        if results:
            return results[0]["title"]
        return query
    except (requests.RequestException, ValueError):
        # Network or JSON-decoding error: fall back to the raw query
        return query

# 🔍 Summary fetch (REST summary endpoint)
def fetch_summary(title, lang):
    title_encoded = urllib.parse.quote(title.replace(" ", "_"))
    url = f"https://{lang}.wikipedia.org/api/rest_v1/page/summary/{title_encoded}"
    try:
        res = requests.get(url, timeout=10)
    except requests.RequestException:
        return None
    if res.status_code == 200:
        data = res.json()
        return {
            "title": data.get("title", ""),
            "summary": data.get("extract", ""),
            "link": data.get("content_urls", {}).get("desktop", {}).get("page")
        }
    return None

# 🔗 Related links (first five outgoing links on the page)
def fetch_related(title, lang):
    url = f"https://{lang}.wikipedia.org/w/api.php"
    params = {
        "action": "query",
        "format": "json",
        "origin": "*",
        "titles": title,
        "prop": "links",
        "pllimit": "5"
    }
    try:
        res = requests.get(url, params=params, timeout=10)
    except requests.RequestException:
        return []
    if res.status_code == 200:
        data = res.json()
        pages = list(data.get("query", {}).get("pages", {}).values())
        if pages and "links" in pages[0]:
            return [link["title"] for link in pages[0]["links"]]
    return []

# 🧠 Bullet summary: de-duplicate, split into sentences, keep the first few
def summarize_bullets(summaries, limit=3):
    full = ' '.join(dict.fromkeys(summaries))  # dedupe while preserving order (set() reorders)
    # "।" is the danda, the sentence terminator used in Hindi and other Indic scripts
    sentences = full.replace("।", ".").replace("!", ".").replace("?", ".").split(".")
    clean = [s.strip() for s in sentences if s.strip()]
    return ["• " + s + "." for s in clean[:limit]] if clean else ["No summary available."]

# 🔍 Main logic
if topic_input:
    with st.spinner("🔍 Searching Wikipedia..."):
        all_text = ""
        summaries = []

        title = get_translated_title(topic_input, lang_code)
        st.caption(f"📄 Fetched title: {title}")

        main = fetch_summary(title, lang_code)
        if not main:
            st.error(f"No matching page found in {lang_name} for '{topic_input}'")
            st.stop()

        st.subheader("🔷 Main Topic")
        st.markdown(f"### {main['title']}")
        st.write(main["summary"])
        st.markdown(f"[Read More →]({main['link']})", unsafe_allow_html=True)

        summaries.append(main["summary"])
        all_text += f"{main['title']} ({lang_name})\n\n{main['summary']}\n\n"

        st.subheader("🔗 Related Topics")
        related = fetch_related(title, lang_code)
        if related:
            for r in related:
                sub = fetch_summary(r, lang_code)
                if sub and sub["summary"] not in summaries:
                    summaries.append(sub["summary"])
                    all_text += f"{sub['title']}\n{sub['summary']}\n\n"
                    with st.expander(sub["title"]):
                        st.write(sub["summary"])
                        st.markdown(f"[Read More →]({sub['link']})", unsafe_allow_html=True)
        else:
            st.info("No related topics found.")

        st.subheader("🧠 Combined Summary")
        for bullet in summarize_bullets(summaries):
            st.markdown(bullet)

        st.download_button(
            label="📥 Download Summary",
            data=all_text,
            file_name=f"{main['title']}_summary.txt",
            mime="text/plain"
        )
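
# To run locally (assuming this file is saved as app.py):
#   streamlit run app.py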