import os
import re
import time
import random
import gradio as gr
from huggingface_hub import InferenceClient

# Load environment variables from a local .env file (requires the python-dotenv package).
from dotenv import load_dotenv
load_dotenv()

# Retrieve the Hugging Face API token from the environment
# (the variable name HF_TOKEN is assumed here; adjust to match your .env file).
HF_TOKEN = os.getenv("HF_TOKEN")

# Initialize the InferenceClient (update the model as needed)
client = InferenceClient(
    model="microsoft/Phi-4-mini-reasoning",  # Change to your model if needed
    token=HF_TOKEN,
)

# Optional: Enable scraping if your site is deployed.
ENABLE_SCRAPING = False
SITE_URL = "https://your-agri-future-site.com"

# Global variable to hold scraped content.
knowledge_base = ""

# --- Optional: Scraping Functionality ---
if ENABLE_SCRAPING:
    try:
        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options
        from selenium.webdriver.common.by import By

        def scrape_site(url):
            options = Options()
            options.add_argument("--headless")  # Run browser in headless mode (options.headless is deprecated in Selenium 4).
            driver = webdriver.Chrome(options=options)
            driver.get(url)
            # Use explicit waits in production; here we use a basic sleep.
            time.sleep(5)
            try:
                # Customize the selector based on your site's HTML structure.
                content_element = driver.find_element(By.ID, "content")
                page_text = content_element.text
            except Exception as e:
                page_text = "Error encountered during scraping: " + str(e)
            driver.quit()
            return page_text

        knowledge_base = scrape_site(SITE_URL)
        print("Scraped knowledge base successfully.")
    except Exception as e:
        print("Scraping failed or Selenium is not configured:", e)
else:
    print("Scraping is disabled; proceeding without scraped site content.")

# --- Multilingual Helpers ---

def is_greeting(query: str, lang: str) -> bool:
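    """
    Return True if the query begins with a common greeting for the given
    language ("en", "fr", or "am"). Matching is prefix-based and, for
    Latin-script languages, case-insensitive.
    """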
    greetings = {
        "en": ["hello", "hi", "hey", "good morning", "good afternoon", "good evening"],
        "fr": ["bonjour", "salut", "coucou", "bonsoir"],
        "am": ["ሰላም", "ሰላም እንደምን", "እንዴት"]
    }
    greet_list = greetings.get(lang, greetings["en"])
    # For languages using Latin script, convert the query to lower case.
    if lang != "am":
        query = query.lower()
    return any(query.startswith(greet) for greet in greet_list)

def generate_dynamic_greeting(language: str) -> str:
    """
    Generate a dynamic, context-relevant greeting using the Hugging Face Inference API.
    """
    system_prompts = {
        "en": (
            "You are a friendly chatbot specializing in agriculture and agro-investment. "
            "A user just greeted you. Generate a warm, dynamic greeting message in English that is context-aware and encourages discussion about agriculture or agro-investment."
        ),
        "fr": (
            "Vous êtes un chatbot chaleureux spécialisé dans l'agriculture et les investissements agroalimentaires. "
            "Un utilisateur vient de vous saluer. Générez un message de salutation dynamique et chaleureux en français, en restant pertinent par rapport à l'agriculture ou aux investissements agroalimentaires."
        ),
        "am": (
            "እርስዎ በግብርናና በአገልግሎት ስርዓተ-ቢዝነስ ውስጥ ባለሙያ ቻትቦት ናቸው። "
            "ተጠቃሚው በአማርኛ ሰላም መልእክት አስቀድመዋል። "
            "በአማርኛ ተዛማጅ እና ትክክለኛ የሆነ ሰላም መልእክት ፍጥረት ያድርጉ።"
        )
    }
    prompt = system_prompts.get(language, system_prompts["en"])
    messages = [{"role": "system", "content": prompt}]
    response = client.chat_completion(
        messages,
        max_tokens=128,
        stream=False,
        temperature=1,
        top_p=0.95,
    )
    try:
        greeting_message = response.choices[0].message.content
    except AttributeError:
        greeting_message = str(response)
    return greeting_message.strip()

def generate_dynamic_out_of_scope_message(language: str) -> str:
    """
    Generate a dynamic out-of-scope message using the Hugging Face Inference API.
    """
    system_prompts = {
        "en": (
            "You are a helpful chatbot specializing in agriculture and agro-investment. "
            "A user just asked a question that is not related to these topics. "
            "Generate a friendly, varied, and intelligent out-of-scope response in English that kindly encourages the user to ask about agriculture or agro-investment."
        ),
        "fr": (
            "Vous êtes un chatbot utile spécialisé dans l'agriculture et les investissements agroalimentaires. "
            "Un utilisateur vient de poser une question qui ne concerne pas ces sujets. "
            "Générez une réponse élégante, variée et intelligente en français pour indiquer que la question est hors de portée, en invitant l'utilisateur à poser une question sur l'agriculture ou les investissements agroalimentaires."
        ),
        "am": (
            "እርስዎ በግብርናና በአገልግሎት ስርዓተ-ቢዝነስ ውስጥ በተለይ የተሞሉ ቻትቦት ናቸው። "
            "ተጠቃሚው ለግብርና ወይም ለአገልግሎት ስርዓተ-ቢዝነስ ተያይዞ ያልሆነ ጥያቄ አስቀድመዋል። "
            "በአማርኛ በተለያዩ መልኩ የውጭ ክፍል መልእክት ፍጥረት ያድርጉ፤ እባኮትን ተጠቃሚውን ለግብርና ወይም ለአገልግሎት ጥያቄዎች ለመጠየቅ ያነጋግሩ።"
        )
    }
    prompt = system_prompts.get(language, system_prompts["en"])
    messages = [{"role": "system", "content": prompt}]
    response = client.chat_completion(
        messages,
        max_tokens=128,
        stream=False,
        temperature=1,
        top_p=0.95,
    )
    try:
        out_message = response.choices[0].message.content
    except AttributeError:
        out_message = str(response)
    return out_message.strip()

def is_domain_query(query: str) -> bool:
    """
    Check if a query relates to agriculture or agro-investment.
    """
    domain_keywords = [
        "agriculture", "farming", "crop", "agro", "investment", "soil",
        "irrigation", "harvest", "organic", "sustainable", "agribusiness",
        "livestock", "agroalimentaire", "agriculture durable",
        "greenhouse", "horticulture", "pesticide", "fertilizer",
        "rural development", "food production", "crop yield", "farm equipment",
        "agronomy", "farming techniques", "organic farming", "agro-tech",
        "farm management", "agrifood"
    ]
    return any(re.search(r"\b" + keyword + r"\b", query, re.IGNORECASE) for keyword in domain_keywords)

def retrieve_relevant_snippet(query: str, text: str, max_length: int = 300) -> str:
    """
    Retrieve a relevant snippet from the text based on the query.
    """
    sentences = re.split(r'[.?!]', text)
    for sentence in sentences:
        if is_domain_query(sentence) and all(word.lower() in sentence.lower() for word in query.split()):
            snippet = sentence.strip()
            return snippet[:max_length] + "..." if len(snippet) > max_length else snippet
    return ""

# --- Chat Assistant Response Function ---
def respond(message, history: list, system_message, max_tokens, temperature, top_p, language):
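    """
    Generate a streamed reply for the Gradio ChatInterface.

    `history` is a list of (user_message, assistant_message) pairs maintained
    by gr.ChatInterface; the remaining arguments are supplied by the
    additional inputs defined below (system message, max tokens, temperature,
    top-p, and language).
    """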
    # Check for a greeting.
    if is_greeting(message, language):
        yield generate_dynamic_greeting(language)
        return

    # If query is out of domain, generate an out-of-scope message.
    if not is_domain_query(message):
        yield generate_dynamic_out_of_scope_message(language)
        return

    # Build conversation context from the system message and conversation history.
    messages_list = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages_list.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages_list.append({"role": "assistant", "content": assistant_msg})
    
    # Optionally add a relevant snippet from the scraped content (if available).
    if knowledge_base:
        snippet = retrieve_relevant_snippet(message, knowledge_base)
        if snippet:
            retrieval_context = f"Reference from Agri Future Investment platform: {snippet}"
            messages_list.insert(0, {"role": "system", "content": retrieval_context})
    
    messages_list.append({"role": "user", "content": message})
    
    # Generate the assistant's answer by streaming responses.
    response_text = ""
    for partial_response in client.chat_completion(
        messages_list,
        max_tokens=max_tokens,  # Use the value from the "Max New Tokens" slider instead of a hard-coded limit.
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if partial_response.choices and partial_response.choices[0].delta:
            token = partial_response.choices[0].delta.content
            if token:
                response_text += token
                yield response_text

# --- Gradio Chat Interface ---
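# gr.ChatInterface passes the message and history first, then each of the
# additional inputs below to respond() in order: system_message, max_tokens,
# temperature, top_p, language.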
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(
            value="You are AgriFutureBot, a specialized assistant for agriculture and agro-investment insights.", 
            label="System Message"
        ),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max New Tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (Nucleus Sampling)"),
        gr.Dropdown(choices=["en", "fr", "am"], value="en", label="Language")
    ],
)

if __name__ == "__main__":
    demo.launch()