"""AgriFutureBot: a Gradio chat UI backed by the Hugging Face Inference API.

The bot answers agriculture / agro-investment questions, replies to greetings
with a generated welcome, and politely declines out-of-domain questions.
Optionally, text scraped from a companion site is used as reference context.
"""

import os
import re
import time
import random

import gradio as gr
from huggingface_hub import InferenceClient

# NOTE: no .env file is loaded and no API token is passed explicitly here.
# InferenceClient is expected to fall back to the ambient Hugging Face
# credentials (e.g. the HF_TOKEN environment variable or a cached login) --
# confirm this for your deployment.

# Initialize the InferenceClient (update the model as needed).
client = InferenceClient(
    model="microsoft/Phi-4-mini-reasoning"  # Change to your model if needed
)

# Optional: Enable scraping if your site is deployed.
ENABLE_SCRAPING = False
SITE_URL = "https://your-agri-future-site.com"

# Global variable to hold scraped content.
knowledge_base = ""

# --- Optional: Scraping Functionality ---
if ENABLE_SCRAPING:
    try:
        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options
        from selenium.webdriver.common.by import By

        def scrape_site(url):
            """Return the text of the page's ``#content`` element.

            Returns an empty string when the element cannot be read, so that
            an error message is never mistaken for site content.
            """
            options = Options()
            # The ``headless`` attribute is no longer supported in recent
            # Selenium 4 releases; the command-line flag is the supported way.
            options.add_argument("--headless=new")
            driver = webdriver.Chrome(options=options)
            try:
                driver.get(url)
                # Use explicit waits in production; here we use a basic sleep.
                time.sleep(5)
                try:
                    # Customize the selector based on your site's HTML structure.
                    return driver.find_element(By.ID, "content").text
                except Exception as e:
                    print("Error encountered during scraping: " + str(e))
                    return ""
            finally:
                # Always release the browser, even if navigation failed.
                driver.quit()

        knowledge_base = scrape_site(SITE_URL)
        if knowledge_base:
            print("Scraped knowledge base successfully.")
        else:
            print("Scraping returned no content; proceeding without it.")
    except Exception as e:
        print("Scraping failed or Selenium is not configured:", e)
else:
    print("Scraping is disabled; proceeding without scraped site content.")


# --- Multilingual Helpers ---
_GREETINGS = {
    "en": ["hello", "hi", "hey", "good morning", "good afternoon", "good evening"],
    "fr": ["bonjour", "salut", "coucou", "bonsoir"],
    # NOTE(review): the last entry appears to be a question word ("how")
    # rather than a greeting -- confirm with an Amharic speaker.
    "am": ["ሰላም", "ሰላም እንደምን", "እንዴት"],
}


def is_greeting(query: str, lang: str) -> bool:
    """Return True when *query* opens with a greeting in language *lang*.

    Unknown languages fall back to the English greeting list. For
    Latin-script languages the match is case-insensitive and must end on a
    word boundary, so "hi there" is a greeting but "high-yield crops" is not.
    Amharic keeps a plain prefix match.
    """
    greet_list = _GREETINGS.get(lang, _GREETINGS["en"])
    text = query.strip()
    if lang == "am":
        return text.startswith(tuple(greet_list))
    # For languages using Latin script, convert the query to lower case.
    text = text.lower()
    return any(re.match(re.escape(greet) + r"\b", text) for greet in greet_list)


def _generate_from_system_prompt(system_prompts: dict, language: str) -> str:
    """Send the system prompt for *language* (English fallback) and return the reply text."""
    prompt = system_prompts.get(language, system_prompts["en"])
    messages = [{"role": "system", "content": prompt}]
    response = client.chat_completion(
        messages,
        max_tokens=128,
        stream=False,
        temperature=1,
        top_p=0.95,
    )
    try:
        text = response.choices[0].message.content
    except (AttributeError, IndexError):
        # Unexpected response shape: fall back to its string form.
        text = str(response)
    # ``content`` may be None; never call .strip() on it.
    return (text or "").strip()


def generate_dynamic_greeting(language: str) -> str:
    """
    Generate a dynamic, context-relevant greeting using the Hugging Face Inference API.
    """
    system_prompts = {
        "en": (
            "You are a friendly chatbot specializing in agriculture and agro-investment. "
            "A user just greeted you. Generate a warm, dynamic greeting message in English that is context-aware and encourages discussion about agriculture or agro-investment."
        ),
        "fr": (
            "Vous êtes un chatbot chaleureux spécialisé dans l'agriculture et les investissements agroalimentaires. "
            "Un utilisateur vient de vous saluer. Générez un message de salutation dynamique et chaleureux en français, en restant pertinent par rapport à l'agriculture ou aux investissements agroalimentaires."
        ),
        "am": (
            "እርስዎ በግብርናና በአገልግሎት ስርዓተ-ቢዝነስ ውስጥ ባለሙያ ቻትቦት ናቸው። "
            "ተጠቃሚው በአማርኛ ሰላም መልእክት አስቀድመዋል። "
            "በአማርኛ ተዛማጅ እና ትክክለኛ የሆነ ሰላም መልእክት ፍጥረት ያድርጉ።"
        ),
    }
    return _generate_from_system_prompt(system_prompts, language)


def generate_dynamic_out_of_scope_message(language: str) -> str:
    """
    Generate a dynamic out-of-scope message using the Hugging Face Inference API.
    """
    system_prompts = {
        "en": (
            "You are a helpful chatbot specializing in agriculture and agro-investment. "
            "A user just asked a question that is not related to these topics. "
            "Generate a friendly, varied, and intelligent out-of-scope response in English that kindly encourages the user to ask about agriculture or agro-investment."
        ),
        "fr": (
            "Vous êtes un chatbot utile spécialisé dans l'agriculture et les investissements agroalimentaires. "
            "Un utilisateur vient de poser une question qui ne concerne pas ces sujets. "
            "Générez une réponse élégante, variée et intelligente en français pour indiquer que la question est hors de portée, en invitant l'utilisateur à poser une question sur l'agriculture ou les investissements agroalimentaires."
        ),
        "am": (
            "እርስዎ በግብርናና በአገልግሎት ስርዓተ-ቢዝነስ ውስጥ በተለይ የተሞሉ ቻትቦት ናቸው። "
            "ተጠቃሚው ለግብርና ወይም ለአገልግሎት ስርዓተ-ቢዝነስ ተያይዞ ያልሆነ ጥያቄ አስቀድመዋል። "
            "በአማርኛ በተለያዩ መልኩ የውጭ ክፍል መልእክት ፍጥረት ያድርጉ፤ እባኮትን ተጠቃሚውን ለግብርና ወይም ለአገልግሎት ጥያቄዎች ለመጠየቅ ያነጋግሩ።"
        ),
    }
    return _generate_from_system_prompt(system_prompts, language)


_DOMAIN_KEYWORDS = [
    "agriculture", "farming", "crop", "agro", "investment", "soil", "irrigation",
    "harvest", "organic", "sustainable", "agribusiness", "livestock",
    "agroalimentaire", "agriculture durable", "greenhouse", "horticulture",
    "pesticide", "fertilizer", "rural development", "food production",
    "crop yield", "farm equipment", "agronomy", "farming techniques",
    "organic farming", "agro-tech", "farm management", "agrifood",
]

# One precompiled alternation instead of a regex built per keyword per call.
# Keywords are escaped so punctuation in them is always matched literally.
_DOMAIN_PATTERN = re.compile(
    r"\b(?:" + "|".join(re.escape(keyword) for keyword in _DOMAIN_KEYWORDS) + r")\b",
    re.IGNORECASE,
)


def is_domain_query(query: str) -> bool:
    """
    Check if a query relates to agriculture or agro-investment.

    A query is in-domain when it contains at least one domain keyword as a
    whole word or phrase (case-insensitive).
    """
    return _DOMAIN_PATTERN.search(query) is not None


def retrieve_relevant_snippet(query: str, text: str, max_length: int = 300) -> str:
    """
    Retrieve a relevant snippet from the text based on the query.

    Returns the first sentence of *text* that is in-domain and contains every
    word of *query* (case-insensitive substring match), truncated to
    *max_length* characters with a trailing "..."; returns "" if none matches.
    """
    # Tokenise on word characters so trailing punctuation ("crop?") does not
    # prevent a match: sentences are split on that same punctuation.
    query_words = re.findall(r"\w+", query.lower())
    for sentence in re.split(r"[.?!]", text):
        if not is_domain_query(sentence):
            continue
        lowered = sentence.lower()
        if all(word in lowered for word in query_words):
            snippet = sentence.strip()
            return snippet[:max_length] + "..." if len(snippet) > max_length else snippet
    return ""


def _history_to_messages(history) -> list:
    """Convert Gradio chat history (pair or messages format) to chat-completion messages."""
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            # Messages format: {"role": ..., "content": ...}. Only plain-text
            # user/assistant turns are forwarded.
            role = turn.get("role")
            content = turn.get("content")
            if role in ("user", "assistant") and isinstance(content, str) and content:
                messages.append({"role": role, "content": content})
            continue
        # Pair format: (user_message, assistant_message).
        user_msg, assistant_msg = turn
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    return messages


# --- Chat Assistant Response Function ---
def respond(message, history: list, system_message, max_tokens, temperature, top_p, language):
    """Stream the assistant's reply for *message*.

    Yields a single generated message for greetings and for out-of-domain
    questions; otherwise yields the cumulative answer text as tokens arrive.

    Args:
        message: The user's latest message.
        history: Previous turns, as (user, assistant) pairs or role/content dicts.
        system_message: System prompt that frames the assistant.
        max_tokens: Upper bound on newly generated tokens for the answer.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        language: Language code selecting greeting lists and canned prompts.
    """
    # Check for a greeting.
    if is_greeting(message, language):
        yield generate_dynamic_greeting(language)
        return

    # If query is out of domain, generate an out-of-scope message.
    if not is_domain_query(message):
        yield generate_dynamic_out_of_scope_message(language)
        return

    # Build conversation context from the system message and conversation history.
    messages_list = [{"role": "system", "content": system_message}]
    messages_list.extend(_history_to_messages(history))

    # Optionally add a relevant snippet from the scraped content (if available).
    if knowledge_base:
        snippet = retrieve_relevant_snippet(message, knowledge_base)
        if snippet:
            retrieval_context = f"Reference from Agri Future Investment platform: {snippet}"
            messages_list.insert(0, {"role": "system", "content": retrieval_context})

    messages_list.append({"role": "user", "content": message})

    # Generate the assistant's answer by streaming responses.
    response_text = ""
    for partial_response in client.chat_completion(
        messages_list,
        max_tokens=int(max_tokens),  # honour the "Max New Tokens" slider
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if partial_response.choices and partial_response.choices[0].delta:
            token = partial_response.choices[0].delta.content
            if token:
                response_text += token
                yield response_text


# --- Gradio Chat Interface ---
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(
            value="You are AgriFutureBot, a specialized assistant for agriculture and agro-investment insights.",
            label="System Message"
        ),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max New Tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (Nucleus Sampling)"),
        gr.Dropdown(choices=["en", "fr", "am"], value="en", label="Language")
    ],
)

if __name__ == "__main__":
    demo.launch()