import os
import re
import time
import random
import gradio as gr
from huggingface_hub import InferenceClient

# Load environment variables from .env (assumes python-dotenv is installed)
from dotenv import load_dotenv
load_dotenv()
# Retrieve the Hugging Face API token from the environment
# (the variable name "HF_TOKEN" is an assumption; adjust it to your setup)
hf_token = os.getenv("HF_TOKEN")
# Initialize the InferenceClient (update the model as needed)
client = InferenceClient(
    model="microsoft/Phi-4-mini-reasoning",  # Change to your model if needed
    token=hf_token,
)
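
# Optional sanity check (illustrative; assumes the chosen model is served for chat
# completion on the Inference API). Uncomment to verify the client before launching:
# print(client.chat_completion(
#     [{"role": "user", "content": "Hello"}],
#     max_tokens=16,
# ).choices[0].message.content)
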
# Optional: Enable scraping if your site is deployed.
ENABLE_SCRAPING = False
SITE_URL = "https://your-agri-future-site.com"
# Global variable to hold scraped content.
knowledge_base = ""
# --- Optional: Scraping Functionality ---
if ENABLE_SCRAPING:
    try:
        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options
        from selenium.webdriver.common.by import By

        def scrape_site(url):
            options = Options()
            options.headless = True  # Run browser in headless mode.
            driver = webdriver.Chrome(options=options)
            driver.get(url)
            # Use explicit waits in production (see the sketch after this block);
            # here we use a basic sleep.
            time.sleep(5)
            try:
                # Customize the selector based on your site's HTML structure.
                content_element = driver.find_element(By.ID, "content")
                page_text = content_element.text
            except Exception as e:
                page_text = "Error encountered during scraping: " + str(e)
            driver.quit()
            return page_text

        knowledge_base = scrape_site(SITE_URL)
        print("Scraped knowledge base successfully.")
    except Exception as e:
        print("Scraping failed or Selenium is not configured:", e)
else:
    print("Scraping is disabled; proceeding without scraped site content.")
# --- Multilingual Helpers ---
def is_greeting(query: str, lang: str) -> bool:
    greetings = {
        "en": ["hello", "hi", "hey", "good morning", "good afternoon", "good evening"],
        "fr": ["bonjour", "salut", "coucou", "bonsoir"],
        "am": ["ሰላም", "ሰላም እንደምን", "እንዴት"],
    }
    greet_list = greetings.get(lang, greetings["en"])
    # For languages using Latin script, convert the query to lower case.
    if lang != "am":
        query = query.lower()
    return any(query.startswith(greet) for greet in greet_list)
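
# Example (illustrative): is_greeting("Bonjour, parlez-moi du maïs", "fr") returns True,
# while is_greeting("Quel est le rendement du maïs ?", "fr") returns False, because the
# check only looks at how the query starts.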

def generate_dynamic_greeting(language: str) -> str:
    """
    Generate a dynamic, context-relevant greeting using the Hugging Face Inference API.
    """
    system_prompts = {
        "en": (
            "You are a friendly chatbot specializing in agriculture and agro-investment. "
            "A user just greeted you. Generate a warm, dynamic greeting message in English that is "
            "context-aware and encourages discussion about agriculture or agro-investment."
        ),
        "fr": (
            "Vous êtes un chatbot chaleureux spécialisé dans l'agriculture et les investissements agroalimentaires. "
            "Un utilisateur vient de vous saluer. Générez un message de salutation dynamique et chaleureux en français, "
            "en restant pertinent par rapport à l'agriculture ou aux investissements agroalimentaires."
        ),
        "am": (
            "እርስዎ በግብርናና በአገልግሎት ስርዓተ-ቢዝነስ ውስጥ ባለሙያ ቻትቦት ናቸው። "
            "ተጠቃሚው በአማርኛ ሰላም መልእክት አስቀድመዋል። "
            "በአማርኛ ተዛማጅ እና ትክክለኛ የሆነ ሰላም መልእክት ፍጥረት ያድርጉ።"
        ),
    }
    prompt = system_prompts.get(language, system_prompts["en"])
    messages = [{"role": "system", "content": prompt}]
    response = client.chat_completion(
        messages,
        max_tokens=128,
        stream=False,
        temperature=1,
        top_p=0.95,
    )
    try:
        greeting_message = response.choices[0].message.content
    except AttributeError:
        greeting_message = str(response)
    return greeting_message.strip()

def generate_dynamic_out_of_scope_message(language: str) -> str:
    """
    Generate a dynamic out-of-scope message using the Hugging Face Inference API.
    """
    system_prompts = {
        "en": (
            "You are a helpful chatbot specializing in agriculture and agro-investment. "
            "A user just asked a question that is not related to these topics. "
            "Generate a friendly, varied, and intelligent out-of-scope response in English that kindly "
            "encourages the user to ask about agriculture or agro-investment."
        ),
        "fr": (
            "Vous êtes un chatbot utile spécialisé dans l'agriculture et les investissements agroalimentaires. "
            "Un utilisateur vient de poser une question qui ne concerne pas ces sujets. "
            "Générez une réponse élégante, variée et intelligente en français pour indiquer que la question est "
            "hors de portée, en invitant l'utilisateur à poser une question sur l'agriculture ou les investissements agroalimentaires."
        ),
        "am": (
            "እርስዎ በግብርናና በአገልግሎት ስርዓተ-ቢዝነስ ውስጥ በተለይ የተሞሉ ቻትቦት ናቸው። "
            "ተጠቃሚው ለግብርና ወይም ለአገልግሎት ስርዓተ-ቢዝነስ ተያይዞ ያልሆነ ጥያቄ አስቀድመዋል። "
            "በአማርኛ በተለያዩ መልኩ የውጭ ክፍል መልእክት ፍጥረት ያድርጉ፤ እባኮትን ተጠቃሚውን ለግብርና ወይም ለአገልግሎት ጥያቄዎች ለመጠየቅ ያነጋግሩ።"
        ),
    }
    prompt = system_prompts.get(language, system_prompts["en"])
    messages = [{"role": "system", "content": prompt}]
    response = client.chat_completion(
        messages,
        max_tokens=128,
        stream=False,
        temperature=1,
        top_p=0.95,
    )
    try:
        out_message = response.choices[0].message.content
    except AttributeError:
        out_message = str(response)
    return out_message.strip()

def is_domain_query(query: str) -> bool:
    """
    Check if a query relates to agriculture or agro-investment.
    """
    domain_keywords = [
        "agriculture", "farming", "crop", "agro", "investment", "soil",
        "irrigation", "harvest", "organic", "sustainable", "agribusiness",
        "livestock", "agroalimentaire", "agriculture durable",
        "greenhouse", "horticulture", "pesticide", "fertilizer",
        "rural development", "food production", "crop yield", "farm equipment",
        "agronomy", "farming techniques", "organic farming", "agro-tech",
        "farm management", "agrifood",
    ]
    return any(re.search(r"\b" + keyword + r"\b", query, re.IGNORECASE) for keyword in domain_keywords)
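
# Example (illustrative): is_domain_query("How can I improve soil irrigation?") returns True
# (it matches "soil" and "irrigation"), while is_domain_query("Tell me a joke") returns False.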

def retrieve_relevant_snippet(query: str, text: str, max_length: int = 300) -> str:
    """
    Retrieve a relevant snippet from the text based on the query.
    """
    sentences = re.split(r'[.?!]', text)
    for sentence in sentences:
        if is_domain_query(sentence) and all(word.lower() in sentence.lower() for word in query.split()):
            snippet = sentence.strip()
            return snippet[:max_length] + "..." if len(snippet) > max_length else snippet
    return ""
# --- Chat Assistant Response Function ---
def respond(message, history: list, system_message, max_tokens, temperature, top_p, language):
    # Check for a greeting.
    if is_greeting(message, language):
        yield generate_dynamic_greeting(language)
        return
    # If the query is out of domain, generate an out-of-scope message.
    if not is_domain_query(message):
        yield generate_dynamic_out_of_scope_message(language)
        return
    # Build conversation context from the system message and conversation history.
    messages_list = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages_list.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages_list.append({"role": "assistant", "content": assistant_msg})
    # Optionally add a relevant snippet from the scraped content (if available).
    if knowledge_base:
        snippet = retrieve_relevant_snippet(message, knowledge_base)
        if snippet:
            retrieval_context = f"Reference from Agri Future Investment platform: {snippet}"
            messages_list.insert(0, {"role": "system", "content": retrieval_context})
    messages_list.append({"role": "user", "content": message})
    # Generate the assistant's answer by streaming responses.
    response_text = ""
    for partial_response in client.chat_completion(
        messages_list,
        max_tokens=max_tokens,  # Respect the "Max New Tokens" slider instead of a hard-coded limit.
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if partial_response.choices and partial_response.choices[0].delta:
            token = partial_response.choices[0].delta.content
            if token:
                response_text += token
                yield response_text
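
# Example (illustrative): the generator can be exercised outside Gradio, e.g.
#
#     for partial in respond("How can I improve crop yield?", [],
#                            "You are AgriFutureBot.", 512, 0.7, 0.95, "en"):
#         print(partial)
#
# Each yielded value is the accumulated answer so far, which is the streaming
# format gr.ChatInterface expects from its callback.
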
# --- Gradio Chat Interface ---
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(
            value="You are AgriFutureBot, a specialized assistant for agriculture and agro-investment insights.",
            label="System Message",
        ),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max New Tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (Nucleus Sampling)"),
        gr.Dropdown(choices=["en", "fr", "am"], value="en", label="Language"),
    ],
)
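
# Note: the additional_inputs above are passed to respond() positionally after
# (message, history), so their order must match the signature:
# system_message, max_tokens, temperature, top_p, language.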
if __name__ == "__main__":
demo.launch() |