Spaces:
Configuration error
Configuration error
File size: 4,992 Bytes
869543a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
# farmlingua/app/agents/crew_pipeline.py333
import os
import sys
import requests
import joblib
import faiss
import numpy as np
from transformers import pipeline
from sentence_transformers import SentenceTransformer
from app.utils import config
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if BASE_DIR not in sys.path:
sys.path.insert(0, BASE_DIR)
DEVICE = 0 if os.environ.get("CUDA_VISIBLE_DEVICES") else -1
try:
classifier = joblib.load(config.CLASSIFIER_PATH)
except Exception:
classifier = None
print(f"Loading expert model ({config.EXPERT_MODEL_NAME})...")
expert_pipeline = pipeline(
"text-generation",
model=config.EXPERT_MODEL_NAME,
device=DEVICE,
max_new_tokens=700,
temperature=0.3,
repetition_penalty=1.1
)
print(f"Loading formatter/weather model ({config.FORMATTER_MODEL_NAME})...")
formatter_pipeline = pipeline(
"text2text-generation",
model=config.FORMATTER_MODEL_NAME,
device=DEVICE
)
embedder = SentenceTransformer(config.EMBEDDING_MODEL)
def retrieve_docs(query, vs_path):
if not vs_path or not os.path.exists(vs_path):
return None
if os.path.isdir(vs_path):
try:
from langchain.vectorstores import FAISS as LCFAISS
from langchain.embeddings import SentenceTransformerEmbeddings
embed_model = SentenceTransformerEmbeddings(model_name=config.EMBEDDING_MODEL)
vs = LCFAISS.load_local(str(vs_path), embed_model, allow_dangerous_deserialization=True)
docs = vs.similarity_search(query, k=3)
return "\n\n".join(d.page_content for d in docs) if docs else None
except Exception:
return None
try:
index = faiss.read_index(str(vs_path))
except Exception:
return None
query_vec = np.array([embedder.encode(query)], dtype=np.float32)
D, I = index.search(query_vec, k=3)
if D[0][0] == 0:
return None
meta_path = str(vs_path) + "_meta.npy"
if os.path.exists(meta_path):
metadata = np.load(meta_path, allow_pickle=True).item()
docs = [metadata.get(str(idx), "") for idx in I[0] if str(idx) in metadata]
docs = [doc for doc in docs if doc]
return "\n\n".join(docs) if docs else None
return None
def get_weather(state_name):
url = "http://api.weatherapi.com/v1/current.json"
params = {
"key": config.WEATHER_API_KEY,
"q": f"{state_name}, Nigeria",
"aqi": "no"
}
r = requests.get(url, params=params)
if r.status_code != 200:
return f"Unable to retrieve weather for {state_name}."
data = r.json()
return (
f"Weather in {state_name}:\n"
f"- Condition: {data['current']['condition']['text']}\n"
f"- Temperature: {data['current']['temp_c']}°C\n"
f"- Humidity: {data['current']['humidity']}%\n"
f"- Wind: {data['current']['wind_kph']} kph"
)
def detect_intent(query):
q_lower = query.lower()
if any(word in q_lower for word in ["weather", "temperature", "rain", "forecast"]):
for state in config.STATES:
if state.lower() in q_lower:
return "weather", state
return "weather", None
if any(word in q_lower for word in ["latest", "update", "breaking", "news", "current", "predict"]):
return "live_update", None
if hasattr(classifier, "predict") and hasattr(classifier, "predict_proba"):
predicted_intent = classifier.predict([query])[0]
confidence = max(classifier.predict_proba([query])[0])
if confidence < config.CLASSIFIER_CONFIDENCE_THRESHOLD:
return "low_confidence", None
return predicted_intent, None
return "normal", None
def run_pipeline(user_query: str):
intent, extra = detect_intent(user_query)
if intent == "weather" and extra:
weather_text = get_weather(extra)
return formatter_pipeline(weather_text, max_length=256, do_sample=False)[0]["generated_text"]
if intent == "live_update":
context = retrieve_docs(user_query, config.LIVE_VS_PATH)
if context:
user_query += f"\n\nLatest agricultural updates:\n{context}"
if intent == "low_confidence":
context = retrieve_docs(user_query, config.STATIC_VS_PATH)
if context:
user_query += f"\n\nReference information:\n{context}"
expert_response = expert_pipeline(
f"Provide a detailed agricultural answer for: {user_query}",
max_new_tokens=700,
temperature=0.3
)[0]['generated_text']
formatted_response = formatter_pipeline(
f"Format the following answer to be clear, structured, and easy to understand for Nigerian farmers:\n\n{expert_response}",
max_length=512,
do_sample=False
)[0]['generated_text']
return formatted_response
|