leavoigt committed
Commit 287959e · 1 Parent(s): 79ad53d
Files changed (8)
  1. Dockerfile +23 -0
  2. README.md +56 -9
  3. app.py +38 -4
  4. gitignore +1 -0
  5. params.cfg +35 -0
  6. requirements.txt +19 -0
  7. utils/generator.py +224 -0
  8. utils/utils.py +46 -0
Dockerfile ADDED
@@ -0,0 +1,23 @@
+ # -------- base image --------
+ FROM python:3.11-slim
+
+ ENV PYTHONUNBUFFERED=1 \
+     OMP_NUM_THREADS=1 \
+     TOKENIZERS_PARALLELISM=false
+ #GRADIO_MCP_SERVER=True
+
+ # -------- install deps --------
+ WORKDIR /app
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # -------- copy source --------
+ COPY app.py .
+ COPY params.cfg .
+ COPY utils/ ./utils/
+ COPY .env* ./
+
+ # Ports:
+ # • 7860 → Gradio UI (HF Spaces standard)
+ EXPOSE 7860
+
+ CMD ["python", "app.py"]
README.md CHANGED
@@ -1,13 +1,60 @@
  ---
- title: Chatfed Generator
- emoji: 👁
- colorFrom: pink
- colorTo: red
- sdk: gradio
- sdk_version: 5.38.0
- app_file: app.py
+ title: ChatFed Generator
+ emoji: 🤖
+ colorFrom: blue
+ colorTo: purple
+ sdk: docker
  pinned: false
- license: apache-2.0
+ license: mit
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # ChatFed Generator - MCP Server
+
+ A language model-based generation service designed for ChatFed RAG (Retrieval-Augmented Generation) pipelines. This module serves as an **MCP (Model Context Protocol) server** that generates contextual responses using configurable LLM providers, with support for processing retrieval results.
+
+ ## MCP Endpoint
+
+ The main MCP function is `generate`, which provides context-aware text generation through the configured LLM provider once API credentials are set.
+
+ **Parameters**:
+ - `query` (str, required): The question or query to be answered
+ - `context` (str|list, required): Context for answering; either plain text or a list of retrieval result dictionaries
+
+ **Returns**: A string containing the generated answer based on the provided context and query.
+
+ **Example usage**:
+ ```python
+ from gradio_client import Client
+
+ client = Client("ENTER CONTAINER URL / SPACE ID")
+ result = client.predict(
+     query="What are the key findings?",
+     context="Your relevant documents or context here...",
+     api_name="/generate"
+ )
+ print(result)
+ ```
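+
+ For list-shaped context (as produced by a ChatFed retriever), `utils/generator.py` expects each item to carry an `answer` field plus an `answer_metadata` dictionary; a sketch of that shape (all field values illustrative):
+
+ ```python
+ context = [
+     {
+         "answer": "Relevant passage text...",
+         "answer_metadata": {
+             "filename": "report.pdf",
+             "page": 12,
+             "year": 2023,
+             "source": "corpus",
+             "_id": "doc-001",
+         },
+     },
+ ]
+ ```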
+
+ ## Configuration
+
+ ### LLM Provider Configuration
+ 1. Set your preferred inference provider in `params.cfg`
+ 2. Configure the model and generation parameters
+ 3. Set the required API key environment variable
+ 4. [Optional] Adjust the `TEMPERATURE` and `MAX_TOKENS` settings
+ 5. Build and run the app:
+
+ ```bash
+ docker build -t chatfed-generator .
+ docker run -p 7860:7860 chatfed-generator
+ ```
+
+ ## Environment Variables Required
+
+ Make sure to set the appropriate environment variable for your provider:
+ - OpenAI: `OPENAI_API_KEY`
+ - Anthropic: `ANTHROPIC_API_KEY`
+ - Cohere: `COHERE_API_KEY`
+ - HuggingFace: `HF_TOKEN`
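+
+ With Docker, the key can also be supplied at run time instead of baking a `.env` file into the image; a sketch (token value hypothetical):
+
+ ```bash
+ docker run -p 7860:7860 -e HF_TOKEN=hf_xxx chatfed-generator
+ ```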
app.py CHANGED
@@ -1,7 +1,41 @@
  import gradio as gr
-
-
- def greet(name):
-     return "Hello " + name + "!!"
-
-
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
- demo.launch()
+ from utils.generator import generate
+
+ # ---------------------------------------------------------------------
+ # Gradio Interface with MCP support
+ # ---------------------------------------------------------------------
+ ui = gr.Interface(
+     fn=generate,
+     inputs=[
+         gr.Textbox(
+             label="Query",
+             lines=2,
+             placeholder="Enter query here",
+             info="The question to answer using the provided context"
+         ),
+         gr.Textbox(
+             label="Context",
+             lines=8,
+             placeholder="Paste relevant context here",
+             info="Provide the context/documents to use for answering. The API expects a list of dictionaries, but the UI accepts plain text as well."
+         ),
+     ],
+     outputs=gr.Textbox(
+         label="Generated Answer",
+         lines=6,
+         show_copy_button=True
+     ),
+     title="ChatFed Generation Module",
+     description="Ask questions based on provided context. Intended for use in RAG pipelines as an MCP server with other ChatFed modules (e.g. context supplied by a semantic retriever service).",
+     api_name="generate"
+ )
+
+ # Launch with MCP server enabled
+ if __name__ == "__main__":
+     ui.launch(
+         server_name="0.0.0.0",
+         server_port=7860,
+         mcp_server=True,
+         show_error=True
+     )
gitignore ADDED
@@ -0,0 +1 @@
+ .DS_Store
params.cfg ADDED
@@ -0,0 +1,35 @@
+ [generator]
+ PROVIDER = huggingface
+ MODEL = meta-llama/Meta-Llama-3-8B-Instruct
+ MAX_TOKENS = 512
+ TEMPERATURE = 0.2
+
+ ## OpenAI
+ # [generator]
+ # PROVIDER = openai
+ # MODEL = gpt-4o
+ # MAX_TOKENS = 512
+ # TEMPERATURE = 0.2
+
+ ## Anthropic
+ # [generator]
+ # PROVIDER = anthropic
+ # MODEL = claude-3-haiku-20240307
+ # MAX_TOKENS = 512
+ # TEMPERATURE = 0.2
+
+ ## Cohere
+ # [generator]
+ # PROVIDER = cohere
+ # MODEL = command
+ # MAX_TOKENS = 512
+ # TEMPERATURE = 0.2
+
+
+ ## Environment Variables Required
+
+ # Make sure to set the appropriate environment variables:
+ # - OpenAI: `OPENAI_API_KEY`
+ # - Anthropic: `ANTHROPIC_API_KEY`
+ # - Cohere: `COHERE_API_KEY`
+ # - HuggingFace: `HF_TOKEN`
requirements.txt ADDED
@@ -0,0 +1,19 @@
+ # Core dependencies
+ gradio>=4.0.0
+ gradio[mcp]
+ python-dotenv>=1.0.0
+
+ # LangChain core
+ langchain-core>=0.1.0
+ langchain-community>=0.0.1
+
+ # Provider-specific LangChain packages
+ langchain-openai>=0.1.0
+ langchain-anthropic>=0.1.0
+ langchain-cohere>=0.1.0
+ langchain-together>=0.1.0
+ langchain-huggingface>=0.0.1
+
+ # Additional dependencies that might be needed
+ requests>=2.31.0
+ pydantic>=2.0.0
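
For local (non-Docker) development, these pins should install into a plain virtualenv; a sketch:

```bash
pip install -r requirements.txt
python app.py  # serves the Gradio UI and MCP server on port 7860
```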
utils/generator.py ADDED
@@ -0,0 +1,224 @@
+ import logging
+ import ast
+ from typing import List, Dict, Any, Union
+
+ # LangChain imports
+ from langchain_openai import ChatOpenAI
+ from langchain_anthropic import ChatAnthropic
+ from langchain_cohere import ChatCohere
+ from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
+ from langchain_core.messages import SystemMessage, HumanMessage
+
+ # Local imports
+ from .utils import getconfig, get_auth
+
+ # ---------------------------------------------------------------------
+ # Model / client initialization (non-exhaustive list of providers)
+ # ---------------------------------------------------------------------
+ config = getconfig("params.cfg")
+
+ PROVIDER = config.get("generator", "PROVIDER")
+ MODEL = config.get("generator", "MODEL")
+ MAX_TOKENS = int(config.get("generator", "MAX_TOKENS"))
+ TEMPERATURE = float(config.get("generator", "TEMPERATURE"))
+
+ # Set up authentication for the selected provider
+ auth_config = get_auth(PROVIDER)
+
+ def get_chat_model():
+     """Initialize the appropriate LangChain chat model for the configured provider."""
+     common_params = {
+         "temperature": TEMPERATURE,
+         "max_tokens": MAX_TOKENS,
+     }
+
+     if PROVIDER == "openai":
+         return ChatOpenAI(
+             model=MODEL,
+             openai_api_key=auth_config["api_key"],
+             **common_params
+         )
+     elif PROVIDER == "anthropic":
+         return ChatAnthropic(
+             model=MODEL,
+             anthropic_api_key=auth_config["api_key"],
+             **common_params
+         )
+     elif PROVIDER == "cohere":
+         return ChatCohere(
+             model=MODEL,
+             cohere_api_key=auth_config["api_key"],
+             **common_params
+         )
+     elif PROVIDER == "huggingface":
+         # Initialize HuggingFaceEndpoint with explicit parameters
+         llm = HuggingFaceEndpoint(
+             repo_id=MODEL,
+             huggingfacehub_api_token=auth_config["api_key"],
+             task="text-generation",
+             temperature=TEMPERATURE,
+             max_new_tokens=MAX_TOKENS
+         )
+         return ChatHuggingFace(llm=llm)
+     else:
+         raise ValueError(f"Unsupported provider: {PROVIDER}")
+
+ # Initialize provider-agnostic chat model
+ chat_model = get_chat_model()
+
+ # ---------------------------------------------------------------------
+ # Context processing - may need further refinement (e.g. to handle other data sources)
+ # ---------------------------------------------------------------------
+ def extract_relevant_fields(retrieval_results: Union[str, List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
+     """
+     Extract only the relevant fields from retrieval results.
+
+     Args:
+         retrieval_results: List of retrieval result dictionaries, or its string representation
+
+     Returns:
+         List of processed objects containing only the relevant fields
+     """
+     # The MCP/UI layer may deliver the list as its string representation
+     if isinstance(retrieval_results, str):
+         retrieval_results = ast.literal_eval(retrieval_results)
+
+     processed_results = []
+
+     for result in retrieval_results:
+         # Extract the answer content
+         answer = result.get('answer', '')
+
+         # Extract document identification from metadata
+         metadata = result.get('answer_metadata', {})
+         doc_info = {
+             'answer': answer,
+             'filename': metadata.get('filename', 'Unknown'),
+             'page': metadata.get('page', 'Unknown'),
+             'year': metadata.get('year', 'Unknown'),
+             'source': metadata.get('source', 'Unknown'),
+             'document_id': metadata.get('_id', 'Unknown')
+         }
+
+         processed_results.append(doc_info)
+
+     return processed_results
+
+ def format_context_from_results(processed_results: List[Dict[str, Any]]) -> str:
+     """
+     Format processed retrieval results into a context string for the LLM.
+
+     Args:
+         processed_results: List of processed objects with relevant fields
+
+     Returns:
+         Formatted context string
+     """
+     if not processed_results:
+         return ""
+
+     context_parts = []
+
+     for i, result in enumerate(processed_results, 1):
+         doc_reference = f"[Document {i}: {result['filename']}"
+         if result['page'] != 'Unknown':
+             doc_reference += f", Page {result['page']}"
+         if result['year'] != 'Unknown':
+             doc_reference += f", Year {result['year']}"
+         doc_reference += "]"
+
+         context_part = f"{doc_reference}\n{result['answer']}\n"
+         context_parts.append(context_part)
+
+     return "\n".join(context_parts)
+
+ # ---------------------------------------------------------------------
+ # Core generation function for both the Gradio UI and MCP
+ # ---------------------------------------------------------------------
+ async def _call_llm(messages: list) -> str:
+     """
+     Provider-agnostic LLM call using LangChain.
+
+     Args:
+         messages: List of LangChain message objects
+
+     Returns:
+         Generated response content as a string
+     """
+     try:
+         # Use async invoke for better performance
+         response = await chat_model.ainvoke(messages)
+         return response.content.strip()
+     except Exception as e:
+         logging.exception(f"LLM generation failed with provider '{PROVIDER}' and model '{MODEL}': {e}")
+         raise
+
+ def build_messages(question: str, context: str) -> list:
+     """
+     Build messages in LangChain format.
+
+     Args:
+         question: The user's question
+         context: The relevant context for answering
+
+     Returns:
+         List of LangChain message objects
+     """
+     system_content = (
+         "You are an expert assistant. Answer the USER question using only the "
+         "CONTEXT provided. If the context is insufficient, say 'I don't know.'"
+     )
+
+     user_content = f"### CONTEXT\n{context}\n\n### USER QUESTION\n{question}"
+
+     return [
+         SystemMessage(content=system_content),
+         HumanMessage(content=user_content)
+     ]
+
+
+ async def generate(query: str, context: Union[str, List[Dict[str, Any]]]) -> str:
+     """
+     Generate an answer to a query using the provided context (RAG).
+
+     This function takes a user query and relevant context, then uses a language model
+     to generate a comprehensive answer based on the provided information.
+
+     Args:
+         query (str): User query
+         context (str|list): Plain-text context or a list of retrieval result dictionaries
+     Returns:
+         str: The generated answer based on the query and context
+     """
+     if not query.strip():
+         return "Error: Query cannot be empty"
+
+     # Handle both string context (from the Gradio UI) and list context (from a retriever)
+     if isinstance(context, list):
+         if not context:
+             return "Error: No retrieval results provided"
+
+         # Process the retrieval results
+         processed_results = extract_relevant_fields(context)
+         formatted_context = format_context_from_results(processed_results)
+
+         if not formatted_context.strip():
+             return "Error: No valid content found in retrieval results"
+
+     elif isinstance(context, str):
+         if not context.strip():
+             return "Error: Context cannot be empty"
+         formatted_context = context
+
+     else:
+         return "Error: Context must be either a string or a list of retrieval results"
+
+     try:
+         messages = build_messages(query, formatted_context)
+         answer = await _call_llm(messages)
+         return answer
+     except Exception as e:
+         logging.exception("Generation failed")
+         return f"Error: {str(e)}"
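
`generate` is async, so callers outside Gradio need an event loop. A minimal direct-call sketch, assuming `params.cfg` and the provider's API key are already set up:

```python
import asyncio
from utils.generator import generate

answer = asyncio.run(generate(
    query="What are the key findings?",
    context="Your relevant documents or context here...",
))
print(answer)
```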
utils/utils.py ADDED
@@ -0,0 +1,46 @@
+ import os
+ import configparser
+ import logging
+ from dotenv import load_dotenv
+
+ # Local .env file
+ load_dotenv()
+
+ def getconfig(configfile_path: str):
+     """
+     Read the config file.
+
+     Params
+     ----------------
+     configfile_path: file path of the .cfg file
+     """
+     config = configparser.ConfigParser()
+     try:
+         with open(configfile_path) as f:
+             config.read_file(f)
+         return config
+     except FileNotFoundError:
+         logging.warning("config file not found: %s", configfile_path)
+         raise
+
+ # ---------------------------------------------------------------------
+ # Provider-agnostic authentication and configuration
+ # ---------------------------------------------------------------------
+ def get_auth(provider: str) -> dict:
+     """Get authentication configuration for different providers"""
+     auth_configs = {
+         "openai": {"api_key": os.getenv("OPENAI_API_KEY")},
+         "huggingface": {"api_key": os.getenv("HF_TOKEN")},
+         "anthropic": {"api_key": os.getenv("ANTHROPIC_API_KEY")},
+         "cohere": {"api_key": os.getenv("COHERE_API_KEY")},
+     }
+
+     if provider not in auth_configs:
+         raise ValueError(f"Unsupported provider: {provider}")
+
+     auth_config = auth_configs[provider]
+     api_key = auth_config.get("api_key")
+
+     if not api_key:
+         raise RuntimeError(f"Missing API key for provider '{provider}'. Please set the appropriate environment variable.")
+
+     return auth_config
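
Since `load_dotenv()` runs at import time, a local `.env` file is picked up automatically. A sketch of how the two helpers compose, assuming the `params.cfg` shown above:

```python
from utils.utils import getconfig, get_auth

config = getconfig("params.cfg")
provider = config.get("generator", "PROVIDER")  # e.g. "huggingface"
auth = get_auth(provider)  # raises RuntimeError if the matching env var is unset
```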