Myoussef11 committed
Commit b5c921b · 1 Parent(s): 83da7d5

Add project files for HF Space

Dockerfile CHANGED
@@ -7,9 +7,8 @@ WORKDIR /app
 # Install essential system-level dependencies
 # - ffmpeg: Required by the pydub library for audio processing
 # - git: Required by some pip packages for installation from version control
-RUN apt-get update && apt-get install -y \
+RUN apt-get update && apt-get install -y --no-install-recommends \
     ffmpeg \
-    git \
     && rm -rf /var/lib/apt/lists/*
 
 # Copy the dependency file first to leverage Docker's layer caching
@@ -21,16 +20,15 @@ RUN pip install --no-cache-dir -r requirements.txt
 # Copy the application source code into the container
 # force a re-installation of all packages
 COPY src/ src/
-RUN curl -fsSL https://ollama.com/install.sh | sh
+
+# Set environment variables to control where Hugging Face downloads models
+# inside the container, which can be useful on some platforms.
+ENV HF_HOME=/app/huggingface_cache
+ENV TRANSFORMERS_CACHE=/app/huggingface_cache
 
 # Expose the port Gradio will run on, making it accessible to the host
 EXPOSE 7860
 
-# Set the environment variable for the Ollama host when running inside Docker.
-ENV TRANSFORMERS_CACHE=/tmp/hf_cache
-ENV HF_HOME=/tmp/hf_home
-ENV XDG_CACHE_HOME=/tmp
-
-# Define the default command to run when the container starts
-# Uses Python's module flag '-m' for correct package path resolution
-CMD ollama serve & sleep 5 && python -m src.app
+# Define the default command to run the application.
+# The server_name="0.0.0.0" is crucial for it to be accessible inside Docker.
+CMD ["python", "-m", "src.app"]
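
For reference, EXPOSE 7860 and the new CMD assume that src/app.py binds Gradio to all interfaces on that port. src/app.py is not part of this commit, so the following is only a minimal sketch of the launch call the comment above refers to; the handler and interface are placeholders.

# Hypothetical sketch of the src/app.py entry point assumed by EXPOSE 7860 and the CMD above.
# The handler and interface below are illustrative, not code from this repository.
import gradio as gr

def analyze(audio_path: str) -> str:
    # Placeholder: the real app would transcribe and analyze the uploaded audio.
    return "analysis result"

demo = gr.Interface(fn=analyze, inputs=gr.Audio(type="filepath"), outputs="text")

if __name__ == "__main__":
    # server_name="0.0.0.0" exposes the server outside the container;
    # server_port must match the port published via EXPOSE.
    demo.launch(server_name="0.0.0.0", server_port=7860)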
 
 
 
 
 
docker-compose.yml DELETED
@@ -1,40 +0,0 @@
-version: '3.8'
-
-services:
-  # Service 1: Our Voice Analysis Application
-  app:
-    # Build the image using the Dockerfile in the current directory
-    build: .
-    # Expose the Gradio port
-    ports:
-      - "7860:7860"
-    # Set the environment variable for the Ollama host
-    environment:
-      - OLLAMA_HOST=ollama
-    # Make this service depend on the 'ollama' service
-    # This ensures that Ollama starts up before our application tries to connect to it
-    depends_on:
-      - ollama
-
-  # Service 2: The Ollama Server
-  ollama:
-    # Use the official Ollama Docker image
-    image: ollama/ollama
-    # Expose the Ollama API port so our 'app' service can reach it.
-    ports:
-      - "11434:11434"
-    # Set the environment variable to enable GPU support
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: all
-              capabilities: [gpu]
-    # Mount a volume to persist Ollama data
-    # This allows Ollama to retain its state and models across container restarts
-    volumes:
-      - ollama_data:/root/.ollama
-
-volumes:
-  ollama_data:
requirements.txt CHANGED
@@ -6,7 +6,5 @@ openai-whisper==20231117
 openai
 pydub==0.25.1
 python-dotenv==1.0.1
-requests==2.32.3
 pytest==8.2.2
-pytest-mock==3.14.0
-ollama==0.2.1
+pytest-mock==3.14.0
 
src/config.py CHANGED
@@ -14,12 +14,10 @@ APP_DESCRIPTION = (
 
 # Model Configuration
 MODEL_PROVIDER = "local"  # Set to 'local' or 'openai'
-OLLAMA_HOST = "localhost"  # Default to localhost if not set
-OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3")  # Default model for Ollama
 
 # Local model settings (if MODEL_PROVIDER is 'local')
 LOCAL_TRANSCRIPTION_MODEL = "openai/whisper-base.en"
-LOCAL_ANALYSIS_MODEL = "meta-llama/Llama-3-8B-Instruct"
+LOCAL_ANALYSIS_MODEL = "HuggingFaceH4/zephyr-7b-beta"
 
 # OpenAI API settings (if MODEL_PROVIDER is 'openai')
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
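
Since LOCAL_ANALYSIS_MODEL now points at a chat-tuned model, the analysis service below relies on the tokenizer's built-in chat template. A minimal sketch of what that template does to a user prompt, assuming the transformers library already imported by src/services/analysis_service.py; this is illustration only, not code from the commit.

# Sketch only: how a chat-tuned model's tokenizer formats a prompt.
# The model id comes from the config above; everything else is illustrative.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
messages = [{"role": "user", "content": "Summarize the transcript in two sentences."}]

# apply_chat_template wraps the message in the special role tokens the model was
# trained with; tokenize=False returns the formatted string rather than token ids.
formatted = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(formatted)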
src/services/analysis_service.py CHANGED
@@ -1,7 +1,8 @@
-import ollama
+import torch
+from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, Pipeline
 from openai import OpenAI, OpenAIError
 from src import config
-from src.utils.exceptions import AnalysisError, IrrelevantQuestionError
+from src.utils.exceptions import AnalysisError
 from src.logging_config import logger
 
 
@@ -10,36 +11,43 @@ class AnalysisService:
     A service class for performing text analysis tasks.
     It uses Ollama for local analysis and the OpenAI API for remote analysis.
     """
+    _local_pipeline: Pipeline = None
 
-    def _analyze_local(self, prompt: str) -> str:
+    @classmethod
+    def _get_local_pipeline(cls) -> Pipeline:
         """
-        Generates a response by calling the local Ollama server using the host
-        address defined in the application's configuration.
+        Initializes and returns the local text-generation pipeline.
+        This method caches the pipeline to ensure the model is loaded only once per session.
         """
+        if cls._local_pipeline is None:
+            try:
+                model_name = config.LOCAL_ANALYSIS_MODEL
+                logger.info(f"Initializing local analysis model: {model_name}")
+
+                device = "cuda" if torch.cuda.is_available() else "cpu"
+                logger.info(f"Using device: {device} for analysis.")
 
-        # Read the configured host from config.py
-        ollama_host = "localhost"
-
-        try:
-            # Initialize the Ollama client with the determined host
-            client = ollama.Client(host=f"http://{ollama_host}:11434")
-            logger.info(f"Sending analysis request to Ollama server at {ollama_host}.")
-
-            response = client.generate(
-                model=config.OLLAMA_MODEL,
-                prompt=prompt
-            )
-
-            logger.info("Ollama analysis successful.")
-            return response['response'].strip()
+                # For 7B models on CPU, loading in a lower precision can save RAM
+                torch_dtype = torch.bfloat16 if device == "cuda" else torch.float32
 
-        except ollama.ResponseError as e:
-            logger.error(f"Ollama API error: {e.error}", exc_info=True)
-            raise AnalysisError(f"An error occurred with the Ollama API: {e.error}")
-        except Exception as e:
-            # Catch other potential issues like connection problems
-            logger.error(f"Error during Ollama request to {ollama_host}: {e}", exc_info=True)
-            raise AnalysisError("An unexpected error occurred while communicating with the Ollama server.")
+                model = AutoModelForCausalLM.from_pretrained(
+                    model_name,
+                    torch_dtype=torch_dtype,
+                    trust_remote_code=True
+                )
+                tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+                cls._local_pipeline = pipeline(
+                    "text-generation",
+                    model=model,
+                    tokenizer=tokenizer,
+                    device=device
+                )
+                logger.info("Local analysis model initialized successfully.")
+            except Exception as e:
+                logger.critical(f"Failed to load local analysis model: {e}", exc_info=True)
+                raise AnalysisError("Could not initialize the local analysis model.")
+        return cls._local_pipeline
 
     def _analyze_openai(self, prompt: str) -> str:
         """
@@ -70,16 +78,35 @@ class AnalysisService:
 
     def _analyze(self, prompt: str) -> str:
         """
-        Private dispatcher method to route analysis to the correct provider.
+        Generates a response using the local transformers pipeline.
         """
-        provider = config.MODEL_PROVIDER.lower()
-        if provider == 'local':
-            return self._analyze_local(prompt)
-        elif provider == 'openai':
-            return self._analyze_openai(prompt)
-        else:
-            logger.error(f"Invalid MODEL_PROVIDER configured: {config.MODEL_PROVIDER}")
-            raise ValueError(f"Invalid model provider '{config.MODEL_PROVIDER}' specified in config.")
+        try:
+            logger.info("Starting local analysis with transformers pipeline.")
+            pipe = self._get_local_pipeline()
+
+            # Zephyr and other modern chat models use a specific chat template format.
+            # The pipeline tokenizer can apply this for us.
+            messages = [{"role": "user", "content": prompt}]
+
+            # The pipeline handles the conversation templating internally.
+            outputs = pipe(
+                messages,
+                max_new_tokens=512,
+                do_sample=True,
+                temperature=0.7,
+                top_p=0.95,
+            )
+            # The response format from a text-generation pipeline is slightly different
+            response = outputs[0]["generated_text"]
+            if isinstance(response, list):
+                # For some models, the last message in the list is the response
+                response = response[-1]['content']
+
+            logger.info("Local analysis successful.")
+            return response.strip()
+        except Exception as e:
+            logger.error(f"Error during local analysis: {e}", exc_info=True)
+            raise AnalysisError("An unexpected error occurred during local analysis.")
 
     def summarize(self, text: str) -> str:
         """
@@ -146,13 +173,11 @@ class AnalysisService:
         {question}
         """
 
-        response = self._analyze(prompt)
-
-        # Check for our custom error signal from the LLM
-        if "ERROR: The answer to this question cannot be found" in response:
-            logger.warning(f"Model indicated question '{question}' is unanswerable from text.")
-            raise IrrelevantQuestionError(
-                "The question could not be answered based on the provided audio content."
-            )
-
-        return response
+        raw_response = self._analyze(prompt)
+        # The model might include the whole prompt in its response, so we clean it.
+        # We find the last instance of the user's question and take the text after it.
+        # This is a common post-processing step for text-generation pipelines.
+        split_token = f"**NEW USER QUESTION:**\n{question}"
+        if split_token in raw_response:
+            return raw_response.split(split_token)[-1].strip()
+        return raw_response
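
As a usage note on the new _analyze: when a transformers text-generation pipeline is called with a list of chat messages, recent versions return the whole conversation under outputs[0]["generated_text"], so the assistant reply is the last message; that is what the isinstance(response, list) branch handles, and the class-level _local_pipeline cache means the 7B model is loaded only once per process. A small self-contained sketch of that call pattern follows; the tiny instruct model here is only an assumed, cheap stand-in for config.LOCAL_ANALYSIS_MODEL and is not part of the commit.

# Standalone sketch of the chat-style pipeline call used in _analyze above.
# "HuggingFaceTB/SmolLM2-135M-Instruct" is a lightweight stand-in for
# config.LOCAL_ANALYSIS_MODEL; swap in the configured model for real use.
from transformers import pipeline

pipe = pipeline("text-generation", model="HuggingFaceTB/SmolLM2-135M-Instruct")

messages = [{"role": "user", "content": "Summarize: the meeting covered Q3 budget planning."}]
outputs = pipe(messages, max_new_tokens=64, do_sample=False)

generated = outputs[0]["generated_text"]
# With chat-style input the conversation is echoed back, so the final entry
# holds the assistant's reply; older versions may return a plain string instead.
reply = generated[-1]["content"] if isinstance(generated, list) else generated
print(reply.strip())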