Commit a288236 · Nagesh Muralidhar
Parent(s): 752c2c6

deploying server
Files changed:
- server/.env +5 -0
- server/Dockerfile +24 -0
- server/README.md +59 -0
- server/__pycache__/agents.cpython-311.pyc +0 -0
- server/__pycache__/main.cpython-311.pyc +0 -0
- server/__pycache__/shared.cpython-311.pyc +0 -0
- server/__pycache__/transcript_manager.cpython-311.pyc +0 -0
- server/__pycache__/workflow.cpython-311.pyc +0 -0
- server/agents.py +422 -0
- server/app.py +5 -0
- server/context_storage/1_context.json +1 -0
- server/main.py +454 -0
- server/qdrant_db/.lock +1 -0
- server/qdrant_db/meta.json +1 -0
- server/requirements.txt +14 -0
- server/test_workflow.py +303 -0
- server/transcripts/podcasts.json +1 -0
- server/workflow.py +254 -0
server/.env
ADDED
@@ -0,0 +1,5 @@
+OPENAI_API_KEY=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
+
+ELEVEN_API_KEY=sk_ff6d28e7b04706abb03caf8c3239f75f373687ba440b2263
+TAVILY_API_KEY=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
+
server/Dockerfile
ADDED
@@ -0,0 +1,24 @@
+FROM python:3.10-slim
+
+WORKDIR /code
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy requirements first for better caching
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the rest of the application
+COPY . .
+
+# Create necessary directories
+RUN mkdir -p audio_storage context_storage transcripts
+
+# Expose the port
+EXPOSE 7860
+
+# Command to run the application
+CMD ["python", "app.py"]
server/README.md
ADDED
@@ -0,0 +1,59 @@
+# Podcast Discussion Server
+
+This is a FastAPI-based server that provides podcast discussion and analysis capabilities.
+
+## Environment Variables
+
+The following environment variables need to be set in the Hugging Face Space:
+
+- `OPENAI_API_KEY`: Your OpenAI API key
+- `ALLOWED_ORIGINS`: Comma-separated list of allowed origins (optional, defaults to Vercel domains and localhost)
+
+## API Endpoints
+
+- `/chat`: Main chat endpoint for podcast discussions
+- `/podcast-chat/{podcast_id}`: Chat endpoint for specific podcast discussions
+- `/audio-list`: List available audio files
+- `/audio/{filename}`: Get specific audio file
+- `/podcast/{podcast_id}/context`: Get podcast context
+
+## Stack
+
+- FastAPI
+- OpenAI
+- Langchain
+- Qdrant
+- GTTS
+
+## Deployment
+
+### Backend (Hugging Face Spaces)
+
+This server is deployed on Hugging Face Spaces using their Docker deployment feature.
+
+### Frontend (Vercel)
+
+When deploying the frontend to Vercel:
+
+1. Set the API base URL in your frontend environment:
+
+```
+VITE_API_BASE_URL=https://your-username-your-space-name.hf.space
+```
+
+2. The server is already configured to accept requests from:
+
+   - All Vercel domains (\*.vercel.app)
+   - Local development servers (localhost:3000, localhost:5173)
+
+3. If you're using a custom domain, add it to the `ALLOWED_ORIGINS` environment variable in your Hugging Face Space:
+```
+ALLOWED_ORIGINS=https://your-custom-domain.com,https://www.your-custom-domain.com
+```
+
+## Security Features
+
+- CORS protection with specific origin allowlist
+- Security headers (HSTS, XSS Protection, etc.)
+- Rate limiting
+- SSL/TLS encryption (provided by Hugging Face Spaces)
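Once the Space is up, the endpoints listed in this README can be exercised from any HTTP client. A minimal sketch in Python, using the placeholder base URL from the deployment step and the request/response schemas defined in `server/main.py` (the query string is illustrative):

```python
import requests

# Placeholder Space URL from the README; substitute your own deployment
BASE_URL = "https://your-username-your-space-name.hf.space"

# Kick off a debate run; ChatMessage accepts content, optional context, and agent_type
resp = requests.post(f"{BASE_URL}/chat", json={"content": "What are the pros and cons of remote work?"})
resp.raise_for_status()

# WorkflowResponse includes final_podcast, which may carry the generated audio filename
print(resp.json()["final_podcast"].get("audio_file"))

# List the audio files the server has produced so far
print(requests.get(f"{BASE_URL}/audio-list").json())
```

Note that a full `/chat` run drives the whole debate-and-podcast workflow, so expect it to take noticeably longer than the read-only endpoints.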
server/__pycache__/agents.cpython-311.pyc
ADDED
Binary file (24.6 kB).

server/__pycache__/main.cpython-311.pyc
ADDED
Binary file (26 kB).

server/__pycache__/shared.cpython-311.pyc
ADDED
Binary file (3.68 kB).

server/__pycache__/transcript_manager.cpython-311.pyc
ADDED
Binary file (7.04 kB).

server/__pycache__/workflow.cpython-311.pyc
ADDED
Binary file (11.9 kB).
server/agents.py
ADDED
@@ -0,0 +1,422 @@
+from langchain_openai import ChatOpenAI
+from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
+from langchain_community.agent_toolkits.load_tools import load_tools
+from langchain_community.tools.tavily_search.tool import TavilySearchResults
+from langchain_community.tools import ElevenLabsText2SpeechTool
+import tiktoken
+from typing import Dict, Any, List, Optional
+import os
+from dotenv import load_dotenv
+from datetime import datetime
+import logging
+import json
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+import numpy as np
+from langchain.schema import SystemMessage, HumanMessage, AIMessage
+from langchain.output_parsers import PydanticOutputParser
+from pydantic import BaseModel, Field
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.FileHandler('agents.log'),
+        logging.StreamHandler()
+    ]
+)
+
+# Create loggers for each agent
+extractor_logger = logging.getLogger('ExtractorAgent')
+skeptic_logger = logging.getLogger('SkepticAgent')
+believer_logger = logging.getLogger('BelieverAgent')
+supervisor_logger = logging.getLogger('SupervisorAgent')
+podcast_logger = logging.getLogger('PodcastProducerAgent')
+
+# Load environment variables
+load_dotenv()
+
+# Get API keys
+openai_api_key = os.getenv("OPENAI_API_KEY")
+eleven_api_key = os.getenv("ELEVEN_API_KEY")
+
+if not openai_api_key:
+    raise ValueError("OPENAI_API_KEY not found in environment variables")
+if not eleven_api_key:
+    raise ValueError("ELEVEN_API_KEY not found in environment variables")
+
+# Initialize the base LLM
+base_llm = ChatOpenAI(
+    model_name="gpt-3.5-turbo",
+    temperature=0.7,
+    openai_api_key=openai_api_key
+)
+
+class ExtractorOutput(BaseModel):
+    content: str = Field(description="The extracted and refined query")
+    key_points: List[str] = Field(description="Key points extracted from the query")
+
+class AgentResponse(BaseModel):
+    content: str = Field(description="The agent's response")
+    chunks: Optional[List[Dict[str, str]]] = Field(description="Relevant context chunks used", default=None)
+
+class SupervisorOutput(BaseModel):
+    content: str = Field(description="The supervisor's analysis")
+    chunks: Dict[str, List[str]] = Field(description="Quadrant-based chunks of the analysis")
+
+class PodcastOutput(BaseModel):
+    title: str = Field(description="Title of the podcast episode")
+    description: str = Field(description="Description of the episode")
+    script: str = Field(description="The podcast script")
+    summary: str = Field(description="A brief summary of the episode")
+    duration_minutes: int = Field(description="Estimated duration in minutes")
+
+class ExtractorAgent:
+    def __init__(self, tavily_api_key: str):
+        self.search_tool = TavilySearchResults(
+            api_key=tavily_api_key,
+            max_results=5
+        )
+        self.prompt = ChatPromptTemplate.from_messages([
+            ("system", """You are an expert information extractor. Your role is to:
+1. Extract relevant information from search results
+2. Organize the information in a clear, structured way
+3. Focus on factual, verifiable information
+4. Cite sources when possible"""),
+            ("human", "{input}")
+        ])
+        self.chain = self.prompt | base_llm
+
+    async def __call__(self, query: str) -> Dict[str, Any]:
+        try:
+            # Log the incoming query
+            extractor_logger.info(f"Processing query: {query}")
+
+            try:
+                # Search using Tavily
+                search_results = await self.search_tool.ainvoke(query)
+                extractor_logger.debug(f"Search results: {json.dumps(search_results, indent=2)}")
+            except Exception as e:
+                extractor_logger.error(f"Error in Tavily search: {str(e)}", exc_info=True)
+                raise Exception(f"Tavily search failed: {str(e)}")
+
+            # Format the results
+            if isinstance(search_results, list):
+                formatted_results = f"Search results for: {query}\n" + "\n".join(
+                    [str(result) for result in search_results]
+                )
+            else:
+                formatted_results = f"Search results for: {query}\n{search_results}"
+
+            try:
+                # Generate response using the chain
+                response = await self.chain.ainvoke({"input": formatted_results})
+                extractor_logger.info(f"Generated response: {response.content}")
+            except Exception as e:
+                extractor_logger.error(f"Error in LLM chain: {str(e)}", exc_info=True)
+                raise Exception(f"LLM chain failed: {str(e)}")
+
+            return {
+                "type": "extractor",
+                "content": response.content,
+                "raw_results": search_results
+            }
+        except Exception as e:
+            extractor_logger.error(f"Error in ExtractorAgent: {str(e)}", exc_info=True)
+            raise Exception(f"Error in extractor: {str(e)}")
+
+class SkepticAgent:
+    def __init__(self):
+        self.prompt = ChatPromptTemplate.from_messages([
+            ("system", """You are a critical thinker engaging in a thoughtful discussion. While maintaining a balanced perspective, you should:
+- Analyze potential challenges and limitations
+- Consider real-world implications
+- Support arguments with evidence and examples
+- Maintain a respectful and constructive tone
+- Raise important considerations
+
+If provided with context information, use it to inform your response while maintaining your analytical perspective.
+Focus on examining risks and important questions from the context.
+
+Keep your responses concise and focused on the topic at hand."""),
+            ("human", """Context information:
+{chunks}
+
+Question/Topic:
+{input}""")
+        ])
+        self.chain = self.prompt | base_llm
+
+    async def __call__(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
+        skeptic_logger.info(f"Processing input: {input_data['content']}")
+        chunks = input_data.get("chunks", [])
+        chunks_text = "\n".join(chunks) if chunks else "No additional context provided."
+
+        response = await self.chain.ainvoke({
+            "input": input_data["content"],
+            "chunks": chunks_text
+        })
+        skeptic_logger.info(f"Generated response: {response.content}")
+        return {"type": "skeptic", "content": response.content}
+
+class BelieverAgent:
+    def __init__(self):
+        self.prompt = ChatPromptTemplate.from_messages([
+            ("system", """You are an optimistic thinker engaging in a thoughtful discussion. While maintaining a balanced perspective, you should:
+- Highlight opportunities and potential benefits
+- Share innovative solutions and possibilities
+- Support arguments with evidence and examples
+- Maintain a constructive and forward-thinking tone
+- Build on existing ideas positively
+
+If provided with context information, use it to inform your response while maintaining your optimistic perspective.
+Focus on opportunities and solutions from the context.
+
+Keep your responses concise and focused on the topic at hand."""),
+            ("human", """Context information:
+{chunks}
+
+Question/Topic:
+{input}""")
+        ])
+        self.chain = self.prompt | base_llm
+
+    async def __call__(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
+        believer_logger.info(f"Processing input: {input_data['content']}")
+        chunks = input_data.get("chunks", [])
+        chunks_text = "\n".join(chunks) if chunks else "No additional context provided."
+
+        response = await self.chain.ainvoke({
+            "input": input_data["content"],
+            "chunks": chunks_text
+        })
+        believer_logger.info(f"Generated response: {response.content}")
+        return {"type": "believer", "content": response.content}
+
+class SupervisorAgent:
+    def __init__(self):
+        self.prompt = ChatPromptTemplate.from_messages([
+            ("system", """You are a balanced supervisor. Your role is to:
+1. Analyze inputs from all agents
+2. Identify key points and insights
+3. Balance different perspectives
+4. Synthesize a comprehensive view
+5. Provide clear, actionable conclusions
+
+Organize your response into these sections:
+- Opportunities: Key possibilities and positive aspects
+- Risks: Important challenges and concerns
+- Questions: Critical questions to consider
+- Solutions: Potential ways forward
+
+Focus on creating a balanced, well-reasoned synthesis of all viewpoints.
+Keep your words to 100 words or less."""),
+            ("human", "Analyze the following perspectives:\n\nExtractor: {extractor_content}\n\nSkeptic: {skeptic_content}\n\nBeliever: {believer_content}")
+        ])
+        self.chain = self.prompt | base_llm
+
+    async def __call__(self, agent_responses: Dict[str, Any]) -> Dict[str, Any]:
+        supervisor_logger.info("Processing agent responses:")
+        supervisor_logger.info(f"Extractor: {agent_responses['extractor']['content']}")
+        supervisor_logger.info(f"Skeptic: {agent_responses['skeptic']['content']}")
+        supervisor_logger.info(f"Believer: {agent_responses['believer']['content']}")
+
+        # Process supervisor's analysis
+        response = await self.chain.ainvoke({
+            "extractor_content": agent_responses["extractor"]["content"],
+            "skeptic_content": agent_responses["skeptic"]["content"],
+            "believer_content": agent_responses["believer"]["content"]
+        })
+
+        supervisor_logger.info(f"Generated analysis: {response.content}")
+
+        # Parse the response into sections
+        content = response.content
+        sections = {
+            "opportunities": [],
+            "risks": [],
+            "questions": [],
+            "solutions": []
+        }
+
+        # Simple parsing of the content into sections
+        current_section = None
+        for line in content.split('\n'):
+            line = line.strip()
+            if line.lower().startswith('opportunities:'):
+                current_section = "opportunities"
+            elif line.lower().startswith('risks:'):
+                current_section = "risks"
+            elif line.lower().startswith('questions:'):
+                current_section = "questions"
+            elif line.lower().startswith('solutions:'):
+                current_section = "solutions"
+            elif line and current_section:
+                sections[current_section].append(line)
+
+        return {
+            "type": "supervisor",
+            "content": response.content,
+            "chunks": sections
+        }
+
+class PodcastProducerAgent:
+    def __init__(self):
+        podcast_logger.info("Initializing PodcastProducerAgent")
+
+        # Initialize the agent with a lower temperature for more consistent output
+        llm = ChatOpenAI(
+            model_name="gpt-3.5-turbo",
+            temperature=0.3,
+            openai_api_key=openai_api_key
+        )
+
+        # Create audio storage directory if it doesn't exist
+        self.audio_dir = os.path.join(os.path.dirname(__file__), "audio_storage")
+        os.makedirs(self.audio_dir, exist_ok=True)
+        podcast_logger.info(f"Audio directory: {self.audio_dir}")
+
+        self.prompt = ChatPromptTemplate.from_messages([
+            ("system", """You are an expert podcast producer. Create a single, cohesive podcast script that:
+1. Introduces the topic clearly
+2. Presents a balanced debate between perspectives
+3. Incorporates key insights from the supervisor's analysis:
+   - Opportunities and positive aspects
+   - Risks and challenges
+   - Key questions to consider
+   - Potential solutions
+4. Prioritizes content based on quadrant analysis:
+   - Important & Urgent: Address first and emphasize
+   - Important & Not Urgent: Cover thoroughly but with less urgency
+   - Not Important & Urgent: Mention briefly if relevant
+   - Not Important & Not Urgent: Include only if adds value
+5. Maintains natural conversation flow with clear speaker transitions
+6. Concludes with actionable takeaways
+
+Keep the tone professional but conversational. Format the script with clear speaker indicators and natural pauses."""),
+            ("human", """Create a podcast script from this content:
+
+Topic: {user_query}
+
+Debate Content:
+{debate_content}
+
+Supervisor's Analysis:
+{supervisor_content}
+
+Quadrant Analysis:
+Important & Urgent:
+{important_urgent}
+
+Important & Not Urgent:
+{important_not_urgent}
+
+Not Important & Urgent:
+{not_important_urgent}
+
+Not Important & Not Urgent:
+{not_important_not_urgent}""")
+        ])
+        self.chain = self.prompt | llm
+
+        # Metadata prompt for categorization
+        self.metadata_prompt = ChatPromptTemplate.from_messages([
+            ("system", """Analyze the debate and provide:
+1. A category (single word: technology/science/society/politics/economics/culture)
+2. A short description (3-4 words) of the main topic
+Format: category|short_description"""),
+            ("human", "{content}")
+        ])
+        self.metadata_chain = self.metadata_prompt | llm
+
+    async def __call__(self, debate_history: list, supervisor_notes: list, user_query: str, supervisor_chunks: dict, quadrant_analysis: dict) -> Dict[str, Any]:
+        try:
+            podcast_logger.info("Starting podcast production")
+
+            # Format the debate content
+            debate_content = "\n\n".join([
+                f"{entry['speaker']}: {entry['content']}"
+                for entry in debate_history
+            ])
+
+            # Get the latest supervisor analysis
+            supervisor_content = supervisor_notes[-1] if supervisor_notes else ""
+
+            # Format quadrant content
+            important_urgent = "\n".join(quadrant_analysis.get("important_urgent", []))
+            important_not_urgent = "\n".join(quadrant_analysis.get("important_not_urgent", []))
+            not_important_urgent = "\n".join(quadrant_analysis.get("not_important_urgent", []))
+            not_important_not_urgent = "\n".join(quadrant_analysis.get("not_important_not_urgent", []))
+
+            # Generate the podcast script
+            script_response = await self.chain.ainvoke({
+                "user_query": user_query,
+                "debate_content": debate_content,
+                "supervisor_content": supervisor_content,
+                "important_urgent": important_urgent,
+                "important_not_urgent": important_not_urgent,
+                "not_important_urgent": not_important_urgent,
+                "not_important_not_urgent": not_important_not_urgent
+            })
+
+            # Get metadata for the podcast
+            metadata_response = await self.metadata_chain.ainvoke({
+                "content": script_response.content
+            })
+            category, description = metadata_response.content.strip().split("|")
+
+            # Clean up filename components
+            clean_query = user_query.lower().replace(" ", "_")[:30]
+            clean_description = description.lower().replace(" ", "_")
+            clean_category = category.lower().strip()
+
+            try:
+                # Create a single filename with hyphens separating main components
+                filename = f"{clean_query}-{clean_description}-{clean_category}.mp3"
+                filepath = os.path.join(self.audio_dir, filename)
+
+                # Generate audio file
+                from gtts import gTTS
+                tts = gTTS(text=script_response.content, lang='en')
+                tts.save(filepath)
+
+                podcast_logger.info(f"Successfully saved audio file: {filepath}")
+
+                return {
+                    "type": "podcast",
+                    "content": script_response.content,
+                    "audio_file": filename,
+                    "category": clean_category,
+                    "description": description,
+                    "title": f"Debate: {description.title()}"
+                }
+            except Exception as e:
+                podcast_logger.error(f"Error in audio generation: {str(e)}", exc_info=True)
+                return {
+                    "type": "podcast",
+                    "content": script_response.content,
+                    "error": f"Audio generation failed: {str(e)}"
+                }
+
+        except Exception as e:
+            podcast_logger.error(f"Error in podcast production: {str(e)}", exc_info=True)
+            return {
+                "type": "podcast",
+                "error": f"Podcast production failed: {str(e)}"
+            }
+
+def create_agents(tavily_api_key: str) -> Dict[str, Any]:
+    # Initialize all agents
+    extractor = ExtractorAgent(tavily_api_key)
+    believer = BelieverAgent()
+    skeptic = SkepticAgent()
+    supervisor = SupervisorAgent()
+    podcast_producer = PodcastProducerAgent()
+
+    return {
+        "extractor": extractor,
+        "believer": believer,
+        "skeptic": skeptic,
+        "supervisor": supervisor,
+        "podcast_producer": podcast_producer
+    }
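The `create_agents` factory returns plain async callables, so the pipeline can also be exercised by hand outside the LangGraph workflow. A minimal sketch of composing them directly, based only on the call signatures in this file (the query string is illustrative; API keys are assumed to be set in the environment):

```python
import asyncio
import os

from agents import create_agents

async def demo() -> None:
    # Build the agent registry (requires OPENAI_API_KEY and ELEVEN_API_KEY in the env)
    agents = create_agents(os.environ["TAVILY_API_KEY"])

    # Extractor searches Tavily and condenses the results
    extracted = await agents["extractor"]("Should artificial intelligence be regulated?")

    # Both debaters respond to the extracted content; "chunks" context is optional
    skeptic = await agents["skeptic"]({"content": extracted["content"]})
    believer = await agents["believer"]({"content": extracted["content"]})

    # Supervisor synthesizes the three perspectives into labeled sections
    summary = await agents["supervisor"]({
        "extractor": extracted,
        "skeptic": skeptic,
        "believer": believer,
    })
    print(summary["content"])

if __name__ == "__main__":
    asyncio.run(demo())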
server/app.py
ADDED
@@ -0,0 +1,5 @@
+import uvicorn
+from main import app
+
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=7860)
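`app.py` is the Spaces entrypoint: it re-exports the FastAPI app from `main.py` and binds it to port 7860, the same port the Dockerfile exposes. A quick local smoke test might look like this (localhost URL is an assumption; `/audio-list` is defined in `main.py`):

```python
import requests

# With the container running (e.g. docker run -p 7860:7860 ...), hit a cheap endpoint
r = requests.get("http://localhost:7860/audio-list", timeout=10)
r.raise_for_status()
print(r.json())  # [] until a podcast has been generated
```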
server/context_storage/1_context.json
ADDED
@@ -0,0 +1 @@
+{"topic": "are humans dumb", "believer_chunks": ["Humans possess remarkable intelligence that has allowed us to innovate, create art, and advance technology.", "Human intelligence enables us to solve complex problems, adapt to various environments, and collaborate effectively."], "skeptic_chunks": ["The term 'dumb' is subjective and doesn't fully capture the diverse capabilities and potential of human intelligence.", "While humans may make mistakes or exhibit ignorance in certain areas, it doesn't diminish the overall cognitive abilities and achievements of our species."]}
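This file is an example of the per-podcast context that `get_podcast_context` in `main.py` writes to `context_storage/{podcast_id}_context.json`. Reading it back is plain JSON; a small sketch (the path assumes the repository root as working directory):

```python
import json

# Load the stored context for podcast ID 1
with open("server/context_storage/1_context.json") as f:
    ctx = json.load(f)

print(ctx["topic"])                  # "are humans dumb"
print(len(ctx["believer_chunks"]))   # 2 pro-topic chunks
print(len(ctx["skeptic_chunks"]))    # 2 critical chunks
```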
server/main.py
ADDED
@@ -0,0 +1,454 @@
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import FileResponse
+from fastapi.staticfiles import StaticFiles
+from pydantic import BaseModel
+from typing import List, Dict, Any, Optional
+import os
+import json
+from workflow import create_workflow, run_workflow
+import logging
+from dotenv import load_dotenv
+from langchain_openai import ChatOpenAI
+from langchain.prompts import ChatPromptTemplate
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.vectorstores import Qdrant
+from langchain_openai.embeddings import OpenAIEmbeddings
+from langchain_openai.chat_models import ChatOpenAI
+from operator import itemgetter
+from langchain.schema.output_parser import StrOutputParser
+from langchain.schema.runnable import RunnablePassthrough
+
+
+# Load environment variables
+load_dotenv()
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# Initialize components
+openai_api_key = os.getenv("OPENAI_API_KEY")
+if not openai_api_key:
+    raise ValueError("OpenAI API key not configured")
+
+# Initialize OpenAI components
+chat_model = ChatOpenAI(
+    model_name="gpt-3.5-turbo",
+    temperature=0.7,
+    openai_api_key=openai_api_key
+)
+
+# Initialize FastAPI app
+app = FastAPI()
+
+# Get allowed origins from environment variable or use default list
+ALLOWED_ORIGINS = os.getenv("ALLOWED_ORIGINS", "").split(",")
+if not ALLOWED_ORIGINS:
+    ALLOWED_ORIGINS = [
+        "http://localhost:5173",
+        "http://localhost:3000",
+        "https://*.vercel.app",  # Allow all Vercel preview and production domains
+    ]
+
+# Configure CORS for frontend servers
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=ALLOWED_ORIGINS,
+    allow_origin_regex=r"https://.*\.vercel\.app$",  # Allow all Vercel domains
+    allow_credentials=True,
+    allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD"],
+    allow_headers=["*"],
+    expose_headers=["*"],
+    max_age=3600,
+)
+
+# Add security headers middleware
+@app.middleware("http")
+async def add_security_headers(request, call_next):
+    response = await call_next(request)
+    response.headers["X-Content-Type-Options"] = "nosniff"
+    response.headers["X-Frame-Options"] = "DENY"
+    response.headers["X-XSS-Protection"] = "1; mode=block"
+    response.headers["Strict-Transport-Security"] = "max-age=31536000; includeSubDomains"
+    return response
+
+# Configure audio storage
+audio_dir = os.path.join(os.path.dirname(__file__), "audio_storage")
+os.makedirs(audio_dir, exist_ok=True)
+
+# Mount the audio directory as a static file directory
+app.mount("/audio-files", StaticFiles(directory=audio_dir), name="audio")
+
+# Configure context storage
+context_dir = os.path.join(os.path.dirname(__file__), "context_storage")
+os.makedirs(context_dir, exist_ok=True)
+
+class ChatMessage(BaseModel):
+    content: str
+    context: Optional[Dict[str, Any]] = None
+    agent_type: Optional[str] = "believer"
+
+class WorkflowResponse(BaseModel):
+    debate_history: List[Dict[str, str]]
+    supervisor_notes: List[str]
+    supervisor_chunks: List[Dict[str, List[str]]]
+    extractor_data: Dict[str, Any]
+    final_podcast: Dict[str, Any]
+
+class PodcastChatRequest(BaseModel):
+    message: str
+
+class PodcastChatResponse(BaseModel):
+    response: str
+
+@app.get("/audio-list")
+async def list_audio_files():
+    """List all available audio files."""
+    try:
+        files = os.listdir(audio_dir)
+        audio_files = []
+        for file in files:
+            if file.endswith(('.mp3', '.wav')):
+                file_path = os.path.join(audio_dir, file)
+                audio_files.append({
+                    "filename": file,
+                    "path": f"/audio-files/{file}",
+                    "size": os.path.getsize(file_path)
+                })
+        return audio_files
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.delete("/audio/{filename}")
+async def delete_audio_file(filename: str):
+    """Delete an audio file and its corresponding transcript."""
+    try:
+        # Delete audio file
+        file_path = os.path.join(audio_dir, filename)
+        if not os.path.exists(file_path):
+            raise HTTPException(status_code=404, detail="File not found")
+
+        # Get all audio files to determine the podcast ID
+        audio_files = [f for f in os.listdir(audio_dir) if f.endswith(('.mp3', '.wav'))]
+        try:
+            # Find the index (0-based) of the file being deleted
+            podcast_id = audio_files.index(filename) + 1  # Convert to 1-based ID
+            logger.info(f"Deleting podcast with ID: {podcast_id}")
+
+            # Path to transcripts file
+            transcripts_file = os.path.join(os.path.dirname(__file__), "transcripts", "podcasts.json")
+
+            # Update transcripts if file exists
+            if os.path.exists(transcripts_file):
+                with open(transcripts_file, 'r') as f:
+                    transcripts = json.load(f)
+
+                # Remove the transcript at the corresponding index
+                if len(transcripts) >= podcast_id:
+                    transcripts.pop(podcast_id - 1)  # Convert back to 0-based index
+
+                    # Save updated transcripts
+                    with open(transcripts_file, 'w') as f:
+                        json.dump(transcripts, f, indent=2)
+                    logger.info(f"Removed transcript for podcast ID {podcast_id}")
+
+            # Delete the audio file
+            os.remove(file_path)
+            logger.info(f"Deleted audio file: {filename}")
+
+            return {"message": "File and transcript deleted successfully"}
+
+        except ValueError:
+            logger.error(f"Could not determine podcast ID for file: {filename}")
+            # Still delete the audio file even if transcript removal fails
+            os.remove(file_path)
+            return {"message": "Audio file deleted, but transcript could not be removed"}
+
+    except Exception as e:
+        logger.error(f"Error in delete_audio_file: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.get("/audio/{filename}")
+async def get_audio_file(filename: str):
+    """Get an audio file by filename."""
+    try:
+        file_path = os.path.join(audio_dir, filename)
+        if not os.path.exists(file_path):
+            raise HTTPException(status_code=404, detail="File not found")
+        return FileResponse(file_path)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.get("/podcast/{podcast_id}/context")
+async def get_podcast_context(podcast_id: str):
+    """Get or generate context for a podcast."""
+    try:
+        logger.info(f"Getting context for podcast {podcast_id}")
+        context_path = os.path.join(context_dir, f"{podcast_id}_context.json")
+
+        # If context exists, return it
+        if os.path.exists(context_path):
+            logger.info(f"Found existing context file at {context_path}")
+            with open(context_path, 'r') as f:
+                return json.load(f)
+
+        # If no context exists, we need to create it from the podcast content
+        logger.info("No existing context found, creating new context")
+
+        # Get the audio files to find the podcast filename
+        files = os.listdir(audio_dir)
+        logger.info(f"Found {len(files)} files in audio directory")
+        podcast_files = [f for f in files if f.endswith('.mp3')]
+        logger.info(f"Found {len(podcast_files)} podcast files: {podcast_files}")
+
+        if not podcast_files:
+            logger.error("No podcast files found")
+            raise HTTPException(status_code=404, detail="No podcast files found")
+
+        # Find the podcast file that matches this ID
+        try:
+            podcast_index = int(podcast_id) - 1  # Convert 1-based ID to 0-based index
+            if podcast_index < 0 or podcast_index >= len(podcast_files):
+                raise ValueError(f"Invalid podcast ID: {podcast_id}, total podcasts: {len(podcast_files)}")
+            podcast_filename = podcast_files[podcast_index]
+            logger.info(f"Selected podcast file: {podcast_filename}")
+        except (ValueError, IndexError) as e:
+            logger.error(f"Invalid podcast ID: {podcast_id}, Error: {str(e)}")
+            raise HTTPException(status_code=404, detail=f"Invalid podcast ID: {podcast_id}")
+
+        # Extract topic from filename
+        try:
+            topic = podcast_filename.split('-')[0].replace('_', ' ')
+            logger.info(f"Extracted topic: {topic}")
+        except Exception as e:
+            logger.error(f"Error extracting topic from filename: {podcast_filename}, Error: {str(e)}")
+            raise HTTPException(status_code=500, detail=f"Error extracting topic from filename: {str(e)}")
+
+        # Initialize OpenAI chat model for content analysis
+        try:
+            chat_model = ChatOpenAI(
+                model_name="gpt-3.5-turbo",
+                temperature=0.3,
+                openai_api_key=openai_api_key
+            )
+            logger.info("Successfully initialized ChatOpenAI")
+        except Exception as e:
+            logger.error(f"Error initializing ChatOpenAI: {str(e)}")
+            raise HTTPException(status_code=500, detail=f"Error initializing chat model: {str(e)}")
+
+        # Create prompt template for content analysis
+        prompt = ChatPromptTemplate.from_messages([
+            ("system", """You are an expert content analyzer. Your task is to:
+1. Analyze the given topic and create balanced, factual content chunks about it
+2. Generate two types of chunks:
+   - Believer chunks: Positive aspects, opportunities, and solutions related to the topic
+   - Skeptic chunks: Challenges, risks, and critical questions about the topic
+3. Each chunk should be self-contained and focused on a single point
+4. Keep chunks concise (2-3 sentences each)
+5. Ensure all content is factual and balanced
+
+Format your response as a JSON object with two arrays:
+{{
+    "believer_chunks": ["chunk1", "chunk2", ...],
+    "skeptic_chunks": ["chunk1", "chunk2", ...]
+}}"""),
+            ("human", "Create balanced content chunks about this topic: {topic}")
+        ])
+
+        # Generate content chunks
+        chain = prompt | chat_model
+
+        try:
+            logger.info(f"Generating content chunks for topic: {topic}")
+            response = await chain.ainvoke({
+                "topic": topic
+            })
+            logger.info("Successfully received response from OpenAI")
+
+            # Parse the response content as JSON
+            try:
+                content_chunks = json.loads(response.content)
+                logger.info(f"Successfully parsed response JSON with {len(content_chunks.get('believer_chunks', []))} believer chunks and {len(content_chunks.get('skeptic_chunks', []))} skeptic chunks")
+            except json.JSONDecodeError as e:
+                logger.error(f"Error parsing response JSON: {str(e)}, Response content: {response.content}")
+                raise HTTPException(status_code=500, detail=f"Error parsing content chunks: {str(e)}")
+
+            # Create the context object
+            context = {
+                "topic": topic,
+                "believer_chunks": content_chunks.get("believer_chunks", []),
+                "skeptic_chunks": content_chunks.get("skeptic_chunks", [])
+            }
+
+            # Save the context
+            try:
+                with open(context_path, 'w') as f:
+                    json.dump(context, f)
+                logger.info(f"Saved new context to {context_path}")
+            except Exception as e:
+                logger.error(f"Error saving context file: {str(e)}")
+                raise HTTPException(status_code=500, detail=f"Error saving context file: {str(e)}")
+
+            return context
+
+        except Exception as e:
+            logger.error(f"Error generating content chunks: {str(e)}")
+            raise HTTPException(
+                status_code=500,
+                detail=f"Error generating content chunks: {str(e)}"
+            )
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Error in get_podcast_context: {str(e)}", exc_info=True)
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.post("/chat")
+async def chat(message: ChatMessage):
+    """Process a chat message with context-awareness."""
+    try:
+        # Log incoming message
+        logger.info(f"Received chat message: {message}")
+
+        # Get API key
+        tavily_api_key = os.getenv("TAVILY_API_KEY")
+        if not tavily_api_key:
+            logger.error("Tavily API key not found")
+            raise HTTPException(status_code=500, detail="Tavily API key not configured")
+
+        # Initialize the workflow
+        try:
+            workflow = create_workflow(tavily_api_key)
+            logger.info("Workflow created successfully")
+        except Exception as e:
+            logger.error(f"Error creating workflow: {str(e)}")
+            raise HTTPException(status_code=500, detail=f"Error creating workflow: {str(e)}")
+
+        # Run the workflow with context
+        try:
+            result = await run_workflow(
+                workflow,
+                message.content,
+                agent_type=message.agent_type,
+                context=message.context
+            )
+            logger.info("Workflow completed successfully")
+        except Exception as e:
+            logger.error(f"Error running workflow: {str(e)}")
+            raise HTTPException(status_code=500, detail=f"Error running workflow: {str(e)}")
+
+        return WorkflowResponse(**result)
+    except Exception as e:
+        logger.error(f"Error in chat endpoint: {str(e)}", exc_info=True)
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.post("/podcast-chat/{podcast_id}", response_model=PodcastChatResponse)
+async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
+    """Handle chat messages for a specific podcast."""
+    try:
+        logger.info(f"Processing chat message for podcast {podcast_id}")
+
+        # Path to transcripts file
+        transcripts_file = os.path.join(os.path.dirname(__file__), "transcripts", "podcasts.json")
+
+        # Check if transcripts file exists
+        if not os.path.exists(transcripts_file):
+            raise HTTPException(status_code=404, detail="Transcripts file not found")
+
+        # Read transcripts
+        with open(transcripts_file, 'r') as f:
+            transcripts = json.load(f)
+
+        # Convert podcast_id to zero-based index
+        try:
+            podcast_index = int(podcast_id) - 1
+            if podcast_index < 0 or podcast_index >= len(transcripts):
+                raise ValueError(f"Invalid podcast ID: {podcast_id}")
+        except ValueError as e:
+            raise HTTPException(status_code=404, detail=str(e))
+
+        # Get podcast transcript
+        podcast_transcript = transcripts[podcast_index]["podcastScript"]
+
+        # Split text into chunks
+        text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=500,
+            chunk_overlap=50,
+            length_function=len,
+        )
+
+        # Use split_text for strings instead of split_documents
+        chunks = text_splitter.split_text(podcast_transcript)
+
+        # Initialize embedding model
+        embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
+
+        # Create a unique collection name for this podcast
+        collection_name = f"podcast_{podcast_id}"
+
+        # Initialize Qdrant with local storage
+        vectorstore = Qdrant.from_texts(
+            texts=chunks,
+            embedding=embedding_model,
+            location=":memory:",  # Use in-memory storage
+            collection_name=collection_name
+        )
+
+        # Configure the retriever with search parameters
+        qdrant_retriever = vectorstore.as_retriever(
+            search_type="similarity",
+            search_kwargs={"k": 3}  # Get top 3 most relevant chunks
+        )
+
+        base_rag_prompt_template = """\
+You are a helpful podcast assistant. Answer the user's question based on the provided context from the podcast transcript.
+If you can't find the answer in the context, just say "I don't have enough information to answer that question."
+Keep your responses concise and focused on the question.
+
+Context:
+{context}
+
+Question:
+{question}
+"""
+
+        base_rag_prompt = ChatPromptTemplate.from_template(base_rag_prompt_template)
+        base_llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.7)
+
+        # Create the RAG chain
+        def format_docs(docs):
+            return "\n\n".join(doc.page_content for doc in docs)
+
+        # Add logging for the retrieved documents and final prompt
+        def get_context_and_log(input_dict):
+            context = format_docs(qdrant_retriever.get_relevant_documents(input_dict["question"]))
+            logger.info("Retrieved context from podcast:")
+            logger.info("-" * 50)
+            logger.info(f"Context:\n{context}")
+            logger.info("-" * 50)
+            logger.info(f"Question: {input_dict['question']}")
+            logger.info("-" * 50)
+            return {"context": context, "question": input_dict["question"]}
+
+        # Create the chain
+        chain = (
+            RunnablePassthrough()
+            | get_context_and_log
+            | base_rag_prompt
+            | base_llm
+        )
+
+        # Get response
+        response = chain.invoke({"question": request.message})
+
+        return PodcastChatResponse(response=response.content)
+
+    except Exception as e:
+        logger.error(f"Error in podcast chat: {str(e)}", exc_info=True)
+        raise HTTPException(status_code=500, detail=str(e))
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)
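For reference, the `/podcast-chat/{podcast_id}` endpoint above can be driven with a few lines of client code; a sketch against a local run on port 8000 (the port from this file's `__main__` block — the Docker image instead serves on 7860 via `app.py`):

```python
import requests

BASE_URL = "http://localhost:8000"  # assumed local run of main.py

# PodcastChatRequest has a single "message" field
payload = {"message": "What was the believer's strongest argument?"}
resp = requests.post(f"{BASE_URL}/podcast-chat/1", json=payload)
resp.raise_for_status()
print(resp.json()["response"])  # PodcastChatResponse.response
```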
server/qdrant_db/.lock
ADDED
@@ -0,0 +1 @@
+tmp lock file
server/qdrant_db/meta.json
ADDED
@@ -0,0 +1 @@
+{"collections": {}, "aliases": {}}
server/requirements.txt
ADDED
@@ -0,0 +1,14 @@
+fastapi==0.109.2
+uvicorn==0.27.1
+pydantic>=2.7.4
+langchain>=0.3.15
+langchain-community>=0.3.15
+langchain-openai>=0.3.2
+langchain-qdrant>=0.1.1
+openai>=1.58.1
+langgraph>=0.2.67
+python-dotenv==1.0.1
+gtts==2.5.1
+pyyaml==6.0.1
+tiktoken>=0.6.0
+qdrant-client>=1.7.0
server/test_workflow.py
ADDED
@@ -0,0 +1,303 @@
+import asyncio
+from dotenv import load_dotenv
+import os
+from workflow import create_workflow, run_workflow, TRANSCRIPTS_FILE
+import json
+from datetime import datetime
+import traceback
+
+# Load environment variables
+load_dotenv()
+
+def log_step(step: str, details: str = None):
+    """Print a formatted step log"""
+    timestamp = datetime.now().strftime("%H:%M:%S")
+    print(f"\n[{timestamp}] 🔄 {step}")
+    if details:
+        print(f"   {details}")
+
+def log_agent(agent: str, message: str):
+    """Print a formatted agent message"""
+    timestamp = datetime.now().strftime("%H:%M:%S")
+    agent_icons = {
+        "extractor": "🔍",
+        "skeptic": "🤔",
+        "believer": "💡",
+        "supervisor": "👀",
+        "storage": "📦",
+        "podcast": "🎙️",
+        "error": "❌",
+        "step": "➡️"
+    }
+    icon = agent_icons.get(agent.lower(), "💬")
+    print(f"\n[{timestamp}] {icon} {agent}:")
+    print(f"   {message}")
+
+def check_api_keys():
+    """Check if all required API keys are present"""
+    required_keys = {
+        "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY"),
+        "ELEVEN_API_KEY": os.getenv("ELEVEN_API_KEY"),
+        "TAVILY_API_KEY": os.getenv("TAVILY_API_KEY")
+    }
+
+    missing_keys = [key for key, value in required_keys.items() if not value]
+    if missing_keys:
+        raise ValueError(f"Missing required API keys: {', '.join(missing_keys)}")
+
+    return required_keys["TAVILY_API_KEY"]
+
+async def test_transcript_saving(workflow, query: str):
+    """Test that transcripts are properly saved to podcasts.json"""
+    try:
+        # Get initial transcript count
+        initial_transcripts = []
+        if os.path.exists(TRANSCRIPTS_FILE):
+            with open(TRANSCRIPTS_FILE, 'r') as f:
+                initial_transcripts = json.load(f)
+        initial_count = len(initial_transcripts)
+
+        # Run workflow
+        result = await run_workflow(workflow, query)
+
+        # Verify transcript was saved
+        if not os.path.exists(TRANSCRIPTS_FILE):
+            return False, "Transcripts file was not created"
+
+        with open(TRANSCRIPTS_FILE, 'r') as f:
+            transcripts = json.load(f)
+
+        if len(transcripts) <= initial_count:
+            return False, "No new transcript was added"
+
+        latest_transcript = transcripts[-1]
+        if not all(key in latest_transcript for key in ["id", "podcastScript", "topic"]):
+            return False, "Transcript is missing required fields"
+
+        if latest_transcript["topic"] != query:
+            return False, f"Topic mismatch. Expected: {query}, Got: {latest_transcript['topic']}"
+
+        return True, "Transcript was saved successfully"
+
+    except Exception as e:
+        return False, f"Error in transcript test: {str(e)}\n{traceback.format_exc()}"
+
+async def test_single_turn(workflow_graph, query: str):
+    """Test a single turn of the workflow"""
+    result = await run_workflow(workflow_graph, query)
+    return len(result["debate_history"]) > 0
+
+async def test_debate_length(workflow, query):
+    """Test that debate history does not exceed 20 messages"""
+    result = await run_workflow(workflow, query)
+    return len(result["debate_history"]) <= 20
+
+async def test_podcast_generation(workflow, query):
+    """Test podcast generation functionality"""
+    try:
+        result = await run_workflow(workflow, query)
+
+        # Check for podcast data
+        if "final_podcast" not in result:
+            return False, "No podcast data in result"
+
+        podcast_data = result["final_podcast"]
+
+        # Check for errors in podcast generation
+        if "error" in podcast_data:
+            return False, f"Podcast generation error: {podcast_data['error']}"
+
+        # Verify script generation
+        if not podcast_data.get("content"):
+            return False, "No podcast script generated"
+
+        # Verify audio file generation
+        if not podcast_data.get("audio_file"):
+            return False, "No audio file generated"
+
+        # Check if audio file exists
+        audio_path = os.path.join(os.path.dirname(__file__), "audio_storage", podcast_data["audio_file"])
+        if not os.path.exists(audio_path):
+            return False, f"Audio file not found at {audio_path}"
+
+        # Check file size
+        file_size = os.path.getsize(audio_path)
+        if file_size == 0:
+            return False, "Audio file is empty"
+
+        # Check if transcript was saved
+        transcript_success, transcript_message = await test_transcript_saving(workflow, query)
+        if not transcript_success:
+            return False, f"Transcript saving failed: {transcript_message}"
+
+        return True, f"Podcast generated successfully (file size: {file_size} bytes)"
+
+    except Exception as e:
+        return False, f"Error in podcast test: {str(e)}\n{traceback.format_exc()}"
+
+async def run_all_tests():
+    log_step("Running all tests")
+
+    try:
+        # Check API keys
+        tavily_api_key = check_api_keys()
+
+        # Create workflow
+        workflow = create_workflow(tavily_api_key)
+
+        # Test queries
+        queries = [
+            "What are the environmental impacts of electric vehicles?",
+            "How does artificial intelligence impact healthcare?",
+            "What are the pros and cons of remote work?",
+            "Discuss the future of space exploration"
+        ]
+
+        results = {}
+        for query in queries:
+            try:
+                log_step(f"Testing query", query)
+
+                # Test transcript saving
+                transcript_success, transcript_message = await test_transcript_saving(workflow, query)
+                log_agent("step", f"Transcript test: {transcript_message}")
+
+                result = await run_workflow(workflow, query)
+
+                podcast_success, podcast_message = await test_podcast_generation(workflow, query)
+
+                results[query] = {
+                    "success": True,
+                    "debate_length": len(result["debate_history"]),
+                    "supervisor_notes": len(result["supervisor_notes"]),
+                    "transcript_saved": transcript_success,
+                    "transcript_status": transcript_message,
+                    "podcast_generated": podcast_success,
+                    "podcast_status": podcast_message,
+                    "timestamp": datetime.now().isoformat()
+                }
+
+                log_agent("step", f"Test completed for: {query}")
+
+            except Exception as e:
+                results[query] = {
+                    "success": False,
+                    "error": str(e),
+                    "traceback": traceback.format_exc(),
+                    "timestamp": datetime.now().isoformat()
+                }
+                log_agent("error", f"Test failed for: {query}\n{str(e)}")
+
+        # Save results
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        filename = f"test_results_{timestamp}.json"
+        with open(filename, "w") as f:
+            json.dump(results, f, indent=2)
+
+        log_step("Results saved", f"File: {filename}")
+        print("\nTest Results:")
+        print(json.dumps(results, indent=2))
+
+        return results
+
+    except Exception as e:
+        log_agent("error", f"Critical error in tests: {str(e)}\n{traceback.format_exc()}")
+        raise
+
+async def test_workflow():
+    log_step("Starting workflow test")
+
+    try:
+        # Check API keys
+        tavily_api_key = check_api_keys()
+
+        # Create the workflow
+        log_step("Creating workflow graph")
+        workflow_graph = create_workflow(tavily_api_key)
+
+        # Test query
+        test_query = "Should artificial intelligence be regulated?"
+        log_step("Test Query", test_query)
+
+        # Run the workflow
+        log_step("Running workflow")
+        result = await run_workflow(workflow_graph, test_query)
+
+        # Test transcript saving
+        log_step("Testing transcript saving")
+        transcript_success, transcript_message = await test_transcript_saving(workflow_graph, test_query)
+        log_agent("step", f"Transcript test: {transcript_message}")
+
+        # Print extractor results
+        log_step("Information Extraction Phase")
+        if "extractor_data" in result:
+            log_agent("Extractor", result["extractor_data"].get("content", "No content"))
+
+        # Print debate history
+        log_step("Debate Phase")
+        print("\nDebate Timeline:")
+        for i, entry in enumerate(result["debate_history"], 1):
+            log_agent(entry["speaker"], entry["content"])
+            if i < len(result["supervisor_notes"]):
+                log_agent("Supervisor", f"Analysis of Turn {i}:\n   {result['supervisor_notes'][i]}")
+
+        # Print final supervisor analysis
+        log_step("Final Supervisor Analysis")
+        if result["supervisor_notes"]:
+            log_agent("Supervisor", result["supervisor_notes"][-1])
+
+        # Print podcast results
+        log_step("Podcast Production Phase")
+        if "final_podcast" in result:
+            podcast_data = result["final_podcast"]
+            if "error" in podcast_data:
+                log_agent("Podcast", f"Error: {podcast_data['error']}")
+            else:
+                log_agent("Podcast", "Script:\n" + podcast_data["content"])
+                if podcast_data.get("audio_file"):
+                    audio_path = os.path.join(os.path.dirname(__file__), "audio_storage", podcast_data["audio_file"])
+                    file_size = os.path.getsize(audio_path) if os.path.exists(audio_path) else 0
+                    log_agent("Podcast", f"Audio file saved as: {podcast_data['audio_file']} (size: {file_size} bytes)")
+
+        # Save results
+        log_step("Saving Results")
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        filename = f"test_results_{timestamp}.json"
+        with open(filename, "w") as f:
+            json.dump({
+                "timestamp": datetime.now().isoformat(),
+                "query": test_query,
+                "workflow_results": result,
+                "transcript_saved": transcript_success,
+                "transcript_status": transcript_message
+            }, f, indent=2)
+        log_step("Results Saved", f"File: {filename}")
+
+    except Exception as e:
+        log_step("ERROR", f"Workflow execution failed: {str(e)}")
+        print("\nFull traceback:")
+        print(traceback.format_exc())
+        raise
+
+if __name__ == "__main__":
+    print("\n" + "="*50)
+    print("🤖 Starting AI Debate Workflow Test")
+    print("="*50)
+
+    try:
+        asyncio.run(test_workflow())
+        print("\n" + "="*50)
+        print("✅ Test Complete")
+        print("="*50)
+
+        # Run comprehensive tests
+        print("\nRunning comprehensive tests...")
+        asyncio.run(run_all_tests())
+        print("\n" + "="*50)
+        print("✅ All Tests Complete")
+        print("="*50)
+    except Exception as e:
|
300 |
+
print("\n" + "="*50)
|
301 |
+
print(f"❌ Tests Failed: {str(e)}")
|
302 |
+
print("="*50)
|
303 |
+
raise
|
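For ad-hoc runs, the comprehensive suite can also be invoked without going through __main__; a minimal sketch, assuming the server dependencies are installed and the API keys that check_api_keys() expects are present in the environment:

    import asyncio
    from test_workflow import run_all_tests

    # Runs all four canned queries and writes test_results_<timestamp>.json
    asyncio.run(run_all_tests())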
server/transcripts/podcasts.json
ADDED
@@ -0,0 +1 @@
[]
server/workflow.py
ADDED
@@ -0,0 +1,254 @@
from typing import Dict, Any, List, Annotated, TypedDict, Union, Optional
from langgraph.graph import Graph, END
from agents import create_agents
import os
from dotenv import load_dotenv
import json
import uuid

# Load environment variables
load_dotenv()

# Create transcripts directory if it doesn't exist
TRANSCRIPTS_DIR = os.path.join(os.path.dirname(__file__), "transcripts")
os.makedirs(TRANSCRIPTS_DIR, exist_ok=True)
TRANSCRIPTS_FILE = os.path.join(TRANSCRIPTS_DIR, "podcasts.json")


def save_transcript(podcast_script: str, user_query: str) -> None:
    """Save podcast transcript to JSON file."""
    # Create new transcript entry
    transcript = {
        "id": str(uuid.uuid4()),
        "podcastScript": podcast_script,
        "topic": user_query
    }

    try:
        # Load existing transcripts
        if os.path.exists(TRANSCRIPTS_FILE):
            with open(TRANSCRIPTS_FILE, 'r') as f:
                transcripts = json.load(f)
        else:
            transcripts = []

        # Append new transcript
        transcripts.append(transcript)

        # Save updated transcripts
        with open(TRANSCRIPTS_FILE, 'w') as f:
            json.dump(transcripts, f, indent=2)

    except Exception as e:
        print(f"Error saving transcript: {str(e)}")
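# Illustrative shape of transcripts/podcasts.json after a run (placeholder
# values, not output from a real run):
# [
#   {
#     "id": "<uuid4 string>",
#     "podcastScript": "<full podcast script>",
#     "topic": "<original user query>"
#   }
# ]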
class AgentState(TypedDict):
    messages: List[Dict[str, Any]]
    current_agent: str
    debate_turns: int
    extractor_data: Dict[str, Any]
    debate_history: List[Dict[str, Any]]
    supervisor_notes: List[str]
    supervisor_chunks: List[Dict[str, List[str]]]
    final_podcast: Dict[str, Any]
    agent_type: str
    context: Optional[Dict[str, Any]]


def create_workflow(tavily_api_key: str):
    # Initialize all agents
    agents = create_agents(tavily_api_key)

    # Create the graph
    workflow = Graph()

    # Define the extractor node function
    async def run_extractor(state: AgentState) -> Dict[str, Any]:
        query = state["messages"][-1]["content"]
        print(f"Extractor processing query: {query}")

        try:
            response = await agents["extractor"](query)
            print(f"Extractor response: {response}")

            # Update state
            state["extractor_data"] = response

            # Get initial supervisor analysis
            supervisor_analysis = await agents["supervisor"]({
                "extractor": response,
                "skeptic": {"content": "Not started"},
                "believer": {"content": "Not started"}
            })
            print(f"Initial supervisor analysis: {supervisor_analysis}")

            state["supervisor_notes"].append(supervisor_analysis["content"])
            state["supervisor_chunks"].append(supervisor_analysis.get("chunks", {}))

            # Move to debate phase
            state["current_agent"] = "debate"
            return state
        except Exception as e:
            print(f"Error in extractor: {str(e)}")
            raise Exception(f"Error in extractor: {str(e)}")

    # Define the debate node function
    async def run_debate(state: AgentState) -> Dict[str, Any]:
        print(f"Debate turn {state['debate_turns']}")

        try:
            if state["debate_turns"] == 0:
                # First turn: both agents respond to extractor
                print("Starting first debate turn")

                # If we have context, use it to inform the agents' responses
                context = state.get("context", {})
                agent_chunks = context.get("agent_chunks", []) if context else []

                # Create context-aware input for agents
                context_input = {
                    "content": state["extractor_data"]["content"],
                    "chunks": agent_chunks
                }

                skeptic_response = await agents["skeptic"](context_input)
                believer_response = await agents["believer"](context_input)

                state["debate_history"].extend([
                    {"speaker": "skeptic", "content": skeptic_response["content"]},
                    {"speaker": "believer", "content": believer_response["content"]}
                ])
                print(f"First turn responses added: {state['debate_history'][-2:]}")
            else:
                # Alternating responses based on agent type if specified
                if state["agent_type"] in ["believer", "skeptic"]:
                    current_speaker = state["agent_type"]
                else:
                    # Default alternating behavior
                    last_speaker = state["debate_history"][-1]["speaker"]
                    current_speaker = "believer" if last_speaker == "skeptic" else "skeptic"

                print(f"Processing response for {current_speaker}")

                # Create context-aware input
                context = state.get("context", {})
                agent_chunks = context.get("agent_chunks", []) if context else []
                context_input = {
                    "content": state["debate_history"][-1]["content"],
                    "chunks": agent_chunks
                }

                response = await agents[current_speaker](context_input)

                state["debate_history"].append({
                    "speaker": current_speaker,
                    "content": response["content"]
                })
                print(f"Added response: {state['debate_history'][-1]}")

            # Add supervisor note and chunks
            supervisor_analysis = await agents["supervisor"]({
                "extractor": state["extractor_data"],
                "skeptic": {"content": state["debate_history"][-1]["content"]},
                "believer": {"content": state["debate_history"][-2]["content"] if len(state["debate_history"]) > 1 else "Not started"}
            })
            print(f"Supervisor analysis: {supervisor_analysis}")

            state["supervisor_notes"].append(supervisor_analysis["content"])
            state["supervisor_chunks"].append(supervisor_analysis.get("chunks", {}))

            state["debate_turns"] += 1
            print(f"Debate turn {state['debate_turns']} completed")

            # End the workflow after 2 debate turns
            if state["debate_turns"] >= 2:
                state["current_agent"] = "podcast"
                print("Moving to podcast production")

            return state
        except Exception as e:
            print(f"Error in debate: {str(e)}")
            raise Exception(f"Error in debate: {str(e)}")

    async def run_podcast_producer(state: AgentState) -> Dict[str, Any]:
        print("Starting podcast production")

        try:
            # Create podcast from debate
            podcast_result = await agents["podcast_producer"](
                state["debate_history"],
                state["supervisor_notes"],
                state["messages"][-1]["content"],  # Pass the original user query
                state["supervisor_chunks"],
                {}  # Empty quadrant analysis since we removed storage manager
            )
            print(f"Podcast production result: {podcast_result}")

            # Save transcript to JSON file
            save_transcript(
                podcast_script=podcast_result["content"],
                user_query=state["messages"][-1]["content"]
            )

            # Store the result
            state["final_podcast"] = podcast_result

            # End the workflow
            state["current_agent"] = END
            return state
        except Exception as e:
            print(f"Error in podcast production: {str(e)}")
            raise Exception(f"Error in podcast production: {str(e)}")

    # Add nodes to the graph
    workflow.add_node("extractor", run_extractor)
    workflow.add_node("debate", run_debate)
    workflow.add_node("podcast", run_podcast_producer)

    # Set the entry point
    workflow.set_entry_point("extractor")

    # Add edges
    workflow.add_edge("extractor", "debate")

    # Add conditional edges for debate
    workflow.add_conditional_edges(
        "debate",
        lambda x: "podcast" if x["debate_turns"] >= 2 else "debate"
    )

    # Add edge from podcast to end
    workflow.add_edge("podcast", END)

    # Compile the graph
    return workflow.compile()
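# Routing note: the conditional edge above loops "debate" back onto itself
# until debate_turns reaches 2, then hands off to "podcast". Because the first
# turn appends two entries (skeptic and believer) and the second appends one,
# a default run yields three debate_history entries before podcast production.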
async def run_workflow(
    graph: Graph,
    query: str,
    agent_type: str = "believer",
    context: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """Run the workflow with a given query."""
    # Initialize the state
    initial_state = {
        "messages": [{"role": "user", "content": query}],
        "current_agent": "extractor",
        "debate_turns": 0,
        "extractor_data": {},
        "debate_history": [],
        "supervisor_notes": [],
        "supervisor_chunks": [],
        "final_podcast": {},
        "agent_type": agent_type,
        "context": context
    }

    # Run the graph
    result = await graph.ainvoke(initial_state)

    return {
        "debate_history": result["debate_history"],
        "supervisor_notes": result["supervisor_notes"],
        "supervisor_chunks": result["supervisor_chunks"],
        "extractor_data": result["extractor_data"],
        "final_podcast": result["final_podcast"]
    }
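For quick local verification of this module on its own, the compiled graph can be driven directly; a minimal sketch, assuming TAVILY_API_KEY is set in the environment and the agents module resolves as in the imports above:

    import asyncio
    import os
    from workflow import create_workflow, run_workflow

    async def main():
        graph = create_workflow(os.environ["TAVILY_API_KEY"])
        result = await run_workflow(graph, "Should artificial intelligence be regulated?")
        print(result["final_podcast"].get("content", "no script produced"))

    asyncio.run(main())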