Nagesh Muralidhar committed on
Commit a288236 · 1 Parent(s): 752c2c6

deploying server
server/.env ADDED
@@ -0,0 +1,5 @@
+ OPENAI_API_KEY=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
+
+ ELEVEN_API_KEY=sk_ff6d28e7b04706abb03caf8c3239f75f373687ba440b2263
+ TAVILY_API_KEY=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
+
server/Dockerfile ADDED
@@ -0,0 +1,24 @@
+ FROM python:3.10-slim
+
+ WORKDIR /code
+
+ # Install system dependencies
+ RUN apt-get update && apt-get install -y \
+     build-essential \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Copy requirements first for better caching
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy the rest of the application
+ COPY . .
+
+ # Create necessary directories
+ RUN mkdir -p audio_storage context_storage transcripts
+
+ # Expose the port
+ EXPOSE 7860
+
+ # Command to run the application
+ CMD ["python", "app.py"]
server/README.md ADDED
@@ -0,0 +1,59 @@
+ # Podcast Discussion Server
+
+ This is a FastAPI-based server that provides podcast discussion and analysis capabilities.
+
+ ## Environment Variables
+
+ The following environment variables need to be set in the Hugging Face Space:
+
+ - `OPENAI_API_KEY`: Your OpenAI API key
+ - `ELEVEN_API_KEY`: Your ElevenLabs API key (required at startup by `agents.py`)
+ - `TAVILY_API_KEY`: Your Tavily search API key (required by the `/chat` workflow)
+ - `ALLOWED_ORIGINS`: Comma-separated list of allowed origins (optional, defaults to Vercel domains and localhost)
+
+ ## API Endpoints
+
+ - `/chat`: Main chat endpoint for podcast discussions
+ - `/podcast-chat/{podcast_id}`: Chat endpoint for specific podcast discussions
+ - `/audio-list`: List available audio files
+ - `/audio/{filename}`: Fetch (GET) or delete (DELETE) a specific audio file
+ - `/podcast/{podcast_id}/context`: Get podcast context
+
+ ## Stack
+
+ - FastAPI
+ - OpenAI
+ - LangChain
+ - Qdrant
+ - gTTS
+
+ ## Deployment
+
+ ### Backend (Hugging Face Spaces)
+
+ This server is deployed on Hugging Face Spaces using their Docker deployment feature.
+
+ ### Frontend (Vercel)
+
+ When deploying the frontend to Vercel:
+
+ 1. Set the API base URL in your frontend environment:
+
+ ```
+ VITE_API_BASE_URL=https://your-username-your-space-name.hf.space
+ ```
+
+ 2. The server is already configured to accept requests from:
+
+ - All Vercel domains (\*.vercel.app)
+ - Local development servers (localhost:3000, localhost:5173)
+
+ 3. If you're using a custom domain, add it to the `ALLOWED_ORIGINS` environment variable in your Hugging Face Space:
+
+ ```
+ ALLOWED_ORIGINS=https://your-custom-domain.com,https://www.your-custom-domain.com
+ ```
+
+ ## Security Features
+
+ - CORS protection with a specific origin allowlist
+ - Security headers (HSTS, XSS protection, etc.)
+ - Rate limiting
+ - SSL/TLS encryption (provided by Hugging Face Spaces)
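
To make the README's endpoint list concrete, here is a minimal client sketch against a running instance. It is an illustration only: the base URL, topic string, and the `requests` dependency are assumptions, not part of this commit.

```
import requests

BASE_URL = "http://localhost:7860"  # assumption: local run via app.py; swap in your Space URL

# Kick off a debate workflow (POST /chat expects a ChatMessage body; see main.py)
result = requests.post(f"{BASE_URL}/chat", json={"content": "Should AI be regulated?"}).json()
print(result["final_podcast"].get("audio_file"))

# List the generated audio files (GET /audio-list)
for audio in requests.get(f"{BASE_URL}/audio-list").json():
    print(audio["filename"], audio["size"])
```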
server/__pycache__/agents.cpython-311.pyc ADDED
Binary file (24.6 kB)
 
server/__pycache__/main.cpython-311.pyc ADDED
Binary file (26 kB)
 
server/__pycache__/shared.cpython-311.pyc ADDED
Binary file (3.68 kB)
 
server/__pycache__/transcript_manager.cpython-311.pyc ADDED
Binary file (7.04 kB)
 
server/__pycache__/workflow.cpython-311.pyc ADDED
Binary file (11.9 kB)
 
server/agents.py ADDED
@@ -0,0 +1,422 @@
+ from langchain_openai import ChatOpenAI
+ from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
+ from langchain_community.agent_toolkits.load_tools import load_tools
+ from langchain_community.tools.tavily_search.tool import TavilySearchResults
+ from langchain_community.tools import ElevenLabsText2SpeechTool
+ import tiktoken
+ from typing import Dict, Any, List, Optional
+ import os
+ from dotenv import load_dotenv
+ from datetime import datetime
+ import logging
+ import json
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ import numpy as np
+ from langchain.schema import SystemMessage, HumanMessage, AIMessage
+ from langchain.output_parsers import PydanticOutputParser
+ from pydantic import BaseModel, Field
+
+ # Configure logging
+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+     handlers=[
+         logging.FileHandler('agents.log'),
+         logging.StreamHandler()
+     ]
+ )
+
+ # Create loggers for each agent
+ extractor_logger = logging.getLogger('ExtractorAgent')
+ skeptic_logger = logging.getLogger('SkepticAgent')
+ believer_logger = logging.getLogger('BelieverAgent')
+ supervisor_logger = logging.getLogger('SupervisorAgent')
+ podcast_logger = logging.getLogger('PodcastProducerAgent')
+
+ # Load environment variables
+ load_dotenv()
+
+ # Get API keys
+ openai_api_key = os.getenv("OPENAI_API_KEY")
+ eleven_api_key = os.getenv("ELEVEN_API_KEY")
+
+ if not openai_api_key:
+     raise ValueError("OPENAI_API_KEY not found in environment variables")
+ if not eleven_api_key:
+     raise ValueError("ELEVEN_API_KEY not found in environment variables")
+
+ # Initialize the base LLM
+ base_llm = ChatOpenAI(
+     model_name="gpt-3.5-turbo",
+     temperature=0.7,
+     openai_api_key=openai_api_key
+ )
+
+ class ExtractorOutput(BaseModel):
+     content: str = Field(description="The extracted and refined query")
+     key_points: List[str] = Field(description="Key points extracted from the query")
+
+ class AgentResponse(BaseModel):
+     content: str = Field(description="The agent's response")
+     chunks: Optional[List[Dict[str, str]]] = Field(description="Relevant context chunks used", default=None)
+
+ class SupervisorOutput(BaseModel):
+     content: str = Field(description="The supervisor's analysis")
+     chunks: Dict[str, List[str]] = Field(description="Quadrant-based chunks of the analysis")
+
+ class PodcastOutput(BaseModel):
+     title: str = Field(description="Title of the podcast episode")
+     description: str = Field(description="Description of the episode")
+     script: str = Field(description="The podcast script")
+     summary: str = Field(description="A brief summary of the episode")
+     duration_minutes: int = Field(description="Estimated duration in minutes")
+
+ class ExtractorAgent:
+     def __init__(self, tavily_api_key: str):
+         self.search_tool = TavilySearchResults(
+             api_key=tavily_api_key,
+             max_results=5
+         )
+         self.prompt = ChatPromptTemplate.from_messages([
+             ("system", """You are an expert information extractor. Your role is to:
+             1. Extract relevant information from search results
+             2. Organize the information in a clear, structured way
+             3. Focus on factual, verifiable information
+             4. Cite sources when possible"""),
+             ("human", "{input}")
+         ])
+         self.chain = self.prompt | base_llm
+
+     async def __call__(self, query: str) -> Dict[str, Any]:
+         try:
+             # Log the incoming query
+             extractor_logger.info(f"Processing query: {query}")
+
+             try:
+                 # Search using Tavily
+                 search_results = await self.search_tool.ainvoke(query)
+                 extractor_logger.debug(f"Search results: {json.dumps(search_results, indent=2)}")
+             except Exception as e:
+                 extractor_logger.error(f"Error in Tavily search: {str(e)}", exc_info=True)
+                 raise Exception(f"Tavily search failed: {str(e)}")
+
+             # Format the results
+             if isinstance(search_results, list):
+                 formatted_results = f"Search results for: {query}\n" + "\n".join(
+                     [str(result) for result in search_results]
+                 )
+             else:
+                 formatted_results = f"Search results for: {query}\n{search_results}"
+
+             try:
+                 # Generate response using the chain
+                 response = await self.chain.ainvoke({"input": formatted_results})
+                 extractor_logger.info(f"Generated response: {response.content}")
+             except Exception as e:
+                 extractor_logger.error(f"Error in LLM chain: {str(e)}", exc_info=True)
+                 raise Exception(f"LLM chain failed: {str(e)}")
+
+             return {
+                 "type": "extractor",
+                 "content": response.content,
+                 "raw_results": search_results
+             }
+         except Exception as e:
+             extractor_logger.error(f"Error in ExtractorAgent: {str(e)}", exc_info=True)
+             raise Exception(f"Error in extractor: {str(e)}")
+
+ class SkepticAgent:
+     def __init__(self):
+         self.prompt = ChatPromptTemplate.from_messages([
+             ("system", """You are a critical thinker engaging in a thoughtful discussion. While maintaining a balanced perspective, you should:
+             - Analyze potential challenges and limitations
+             - Consider real-world implications
+             - Support arguments with evidence and examples
+             - Maintain a respectful and constructive tone
+             - Raise important considerations
+
+             If provided with context information, use it to inform your response while maintaining your analytical perspective.
+             Focus on examining risks and important questions from the context.
+
+             Keep your responses concise and focused on the topic at hand."""),
+             ("human", """Context information:
+             {chunks}
+
+             Question/Topic:
+             {input}""")
+         ])
+         self.chain = self.prompt | base_llm
+
+     async def __call__(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
+         skeptic_logger.info(f"Processing input: {input_data['content']}")
+         chunks = input_data.get("chunks", [])
+         chunks_text = "\n".join(chunks) if chunks else "No additional context provided."
+
+         response = await self.chain.ainvoke({
+             "input": input_data["content"],
+             "chunks": chunks_text
+         })
+         skeptic_logger.info(f"Generated response: {response.content}")
+         return {"type": "skeptic", "content": response.content}
+
+ class BelieverAgent:
+     def __init__(self):
+         self.prompt = ChatPromptTemplate.from_messages([
+             ("system", """You are an optimistic thinker engaging in a thoughtful discussion. While maintaining a balanced perspective, you should:
+             - Highlight opportunities and potential benefits
+             - Share innovative solutions and possibilities
+             - Support arguments with evidence and examples
+             - Maintain a constructive and forward-thinking tone
+             - Build on existing ideas positively
+
+             If provided with context information, use it to inform your response while maintaining your optimistic perspective.
+             Focus on opportunities and solutions from the context.
+
+             Keep your responses concise and focused on the topic at hand."""),
+             ("human", """Context information:
+             {chunks}
+
+             Question/Topic:
+             {input}""")
+         ])
+         self.chain = self.prompt | base_llm
+
+     async def __call__(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
+         believer_logger.info(f"Processing input: {input_data['content']}")
+         chunks = input_data.get("chunks", [])
+         chunks_text = "\n".join(chunks) if chunks else "No additional context provided."
+
+         response = await self.chain.ainvoke({
+             "input": input_data["content"],
+             "chunks": chunks_text
+         })
+         believer_logger.info(f"Generated response: {response.content}")
+         return {"type": "believer", "content": response.content}
+
+ class SupervisorAgent:
+     def __init__(self):
+         self.prompt = ChatPromptTemplate.from_messages([
+             ("system", """You are a balanced supervisor. Your role is to:
+             1. Analyze inputs from all agents
+             2. Identify key points and insights
+             3. Balance different perspectives
+             4. Synthesize a comprehensive view
+             5. Provide clear, actionable conclusions
+
+             Organize your response into these sections:
+             - Opportunities: Key possibilities and positive aspects
+             - Risks: Important challenges and concerns
+             - Questions: Critical questions to consider
+             - Solutions: Potential ways forward
+
+             Focus on creating a balanced, well-reasoned synthesis of all viewpoints.
+             Keep your words to 100 words or less."""),
+             ("human", "Analyze the following perspectives:\n\nExtractor: {extractor_content}\n\nSkeptic: {skeptic_content}\n\nBeliever: {believer_content}")
+         ])
+         self.chain = self.prompt | base_llm
+
+     async def __call__(self, agent_responses: Dict[str, Any]) -> Dict[str, Any]:
+         supervisor_logger.info("Processing agent responses:")
+         supervisor_logger.info(f"Extractor: {agent_responses['extractor']['content']}")
+         supervisor_logger.info(f"Skeptic: {agent_responses['skeptic']['content']}")
+         supervisor_logger.info(f"Believer: {agent_responses['believer']['content']}")
+
+         # Process supervisor's analysis
+         response = await self.chain.ainvoke({
+             "extractor_content": agent_responses["extractor"]["content"],
+             "skeptic_content": agent_responses["skeptic"]["content"],
+             "believer_content": agent_responses["believer"]["content"]
+         })
+
+         supervisor_logger.info(f"Generated analysis: {response.content}")
+
+         # Parse the response into sections
+         content = response.content
+         sections = {
+             "opportunities": [],
+             "risks": [],
+             "questions": [],
+             "solutions": []
+         }
+
+         # Simple parsing of the content into sections
+         current_section = None
+         for line in content.split('\n'):
+             line = line.strip()
+             if line.lower().startswith('opportunities:'):
+                 current_section = "opportunities"
+             elif line.lower().startswith('risks:'):
+                 current_section = "risks"
+             elif line.lower().startswith('questions:'):
+                 current_section = "questions"
+             elif line.lower().startswith('solutions:'):
+                 current_section = "solutions"
+             elif line and current_section:
+                 sections[current_section].append(line)
+
+         return {
+             "type": "supervisor",
+             "content": response.content,
+             "chunks": sections
+         }
+
+ class PodcastProducerAgent:
+     def __init__(self):
+         podcast_logger.info("Initializing PodcastProducerAgent")
+
+         # Initialize the agent with a lower temperature for more consistent output
+         llm = ChatOpenAI(
+             model_name="gpt-3.5-turbo",
+             temperature=0.3,
+             openai_api_key=openai_api_key
+         )
+
+         # Create audio storage directory if it doesn't exist
+         self.audio_dir = os.path.join(os.path.dirname(__file__), "audio_storage")
+         os.makedirs(self.audio_dir, exist_ok=True)
+         podcast_logger.info(f"Audio directory: {self.audio_dir}")
+
+         self.prompt = ChatPromptTemplate.from_messages([
+             ("system", """You are an expert podcast producer. Create a single, cohesive podcast script that:
+             1. Introduces the topic clearly
+             2. Presents a balanced debate between perspectives
+             3. Incorporates key insights from the supervisor's analysis:
+                - Opportunities and positive aspects
+                - Risks and challenges
+                - Key questions to consider
+                - Potential solutions
+             4. Prioritizes content based on quadrant analysis:
+                - Important & Urgent: Address first and emphasize
+                - Important & Not Urgent: Cover thoroughly but with less urgency
+                - Not Important & Urgent: Mention briefly if relevant
+                - Not Important & Not Urgent: Include only if adds value
+             5. Maintains natural conversation flow with clear speaker transitions
+             6. Concludes with actionable takeaways
+
+             Keep the tone professional but conversational. Format the script with clear speaker indicators and natural pauses."""),
+             ("human", """Create a podcast script from this content:
+
+             Topic: {user_query}
+
+             Debate Content:
+             {debate_content}
+
+             Supervisor's Analysis:
+             {supervisor_content}
+
+             Quadrant Analysis:
+             Important & Urgent:
+             {important_urgent}
+
+             Important & Not Urgent:
+             {important_not_urgent}
+
+             Not Important & Urgent:
+             {not_important_urgent}
+
+             Not Important & Not Urgent:
+             {not_important_not_urgent}""")
+         ])
+         self.chain = self.prompt | llm
+
+         # Metadata prompt for categorization
+         self.metadata_prompt = ChatPromptTemplate.from_messages([
+             ("system", """Analyze the debate and provide:
+             1. A category (single word: technology/science/society/politics/economics/culture)
+             2. A short description (3-4 words) of the main topic
+             Format: category|short_description"""),
+             ("human", "{content}")
+         ])
+         self.metadata_chain = self.metadata_prompt | llm
+
+     async def __call__(self, debate_history: list, supervisor_notes: list, user_query: str, supervisor_chunks: dict, quadrant_analysis: dict) -> Dict[str, Any]:
+         try:
+             podcast_logger.info("Starting podcast production")
+
+             # Format the debate content
+             debate_content = "\n\n".join([
+                 f"{entry['speaker']}: {entry['content']}"
+                 for entry in debate_history
+             ])
+
+             # Get the latest supervisor analysis
+             supervisor_content = supervisor_notes[-1] if supervisor_notes else ""
+
+             # Format quadrant content
+             important_urgent = "\n".join(quadrant_analysis.get("important_urgent", []))
+             important_not_urgent = "\n".join(quadrant_analysis.get("important_not_urgent", []))
+             not_important_urgent = "\n".join(quadrant_analysis.get("not_important_urgent", []))
+             not_important_not_urgent = "\n".join(quadrant_analysis.get("not_important_not_urgent", []))
+
+             # Generate the podcast script
+             script_response = await self.chain.ainvoke({
+                 "user_query": user_query,
+                 "debate_content": debate_content,
+                 "supervisor_content": supervisor_content,
+                 "important_urgent": important_urgent,
+                 "important_not_urgent": important_not_urgent,
+                 "not_important_urgent": not_important_urgent,
+                 "not_important_not_urgent": not_important_not_urgent
+             })
+
+             # Get metadata for the podcast
+             metadata_response = await self.metadata_chain.ainvoke({
+                 "content": script_response.content
+             })
+             category, description = metadata_response.content.strip().split("|")
+
+             # Clean up filename components
+             clean_query = user_query.lower().replace(" ", "_")[:30]
+             clean_description = description.lower().replace(" ", "_")
+             clean_category = category.lower().strip()
+
+             try:
+                 # Create a single filename with hyphens separating main components
+                 filename = f"{clean_query}-{clean_description}-{clean_category}.mp3"
+                 filepath = os.path.join(self.audio_dir, filename)
+
+                 # Generate audio file
+                 from gtts import gTTS
+                 tts = gTTS(text=script_response.content, lang='en')
+                 tts.save(filepath)
+
+                 podcast_logger.info(f"Successfully saved audio file: {filepath}")
+
+                 return {
+                     "type": "podcast",
+                     "content": script_response.content,
+                     "audio_file": filename,
+                     "category": clean_category,
+                     "description": description,
+                     "title": f"Debate: {description.title()}"
+                 }
+             except Exception as e:
+                 podcast_logger.error(f"Error in audio generation: {str(e)}", exc_info=True)
+                 return {
+                     "type": "podcast",
+                     "content": script_response.content,
+                     "error": f"Audio generation failed: {str(e)}"
+                 }
+
+         except Exception as e:
+             podcast_logger.error(f"Error in podcast production: {str(e)}", exc_info=True)
+             return {
+                 "type": "podcast",
+                 "error": f"Podcast production failed: {str(e)}"
+             }
+
+ def create_agents(tavily_api_key: str) -> Dict[str, Any]:
+     # Initialize all agents
+     extractor = ExtractorAgent(tavily_api_key)
+     believer = BelieverAgent()
+     skeptic = SkepticAgent()
+     supervisor = SupervisorAgent()
+     podcast_producer = PodcastProducerAgent()
+
+     return {
+         "extractor": extractor,
+         "believer": believer,
+         "skeptic": skeptic,
+         "supervisor": supervisor,
+         "podcast_producer": podcast_producer
+     }
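
For orientation, a minimal sketch of driving these agents directly, outside the LangGraph workflow. It assumes the three keys from server/.env are exported and the packages from requirements.txt are installed; the query string is a placeholder.

```
import asyncio
import os
from agents import create_agents  # importing agents.py validates the OpenAI/ElevenLabs keys at load time

async def main():
    # create_agents returns the five agents keyed by role (see the bottom of agents.py)
    agents = create_agents(os.environ["TAVILY_API_KEY"])

    # The extractor searches Tavily and condenses the results
    extracted = await agents["extractor"]("What are the pros and cons of remote work?")

    # Skeptic and believer take the extracted content plus optional context chunks
    rebuttal = await agents["skeptic"]({"content": extracted["content"], "chunks": []})
    print(rebuttal["content"])

asyncio.run(main())
```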
server/app.py ADDED
@@ -0,0 +1,5 @@
+ import uvicorn
+ from main import app
+
+ if __name__ == "__main__":
+     uvicorn.run(app, host="0.0.0.0", port=7860)
server/context_storage/1_context.json ADDED
@@ -0,0 +1 @@
+ {"topic": "are humans dumb", "believer_chunks": ["Humans possess remarkable intelligence that has allowed us to innovate, create art, and advance technology.", "Human intelligence enables us to solve complex problems, adapt to various environments, and collaborate effectively."], "skeptic_chunks": ["The term 'dumb' is subjective and doesn't fully capture the diverse capabilities and potential of human intelligence.", "While humans may make mistakes or exhibit ignorance in certain areas, it doesn't diminish the overall cognitive abilities and achievements of our species."]}
server/main.py ADDED
@@ -0,0 +1,454 @@
+ from fastapi import FastAPI, HTTPException
+ from fastapi.middleware.cors import CORSMiddleware
+ from fastapi.responses import FileResponse
+ from fastapi.staticfiles import StaticFiles
+ from pydantic import BaseModel
+ from typing import List, Dict, Any, Optional
+ import os
+ import json
+ from workflow import create_workflow, run_workflow
+ import logging
+ from dotenv import load_dotenv
+ from langchain_openai import ChatOpenAI
+ from langchain.prompts import ChatPromptTemplate
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_community.vectorstores import Qdrant
+ from langchain_openai.embeddings import OpenAIEmbeddings
+ from operator import itemgetter
+ from langchain.schema.output_parser import StrOutputParser
+ from langchain.schema.runnable import RunnablePassthrough
+
+
+ # Load environment variables
+ load_dotenv()
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ # Initialize components
+ openai_api_key = os.getenv("OPENAI_API_KEY")
+ if not openai_api_key:
+     raise ValueError("OpenAI API key not configured")
+
+ # Initialize OpenAI components
+ chat_model = ChatOpenAI(
+     model_name="gpt-3.5-turbo",
+     temperature=0.7,
+     openai_api_key=openai_api_key
+ )
+
+ # Initialize FastAPI app
+ app = FastAPI()
+
+ # Get allowed origins from environment variable or use default list
+ # (filter out empty strings: "".split(",") yields [""], which would defeat the fallback)
+ ALLOWED_ORIGINS = [o.strip() for o in os.getenv("ALLOWED_ORIGINS", "").split(",") if o.strip()]
+ if not ALLOWED_ORIGINS:
+     ALLOWED_ORIGINS = [
+         "http://localhost:5173",
+         "http://localhost:3000",
+         "https://*.vercel.app",  # Allow all Vercel preview and production domains
+     ]
+
+ # Configure CORS for frontend servers
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=ALLOWED_ORIGINS,
+     allow_origin_regex=r"https://.*\.vercel\.app$",  # Allow all Vercel domains
+     allow_credentials=True,
+     allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD"],
+     allow_headers=["*"],
+     expose_headers=["*"],
+     max_age=3600,
+ )
+
+ # Add security headers middleware
+ @app.middleware("http")
+ async def add_security_headers(request, call_next):
+     response = await call_next(request)
+     response.headers["X-Content-Type-Options"] = "nosniff"
+     response.headers["X-Frame-Options"] = "DENY"
+     response.headers["X-XSS-Protection"] = "1; mode=block"
+     response.headers["Strict-Transport-Security"] = "max-age=31536000; includeSubDomains"
+     return response
+
+ # Configure audio storage
+ audio_dir = os.path.join(os.path.dirname(__file__), "audio_storage")
+ os.makedirs(audio_dir, exist_ok=True)
+
+ # Mount the audio directory as a static file directory
+ app.mount("/audio-files", StaticFiles(directory=audio_dir), name="audio")
+
+ # Configure context storage
+ context_dir = os.path.join(os.path.dirname(__file__), "context_storage")
+ os.makedirs(context_dir, exist_ok=True)
+
+ class ChatMessage(BaseModel):
+     content: str
+     context: Optional[Dict[str, Any]] = None
+     agent_type: Optional[str] = "believer"
+
+ class WorkflowResponse(BaseModel):
+     debate_history: List[Dict[str, str]]
+     supervisor_notes: List[str]
+     supervisor_chunks: List[Dict[str, List[str]]]
+     extractor_data: Dict[str, Any]
+     final_podcast: Dict[str, Any]
+
+ class PodcastChatRequest(BaseModel):
+     message: str
+
+ class PodcastChatResponse(BaseModel):
+     response: str
+
+ @app.get("/audio-list")
+ async def list_audio_files():
+     """List all available audio files."""
+     try:
+         files = os.listdir(audio_dir)
+         audio_files = []
+         for file in files:
+             if file.endswith(('.mp3', '.wav')):
+                 file_path = os.path.join(audio_dir, file)
+                 audio_files.append({
+                     "filename": file,
+                     "path": f"/audio-files/{file}",
+                     "size": os.path.getsize(file_path)
+                 })
+         return audio_files
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @app.delete("/audio/{filename}")
+ async def delete_audio_file(filename: str):
+     """Delete an audio file and its corresponding transcript."""
+     try:
+         # Delete audio file
+         file_path = os.path.join(audio_dir, filename)
+         if not os.path.exists(file_path):
+             raise HTTPException(status_code=404, detail="File not found")
+
+         # Get all audio files to determine the podcast ID
+         audio_files = [f for f in os.listdir(audio_dir) if f.endswith(('.mp3', '.wav'))]
+         try:
+             # Find the index (0-based) of the file being deleted
+             podcast_id = audio_files.index(filename) + 1  # Convert to 1-based ID
+             logger.info(f"Deleting podcast with ID: {podcast_id}")
+
+             # Path to transcripts file
+             transcripts_file = os.path.join(os.path.dirname(__file__), "transcripts", "podcasts.json")
+
+             # Update transcripts if file exists
+             if os.path.exists(transcripts_file):
+                 with open(transcripts_file, 'r') as f:
+                     transcripts = json.load(f)
+
+                 # Remove the transcript at the corresponding index
+                 if len(transcripts) >= podcast_id:
+                     transcripts.pop(podcast_id - 1)  # Convert back to 0-based index
+
+                     # Save updated transcripts
+                     with open(transcripts_file, 'w') as f:
+                         json.dump(transcripts, f, indent=2)
+                     logger.info(f"Removed transcript for podcast ID {podcast_id}")
+
+             # Delete the audio file
+             os.remove(file_path)
+             logger.info(f"Deleted audio file: {filename}")
+
+             return {"message": "File and transcript deleted successfully"}
+
+         except ValueError:
+             logger.error(f"Could not determine podcast ID for file: {filename}")
+             # Still delete the audio file even if transcript removal fails
+             os.remove(file_path)
+             return {"message": "Audio file deleted, but transcript could not be removed"}
+
+     except Exception as e:
+         logger.error(f"Error in delete_audio_file: {str(e)}")
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @app.get("/audio/{filename}")
+ async def get_audio_file(filename: str):
+     """Get an audio file by filename."""
+     try:
+         file_path = os.path.join(audio_dir, filename)
+         if not os.path.exists(file_path):
+             raise HTTPException(status_code=404, detail="File not found")
+         return FileResponse(file_path)
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @app.get("/podcast/{podcast_id}/context")
+ async def get_podcast_context(podcast_id: str):
+     """Get or generate context for a podcast."""
+     try:
+         logger.info(f"Getting context for podcast {podcast_id}")
+         context_path = os.path.join(context_dir, f"{podcast_id}_context.json")
+
+         # If context exists, return it
+         if os.path.exists(context_path):
+             logger.info(f"Found existing context file at {context_path}")
+             with open(context_path, 'r') as f:
+                 return json.load(f)
+
+         # If no context exists, we need to create it from the podcast content
+         logger.info("No existing context found, creating new context")
+
+         # Get the audio files to find the podcast filename
+         files = os.listdir(audio_dir)
+         logger.info(f"Found {len(files)} files in audio directory")
+         podcast_files = [f for f in files if f.endswith('.mp3')]
+         logger.info(f"Found {len(podcast_files)} podcast files: {podcast_files}")
+
+         if not podcast_files:
+             logger.error("No podcast files found")
+             raise HTTPException(status_code=404, detail="No podcast files found")
+
+         # Find the podcast file that matches this ID
+         try:
+             podcast_index = int(podcast_id) - 1  # Convert 1-based ID to 0-based index
+             if podcast_index < 0 or podcast_index >= len(podcast_files):
+                 raise ValueError(f"Invalid podcast ID: {podcast_id}, total podcasts: {len(podcast_files)}")
+             podcast_filename = podcast_files[podcast_index]
+             logger.info(f"Selected podcast file: {podcast_filename}")
+         except (ValueError, IndexError) as e:
+             logger.error(f"Invalid podcast ID: {podcast_id}, Error: {str(e)}")
+             raise HTTPException(status_code=404, detail=f"Invalid podcast ID: {podcast_id}")
+
+         # Extract topic from filename
+         try:
+             topic = podcast_filename.split('-')[0].replace('_', ' ')
+             logger.info(f"Extracted topic: {topic}")
+         except Exception as e:
+             logger.error(f"Error extracting topic from filename: {podcast_filename}, Error: {str(e)}")
+             raise HTTPException(status_code=500, detail=f"Error extracting topic from filename: {str(e)}")
+
+         # Initialize OpenAI chat model for content analysis
+         try:
+             chat_model = ChatOpenAI(
+                 model_name="gpt-3.5-turbo",
+                 temperature=0.3,
+                 openai_api_key=openai_api_key
+             )
+             logger.info("Successfully initialized ChatOpenAI")
+         except Exception as e:
+             logger.error(f"Error initializing ChatOpenAI: {str(e)}")
+             raise HTTPException(status_code=500, detail=f"Error initializing chat model: {str(e)}")
+
+         # Create prompt template for content analysis
+         prompt = ChatPromptTemplate.from_messages([
+             ("system", """You are an expert content analyzer. Your task is to:
+             1. Analyze the given topic and create balanced, factual content chunks about it
+             2. Generate two types of chunks:
+                - Believer chunks: Positive aspects, opportunities, and solutions related to the topic
+                - Skeptic chunks: Challenges, risks, and critical questions about the topic
+             3. Each chunk should be self-contained and focused on a single point
+             4. Keep chunks concise (2-3 sentences each)
+             5. Ensure all content is factual and balanced
+
+             Format your response as a JSON object with two arrays:
+             {{
+                 "believer_chunks": ["chunk1", "chunk2", ...],
+                 "skeptic_chunks": ["chunk1", "chunk2", ...]
+             }}"""),
+             ("human", "Create balanced content chunks about this topic: {topic}")
+         ])
+
+         # Generate content chunks
+         chain = prompt | chat_model
+
+         try:
+             logger.info(f"Generating content chunks for topic: {topic}")
+             response = await chain.ainvoke({
+                 "topic": topic
+             })
+             logger.info("Successfully received response from OpenAI")
+
+             # Parse the response content as JSON
+             try:
+                 content_chunks = json.loads(response.content)
+                 logger.info(f"Successfully parsed response JSON with {len(content_chunks.get('believer_chunks', []))} believer chunks and {len(content_chunks.get('skeptic_chunks', []))} skeptic chunks")
+             except json.JSONDecodeError as e:
+                 logger.error(f"Error parsing response JSON: {str(e)}, Response content: {response.content}")
+                 raise HTTPException(status_code=500, detail=f"Error parsing content chunks: {str(e)}")
+
+             # Create the context object
+             context = {
+                 "topic": topic,
+                 "believer_chunks": content_chunks.get("believer_chunks", []),
+                 "skeptic_chunks": content_chunks.get("skeptic_chunks", [])
+             }
+
+             # Save the context
+             try:
+                 with open(context_path, 'w') as f:
+                     json.dump(context, f)
+                 logger.info(f"Saved new context to {context_path}")
+             except Exception as e:
+                 logger.error(f"Error saving context file: {str(e)}")
+                 raise HTTPException(status_code=500, detail=f"Error saving context file: {str(e)}")
+
+             return context
+
+         except Exception as e:
+             logger.error(f"Error generating content chunks: {str(e)}")
+             raise HTTPException(
+                 status_code=500,
+                 detail=f"Error generating content chunks: {str(e)}"
+             )
+
+     except HTTPException:
+         raise
+     except Exception as e:
+         logger.error(f"Error in get_podcast_context: {str(e)}", exc_info=True)
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @app.post("/chat")
+ async def chat(message: ChatMessage):
+     """Process a chat message with context-awareness."""
+     try:
+         # Log incoming message
+         logger.info(f"Received chat message: {message}")
+
+         # Get API key
+         tavily_api_key = os.getenv("TAVILY_API_KEY")
+         if not tavily_api_key:
+             logger.error("Tavily API key not found")
+             raise HTTPException(status_code=500, detail="Tavily API key not configured")
+
+         # Initialize the workflow
+         try:
+             workflow = create_workflow(tavily_api_key)
+             logger.info("Workflow created successfully")
+         except Exception as e:
+             logger.error(f"Error creating workflow: {str(e)}")
+             raise HTTPException(status_code=500, detail=f"Error creating workflow: {str(e)}")
+
+         # Run the workflow with context
+         try:
+             result = await run_workflow(
+                 workflow,
+                 message.content,
+                 agent_type=message.agent_type,
+                 context=message.context
+             )
+             logger.info("Workflow completed successfully")
+         except Exception as e:
+             logger.error(f"Error running workflow: {str(e)}")
+             raise HTTPException(status_code=500, detail=f"Error running workflow: {str(e)}")
+
+         return WorkflowResponse(**result)
+     except Exception as e:
+         logger.error(f"Error in chat endpoint: {str(e)}", exc_info=True)
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @app.post("/podcast-chat/{podcast_id}", response_model=PodcastChatResponse)
+ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
+     """Handle chat messages for a specific podcast."""
+     try:
+         logger.info(f"Processing chat message for podcast {podcast_id}")
+
+         # Path to transcripts file
+         transcripts_file = os.path.join(os.path.dirname(__file__), "transcripts", "podcasts.json")
+
+         # Check if transcripts file exists
+         if not os.path.exists(transcripts_file):
+             raise HTTPException(status_code=404, detail="Transcripts file not found")
+
+         # Read transcripts
+         with open(transcripts_file, 'r') as f:
+             transcripts = json.load(f)
+
+         # Convert podcast_id to zero-based index
+         try:
+             podcast_index = int(podcast_id) - 1
+             if podcast_index < 0 or podcast_index >= len(transcripts):
+                 raise ValueError(f"Invalid podcast ID: {podcast_id}")
+         except ValueError as e:
+             raise HTTPException(status_code=404, detail=str(e))
+
+         # Get podcast transcript
+         podcast_transcript = transcripts[podcast_index]["podcastScript"]
+
+         # Split text into chunks
+         text_splitter = RecursiveCharacterTextSplitter(
+             chunk_size=500,
+             chunk_overlap=50,
+             length_function=len,
+         )
+
+         # Use split_text for strings instead of split_documents
+         chunks = text_splitter.split_text(podcast_transcript)
+
+         # Initialize embedding model
+         embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
+
+         # Create a unique collection name for this podcast
+         collection_name = f"podcast_{podcast_id}"
+
+         # Initialize Qdrant with local storage
+         vectorstore = Qdrant.from_texts(
+             texts=chunks,
+             embedding=embedding_model,
+             location=":memory:",  # Use in-memory storage
+             collection_name=collection_name
+         )
+
+         # Configure the retriever with search parameters
+         qdrant_retriever = vectorstore.as_retriever(
+             search_type="similarity",
+             search_kwargs={"k": 3}  # Get top 3 most relevant chunks
+         )
+
+         base_rag_prompt_template = """\
+         You are a helpful podcast assistant. Answer the user's question based on the provided context from the podcast transcript.
+         If you can't find the answer in the context, just say "I don't have enough information to answer that question."
+         Keep your responses concise and focused on the question.
+
+         Context:
+         {context}
+
+         Question:
+         {question}
+         """
+
+         base_rag_prompt = ChatPromptTemplate.from_template(base_rag_prompt_template)
+         base_llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.7)
+
+         # Create the RAG chain
+         def format_docs(docs):
+             return "\n\n".join(doc.page_content for doc in docs)
+
+         # Add logging for the retrieved documents and final prompt
+         def get_context_and_log(input_dict):
+             context = format_docs(qdrant_retriever.get_relevant_documents(input_dict["question"]))
+             logger.info("Retrieved context from podcast:")
+             logger.info("-" * 50)
+             logger.info(f"Context:\n{context}")
+             logger.info("-" * 50)
+             logger.info(f"Question: {input_dict['question']}")
+             logger.info("-" * 50)
+             return {"context": context, "question": input_dict["question"]}
+
+         # Create the chain
+         chain = (
+             RunnablePassthrough()
+             | get_context_and_log
+             | base_rag_prompt
+             | base_llm
+         )
+
+         # Get response
+         response = chain.invoke({"question": request.message})
+
+         return PodcastChatResponse(response=response.content)
+
+     except Exception as e:
+         logger.error(f"Error in podcast chat: {str(e)}", exc_info=True)
+         raise HTTPException(status_code=500, detail=str(e))
+
+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=8000)
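
The `/podcast-chat/{podcast_id}` endpoint above builds an in-memory Qdrant index over one saved transcript per request. A hedged usage sketch follows; the podcast ID, question, and local port are assumptions, and at least one transcript must already exist in transcripts/podcasts.json.

```
import requests

# assumption: server running locally and podcast 1 exists
resp = requests.post(
    "http://localhost:7860/podcast-chat/1",
    json={"message": "What were the main risks discussed?"},  # PodcastChatRequest body
)
print(resp.json()["response"])
```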
server/qdrant_db/.lock ADDED
@@ -0,0 +1 @@
+ tmp lock file
server/qdrant_db/meta.json ADDED
@@ -0,0 +1 @@
+ {"collections": {}, "aliases": {}}
server/requirements.txt ADDED
@@ -0,0 +1,14 @@
+ fastapi==0.109.2
+ uvicorn==0.27.1
+ pydantic>=2.7.4
+ langchain>=0.3.15
+ langchain-community>=0.3.15
+ langchain-openai>=0.3.2
+ langchain-qdrant>=0.1.1
+ openai>=1.58.1
+ langgraph>=0.2.67
+ python-dotenv==1.0.1
+ gtts==2.5.1
+ pyyaml==6.0.1
+ tiktoken>=0.6.0
+ qdrant-client>=1.7.0
server/test_workflow.py ADDED
@@ -0,0 +1,303 @@
+ import asyncio
+ from dotenv import load_dotenv
+ import os
+ from workflow import create_workflow, run_workflow, TRANSCRIPTS_FILE
+ import json
+ from datetime import datetime
+ import traceback
+
+ # Load environment variables
+ load_dotenv()
+
+ def log_step(step: str, details: str = None):
+     """Print a formatted step log"""
+     timestamp = datetime.now().strftime("%H:%M:%S")
+     print(f"\n[{timestamp}] 🔄 {step}")
+     if details:
+         print(f" {details}")
+
+ def log_agent(agent: str, message: str):
+     """Print a formatted agent message"""
+     timestamp = datetime.now().strftime("%H:%M:%S")
+     agent_icons = {
+         "extractor": "🔍",
+         "skeptic": "🤔",
+         "believer": "💡",
+         "supervisor": "👀",
+         "storage": "📦",
+         "podcast": "🎙️",
+         "error": "❌",
+         "step": "➡️"
+     }
+     icon = agent_icons.get(agent.lower(), "💬")
+     print(f"\n[{timestamp}] {icon} {agent}:")
+     print(f" {message}")
+
+ def check_api_keys():
+     """Check if all required API keys are present"""
+     required_keys = {
+         "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY"),
+         "ELEVEN_API_KEY": os.getenv("ELEVEN_API_KEY"),
+         "TAVILY_API_KEY": os.getenv("TAVILY_API_KEY")
+     }
+
+     missing_keys = [key for key, value in required_keys.items() if not value]
+     if missing_keys:
+         raise ValueError(f"Missing required API keys: {', '.join(missing_keys)}")
+
+     return required_keys["TAVILY_API_KEY"]
+
+ async def test_transcript_saving(workflow, query: str):
+     """Test that transcripts are properly saved to podcasts.json"""
+     try:
+         # Get initial transcript count
+         initial_transcripts = []
+         if os.path.exists(TRANSCRIPTS_FILE):
+             with open(TRANSCRIPTS_FILE, 'r') as f:
+                 initial_transcripts = json.load(f)
+         initial_count = len(initial_transcripts)
+
+         # Run workflow
+         result = await run_workflow(workflow, query)
+
+         # Verify transcript was saved
+         if not os.path.exists(TRANSCRIPTS_FILE):
+             return False, "Transcripts file was not created"
+
+         with open(TRANSCRIPTS_FILE, 'r') as f:
+             transcripts = json.load(f)
+
+         if len(transcripts) <= initial_count:
+             return False, "No new transcript was added"
+
+         latest_transcript = transcripts[-1]
+         if not all(key in latest_transcript for key in ["id", "podcastScript", "topic"]):
+             return False, "Transcript is missing required fields"
+
+         if latest_transcript["topic"] != query:
+             return False, f"Topic mismatch. Expected: {query}, Got: {latest_transcript['topic']}"
+
+         return True, "Transcript was saved successfully"
+
+     except Exception as e:
+         return False, f"Error in transcript test: {str(e)}\n{traceback.format_exc()}"
+
+ async def test_single_turn(workflow_graph, query: str):
+     """Test a single turn of the workflow"""
+     result = await run_workflow(workflow_graph, query)
+     return len(result["debate_history"]) > 0
+
+ async def test_debate_length(workflow, query):
+     """Test that debate history does not exceed 20 messages"""
+     result = await run_workflow(workflow, query)
+     return len(result["debate_history"]) <= 20
+
+ async def test_podcast_generation(workflow, query):
+     """Test podcast generation functionality"""
+     try:
+         result = await run_workflow(workflow, query)
+
+         # Check for podcast data
+         if "final_podcast" not in result:
+             return False, "No podcast data in result"
+
+         podcast_data = result["final_podcast"]
+
+         # Check for errors in podcast generation
+         if "error" in podcast_data:
+             return False, f"Podcast generation error: {podcast_data['error']}"
+
+         # Verify script generation
+         if not podcast_data.get("content"):
+             return False, "No podcast script generated"
+
+         # Verify audio file generation
+         if not podcast_data.get("audio_file"):
+             return False, "No audio file generated"
+
+         # Check if audio file exists
+         audio_path = os.path.join(os.path.dirname(__file__), "audio_storage", podcast_data["audio_file"])
+         if not os.path.exists(audio_path):
+             return False, f"Audio file not found at {audio_path}"
+
+         # Check file size
+         file_size = os.path.getsize(audio_path)
+         if file_size == 0:
+             return False, "Audio file is empty"
+
+         # Check if transcript was saved
+         transcript_success, transcript_message = await test_transcript_saving(workflow, query)
+         if not transcript_success:
+             return False, f"Transcript saving failed: {transcript_message}"
+
+         return True, f"Podcast generated successfully (file size: {file_size} bytes)"
+
+     except Exception as e:
+         return False, f"Error in podcast test: {str(e)}\n{traceback.format_exc()}"
+
+ async def run_all_tests():
+     log_step("Running all tests")
+
+     try:
+         # Check API keys
+         tavily_api_key = check_api_keys()
+
+         # Create workflow
+         workflow = create_workflow(tavily_api_key)
+
+         # Test queries
+         queries = [
+             "What are the environmental impacts of electric vehicles?",
+             "How does artificial intelligence impact healthcare?",
+             "What are the pros and cons of remote work?",
+             "Discuss the future of space exploration"
+         ]
+
+         results = {}
+         for query in queries:
+             try:
+                 log_step(f"Testing query", query)
+
+                 # Test transcript saving
+                 transcript_success, transcript_message = await test_transcript_saving(workflow, query)
+                 log_agent("step", f"Transcript test: {transcript_message}")
+
+                 result = await run_workflow(workflow, query)
+
+                 podcast_success, podcast_message = await test_podcast_generation(workflow, query)
+
+                 results[query] = {
+                     "success": True,
+                     "debate_length": len(result["debate_history"]),
+                     "supervisor_notes": len(result["supervisor_notes"]),
+                     "transcript_saved": transcript_success,
+                     "transcript_status": transcript_message,
+                     "podcast_generated": podcast_success,
+                     "podcast_status": podcast_message,
+                     "timestamp": datetime.now().isoformat()
+                 }
+
+                 log_agent("step", f"Test completed for: {query}")
+
+             except Exception as e:
+                 results[query] = {
+                     "success": False,
+                     "error": str(e),
+                     "traceback": traceback.format_exc(),
+                     "timestamp": datetime.now().isoformat()
+                 }
+                 log_agent("error", f"Test failed for: {query}\n{str(e)}")
+
+         # Save results
+         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+         filename = f"test_results_{timestamp}.json"
+         with open(filename, "w") as f:
+             json.dump(results, f, indent=2)
+
+         log_step("Results saved", f"File: {filename}")
+         print("\nTest Results:")
+         print(json.dumps(results, indent=2))
+
+         return results
+
+     except Exception as e:
+         log_agent("error", f"Critical error in tests: {str(e)}\n{traceback.format_exc()}")
+         raise
+
+ async def test_workflow():
+     log_step("Starting workflow test")
+
+     try:
+         # Check API keys
+         tavily_api_key = check_api_keys()
+
+         # Create the workflow
+         log_step("Creating workflow graph")
+         workflow_graph = create_workflow(tavily_api_key)
+
+         # Test query
+         test_query = "Should artificial intelligence be regulated?"
+         log_step("Test Query", test_query)
+
+         # Run the workflow
+         log_step("Running workflow")
+         result = await run_workflow(workflow_graph, test_query)
+
+         # Test transcript saving
+         log_step("Testing transcript saving")
+         transcript_success, transcript_message = await test_transcript_saving(workflow_graph, test_query)
+         log_agent("step", f"Transcript test: {transcript_message}")
+
+         # Print extractor results
+         log_step("Information Extraction Phase")
+         if "extractor_data" in result:
+             log_agent("Extractor", result["extractor_data"].get("content", "No content"))
+
+         # Print debate history
+         log_step("Debate Phase")
+         print("\nDebate Timeline:")
+         for i, entry in enumerate(result["debate_history"], 1):
+             log_agent(entry["speaker"], entry["content"])
+             if i < len(result["supervisor_notes"]):
+                 log_agent("Supervisor", f"Analysis of Turn {i}:\n {result['supervisor_notes'][i]}")
+
+         # Print final supervisor analysis
+         log_step("Final Supervisor Analysis")
+         if result["supervisor_notes"]:
+             log_agent("Supervisor", result["supervisor_notes"][-1])
+
+         # Print podcast results
+         log_step("Podcast Production Phase")
+         if "final_podcast" in result:
+             podcast_data = result["final_podcast"]
+             if "error" in podcast_data:
+                 log_agent("Podcast", f"Error: {podcast_data['error']}")
+             else:
+                 log_agent("Podcast", "Script:\n" + podcast_data["content"])
+                 if podcast_data.get("audio_file"):
+                     audio_path = os.path.join(os.path.dirname(__file__), "audio_storage", podcast_data["audio_file"])
+                     file_size = os.path.getsize(audio_path) if os.path.exists(audio_path) else 0
+                     log_agent("Podcast", f"Audio file saved as: {podcast_data['audio_file']} (size: {file_size} bytes)")
+
+         # Save results
+         log_step("Saving Results")
+         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+         filename = f"test_results_{timestamp}.json"
+         with open(filename, "w") as f:
+             json.dump({
+                 "timestamp": datetime.now().isoformat(),
+                 "query": test_query,
+                 "workflow_results": result,
+                 "transcript_saved": transcript_success,
+                 "transcript_status": transcript_message
+             }, f, indent=2)
+         log_step("Results Saved", f"File: {filename}")
+
+     except Exception as e:
+         log_step("ERROR", f"Workflow execution failed: {str(e)}")
+         print("\nFull traceback:")
+         print(traceback.format_exc())
+         raise
+
+ if __name__ == "__main__":
+     print("\n" + "="*50)
+     print("🤖 Starting AI Debate Workflow Test")
+     print("="*50)
+
+     try:
+         asyncio.run(test_workflow())
+         print("\n" + "="*50)
+         print("✅ Test Complete")
+         print("="*50)
+
+         # Run comprehensive tests
+         print("\nRunning comprehensive tests...")
+         asyncio.run(run_all_tests())
+         print("\n" + "="*50)
+         print("✅ All Tests Complete")
+         print("="*50)
+     except Exception as e:
+         print("\n" + "="*50)
+         print(f"❌ Tests Failed: {str(e)}")
+         print("="*50)
+         raise
server/transcripts/podcasts.json ADDED
@@ -0,0 +1 @@
+ []
server/workflow.py ADDED
@@ -0,0 +1,254 @@
+ from typing import Dict, Any, List, Annotated, TypedDict, Union, Optional
+ from langgraph.graph import Graph, END
+ from agents import create_agents
+ import os
+ from dotenv import load_dotenv
+ import json
+ import uuid
+
+ # Load environment variables
+ load_dotenv()
+
+ # Create transcripts directory if it doesn't exist
+ TRANSCRIPTS_DIR = os.path.join(os.path.dirname(__file__), "transcripts")
+ os.makedirs(TRANSCRIPTS_DIR, exist_ok=True)
+ TRANSCRIPTS_FILE = os.path.join(TRANSCRIPTS_DIR, "podcasts.json")
+
+ def save_transcript(podcast_script: str, user_query: str) -> None:
+     """Save podcast transcript to JSON file."""
+     # Create new transcript entry
+     transcript = {
+         "id": str(uuid.uuid4()),
+         "podcastScript": podcast_script,
+         "topic": user_query
+     }
+
+     try:
+         # Load existing transcripts
+         if os.path.exists(TRANSCRIPTS_FILE):
+             with open(TRANSCRIPTS_FILE, 'r') as f:
+                 transcripts = json.load(f)
+         else:
+             transcripts = []
+
+         # Append new transcript
+         transcripts.append(transcript)
+
+         # Save updated transcripts
+         with open(TRANSCRIPTS_FILE, 'w') as f:
+             json.dump(transcripts, f, indent=2)
+
+     except Exception as e:
+         print(f"Error saving transcript: {str(e)}")
+
+ class AgentState(TypedDict):
+     messages: List[Dict[str, Any]]
+     current_agent: str
+     debate_turns: int
+     extractor_data: Dict[str, Any]
+     debate_history: List[Dict[str, Any]]
+     supervisor_notes: List[str]
+     supervisor_chunks: List[Dict[str, List[str]]]
+     final_podcast: Dict[str, Any]
+     agent_type: str
+     context: Optional[Dict[str, Any]]
+
+ def create_workflow(tavily_api_key: str):
+     # Initialize all agents
+     agents = create_agents(tavily_api_key)
+
+     # Create the graph
+     workflow = Graph()
+
+     # Define the extractor node function
+     async def run_extractor(state: AgentState) -> Dict[str, Any]:
+         query = state["messages"][-1]["content"]
+         print(f"Extractor processing query: {query}")
+
+         try:
+             response = await agents["extractor"](query)
+             print(f"Extractor response: {response}")
+
+             # Update state
+             state["extractor_data"] = response
+
+             # Get initial supervisor analysis
+             supervisor_analysis = await agents["supervisor"]({
+                 "extractor": response,
+                 "skeptic": {"content": "Not started"},
+                 "believer": {"content": "Not started"}
+             })
+             print(f"Initial supervisor analysis: {supervisor_analysis}")
+
+             state["supervisor_notes"].append(supervisor_analysis["content"])
+             state["supervisor_chunks"].append(supervisor_analysis.get("chunks", {}))
+
+             # Move to debate phase
+             state["current_agent"] = "debate"
+             return state
+         except Exception as e:
+             print(f"Error in extractor: {str(e)}")
+             raise Exception(f"Error in extractor: {str(e)}")
+
+     # Define the debate node function
+     async def run_debate(state: AgentState) -> Dict[str, Any]:
+         print(f"Debate turn {state['debate_turns']}")
+
+         try:
+             if state["debate_turns"] == 0:
+                 # First turn: both agents respond to extractor
+                 print("Starting first debate turn")
+
+                 # If we have context, use it to inform the agents' responses
+                 context = state.get("context", {})
+                 agent_chunks = context.get("agent_chunks", []) if context else []
+
+                 # Create context-aware input for agents
+                 context_input = {
+                     "content": state["extractor_data"]["content"],
+                     "chunks": agent_chunks
+                 }
+
+                 skeptic_response = await agents["skeptic"](context_input)
+                 believer_response = await agents["believer"](context_input)
+
+                 state["debate_history"].extend([
+                     {"speaker": "skeptic", "content": skeptic_response["content"]},
+                     {"speaker": "believer", "content": believer_response["content"]}
+                 ])
+                 print(f"First turn responses added: {state['debate_history'][-2:]}")
+             else:
+                 # Alternating responses based on agent type if specified
+                 if state["agent_type"] in ["believer", "skeptic"]:
+                     current_speaker = state["agent_type"]
+                 else:
+                     # Default alternating behavior
+                     last_speaker = state["debate_history"][-1]["speaker"]
+                     current_speaker = "believer" if last_speaker == "skeptic" else "skeptic"
+
+                 print(f"Processing response for {current_speaker}")
+
+                 # Create context-aware input
+                 context = state.get("context", {})
+                 agent_chunks = context.get("agent_chunks", []) if context else []
+                 context_input = {
+                     "content": state["debate_history"][-1]["content"],
+                     "chunks": agent_chunks
+                 }
+
+                 response = await agents[current_speaker](context_input)
+
+                 state["debate_history"].append({
+                     "speaker": current_speaker,
+                     "content": response["content"]
+                 })
+                 print(f"Added response: {state['debate_history'][-1]}")
+
+             # Add supervisor note and chunks
+             supervisor_analysis = await agents["supervisor"]({
+                 "extractor": state["extractor_data"],
+                 "skeptic": {"content": state["debate_history"][-1]["content"]},
+                 "believer": {"content": state["debate_history"][-2]["content"] if len(state["debate_history"]) > 1 else "Not started"}
+             })
+             print(f"Supervisor analysis: {supervisor_analysis}")
+
+             state["supervisor_notes"].append(supervisor_analysis["content"])
+             state["supervisor_chunks"].append(supervisor_analysis.get("chunks", {}))
+
+             state["debate_turns"] += 1
+             print(f"Debate turn {state['debate_turns']} completed")
+
+             # End the workflow after 2 debate turns
+             if state["debate_turns"] >= 2:
+                 state["current_agent"] = "podcast"
+                 print("Moving to podcast production")
+
+             return state
+         except Exception as e:
+             print(f"Error in debate: {str(e)}")
+             raise Exception(f"Error in debate: {str(e)}")
+
+     async def run_podcast_producer(state: AgentState) -> Dict[str, Any]:
+         print("Starting podcast production")
+
+         try:
+             # Create podcast from debate
+             podcast_result = await agents["podcast_producer"](
+                 state["debate_history"],
+                 state["supervisor_notes"],
+                 state["messages"][-1]["content"],  # Pass the original user query
+                 state["supervisor_chunks"],
+                 {}  # Empty quadrant analysis since we removed storage manager
+             )
+             print(f"Podcast production result: {podcast_result}")
+
+             # Save transcript to JSON file
+             save_transcript(
+                 podcast_script=podcast_result["content"],
+                 user_query=state["messages"][-1]["content"]
+             )
+
+             # Store the result
+             state["final_podcast"] = podcast_result
+
+             # End the workflow
+             state["current_agent"] = END
+             return state
+         except Exception as e:
+             print(f"Error in podcast production: {str(e)}")
+             raise Exception(f"Error in podcast production: {str(e)}")
+
+     # Add nodes to the graph
+     workflow.add_node("extractor", run_extractor)
+     workflow.add_node("debate", run_debate)
+     workflow.add_node("podcast", run_podcast_producer)
+
+     # Set the entry point
+     workflow.set_entry_point("extractor")
+
+     # Add edges
+     workflow.add_edge("extractor", "debate")
+
+     # Add conditional edges for debate
+     workflow.add_conditional_edges(
+         "debate",
+         lambda x: "podcast" if x["debate_turns"] >= 2 else "debate"
+     )
+
+     # Add edge from podcast to end
+     workflow.add_edge("podcast", END)
+
+     # Compile the graph
+     return workflow.compile()
+
+ async def run_workflow(
+     graph: Graph,
+     query: str,
+     agent_type: str = "believer",
+     context: Optional[Dict[str, Any]] = None
+ ) -> Dict[str, Any]:
+     """Run the workflow with a given query."""
+     # Initialize the state
+     initial_state = {
+         "messages": [{"role": "user", "content": query}],
+         "current_agent": "extractor",
+         "debate_turns": 0,
+         "extractor_data": {},
+         "debate_history": [],
+         "supervisor_notes": [],
+         "supervisor_chunks": [],
+         "final_podcast": {},
+         "agent_type": agent_type,
+         "context": context
+     }
+
+     # Run the graph
+     result = await graph.ainvoke(initial_state)
+
+     return {
+         "debate_history": result["debate_history"],
+         "supervisor_notes": result["supervisor_notes"],
+         "supervisor_chunks": result["supervisor_chunks"],
+         "extractor_data": result["extractor_data"],
+         "final_podcast": result["final_podcast"]
+     }
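
Putting workflow.py together, a minimal driver sketch that mirrors what test_workflow.py does; it assumes the API keys are exported and is not part of the commit.

```
import asyncio
import os
from workflow import create_workflow, run_workflow

async def main():
    # Build and compile the extractor -> debate -> podcast graph
    graph = create_workflow(os.environ["TAVILY_API_KEY"])

    # Runs two debate turns, then produces and saves the podcast
    result = await run_workflow(graph, "Should artificial intelligence be regulated?")
    for turn in result["debate_history"]:
        print(f"{turn['speaker']}: {turn['content'][:80]}")

asyncio.run(main())
```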