import json
import logging
import os
from datetime import datetime
import boto3
import gradio as gr
from dotenv import load_dotenv
from agents.accountability_agent import AccountabilityAgent
from agents.outline_agent import OutlineAgent
from agents.research_agent import ResearchAgent
from agents.synthesis_agent import SynthesisAgent
from agents.thinking_ladder_agent import ThinkingLadderAgent
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
load_dotenv()
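# Instantiate each pipeline stage once at startup so every Gradio request reuses the same agents.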
outline_agent = OutlineAgent()
research_agent = ResearchAgent()
thinking_ladder_agent = ThinkingLadderAgent()
synthesis_agent = SynthesisAgent()
accountability_agent = AccountabilityAgent()
def main(query: str) -> tuple[str, str, float]:
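    """Run the five-agent pipeline for a query and return the complete synthesis, the accountability report, and the total cost ($)."""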
logs = []
log_entry = {}
logging.info(f"Received query: {query}")
log_entry["query"] = query
main_title, themes, metrics_a = outline_agent.run(query)
logging.info(f"OutlineAgent output - Main Title: {main_title}, Themes: {themes}, Metrics: {metrics_a}")
log_entry["outline_agent"] = {"main_title": main_title, "themes": themes, "metrics": metrics_a}
sources, metrics_b = research_agent.run(themes)
logging.info(f"ResearchAgent output - Sources: {sources}, Metrics: {metrics_b}")
log_entry["research_agent"] = {"sources": sources, "metrics": metrics_b}
    selected_themes = themes[:2]
    selected_titles = [theme['title'] for theme in selected_themes]
    filtered_sources = {key: value for key, value in sources.items() if key in selected_titles}
    classified_sources, metrics_c = thinking_ladder_agent.run(selected_themes, filtered_sources)
logging.info(f"ThinkingLadderAgent output - Classified Sources: {classified_sources}, Metrics: {metrics_c}")
log_entry["thinking_ladder_agent"] = {"classified_sources": classified_sources, "metrics": metrics_c}
    complete_synthesis, synthesis, all_sub_syntheses, all_sub_theme, metrics_d = synthesis_agent.run(main_title, selected_themes, classified_sources)
logging.info(f"SynthesisAgent output - Synthesis: {synthesis}, Metrics: {metrics_d}")
log_entry["synthesis_agent"] = {"synthesis": synthesis, "metrics": metrics_d}
accountability, metrics_e = accountability_agent.run(query, synthesis)
logging.info(f"AccountabilityAgent output - Accountability: {accountability}, Metrics: {metrics_e}")
log_entry["accountability_agent"] = {"accountability": accountability, "metrics": metrics_e}
    cost = sum(metrics["cost"] for metrics in [metrics_a, metrics_b, metrics_c, metrics_d, metrics_e])
logging.info(f"Total Cost: {cost}")
log_entry["total_cost"] = cost
logs.append(log_entry)
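    # Persist each agent's output to a timestamped JSON file; the filenames are collected for the S3 upload below.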
filenames = []
# Outline Agent
timestamp = str(int(datetime.now().timestamp()))
log_entry['outline_agent']['timestamp'] = timestamp
outline_agent_filename = "outline_agent_" + timestamp + ".json"
with open(outline_agent_filename, 'w', encoding='utf-8') as json_file:
json.dump(log_entry['outline_agent'], json_file, ensure_ascii=False, indent=4)
filenames.append(outline_agent_filename)
print(f"Data has been written to {outline_agent_filename}")
# Research Agent
timestamp = str(int(datetime.now().timestamp()))
# Convert each source DataFrame to JSON
output = {
'timestamp': timestamp,
'sources': []
}
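    # Each source entry may carry 'web_sources' and/or 'dataset_sources' DataFrames; serialize whichever are present.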
for source_name, source_data in sources.items():
source_entry = {'name': source_name}
if 'web_sources' in source_data:
source_entry['web_sources'] = json.loads(source_data['web_sources'].to_json(orient='records', force_ascii=False))
if 'dataset_sources' in source_data:
source_entry['dataset_sources'] = json.loads(source_data['dataset_sources'].to_json(orient='records', force_ascii=False))
output['sources'].append(source_entry)
research_agent_filename = 'research_agent_' + timestamp + '.json'
# Save the combined JSON data to a file with UTF-8 encoding
with open(research_agent_filename, 'w', encoding='utf-8') as file:
json.dump(output, file, ensure_ascii=False, indent=4)
filenames.append(research_agent_filename)
print(f"Data has been written to {research_agent_filename}")
# Thinking Ladder Agent
timestamp = str(int(datetime.now().timestamp()))
# Prepare the output structure
output = {
'timestamp': timestamp,
'sources': []
}
# Convert each source DataFrame to JSON
for source_name, source_data in classified_sources.items():
source_entry = {'name': source_name}
source_entry['data'] = json.loads(source_data.to_json(orient='records', force_ascii=False))
output['sources'].append(source_entry)
thinking_ladder_agent_filename = 'thinking_ladder_agent_' + timestamp + '.json'
# Save the combined JSON data to a file with UTF-8 encoding
with open(thinking_ladder_agent_filename, 'w', encoding='utf-8') as file:
json.dump(output, file, ensure_ascii=False, indent=4)
filenames.append(thinking_ladder_agent_filename)
print(f"Data has been written to {thinking_ladder_agent_filename}")
# Synthesis Agent
timestamp = str(int(datetime.now().timestamp()))
# Prepare the output structure
output = {
'timestamp': timestamp,
'complete_synthesis': complete_synthesis,
'synthesis': synthesis,
'all_sub_syntheses': all_sub_syntheses,
'all_sub_theme': all_sub_theme
}
synthesis_agent_filename = 'synthesis_agent_' + timestamp + '.json'
# Save the JSON data to a file with UTF-8 encoding
with open(synthesis_agent_filename, 'w', encoding='utf-8') as file:
json.dump(output, file, ensure_ascii=False, indent=4)
filenames.append(synthesis_agent_filename)
print(f"Data has been written to {synthesis_agent_filename}")
# Accountability Agent
timestamp = str(int(datetime.now().timestamp()))
log_entry['accountability_agent']['timestamp'] = timestamp
accountability_agent_log_filename = 'accountability_agent_log_' + timestamp + '.json'
with open(accountability_agent_log_filename, 'w', encoding='utf-8') as file:
json.dump(log_entry['accountability_agent'], file, ensure_ascii=False, indent=4)
filenames.append(accountability_agent_log_filename)
print(f"Data has been written to {accountability_agent_log_filename}")
print(f"Filenames: {filenames}")
# Uploading to S3
# Initialize the S3 client
s3_client = boto3.client('s3')
# The name of your S3 bucket
bucket_name = 'dediro-bakcup-1'
# Upload each file to the S3 bucket
for filename in filenames:
s3_client.upload_file(filename, bucket_name, filename)
print(f"Uploaded {filename} to {bucket_name}")
return complete_synthesis, accountability, cost
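# Build and launch the Gradio UI: a single query box in; the synthesis, the accountability report, and the total cost out.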
gr.Interface(
fn=main,
inputs=gr.Textbox(label="Query:"),
outputs=[gr.Textbox(label="Generated Synthesis:"), gr.Textbox(label="Accountability:"), gr.Number(label="Cost ($):")],
).launch()