Spaces:

Oxbridge-Economics
/

AI-News-Agent

Running

App Files Files Community

OxbridgeEconomics committed on Jul 18

Commit

ab30e46

•

0 Parent(s):

Initial commit

Browse files

Files changed (13) hide show

.gitignore +2 -0
.replit +20 -0
README.md +0 -0
app.py +27 -0
controllers/__init__.py +0 -0
controllers/article_query_service.py +85 -0
controllers/article_search_service.py +75 -0
poetry.lock +0 -0
pyproject.toml +30 -0
replit.nix +5 -0
requirements.txt +0 -0
routes/__initi__.py +3 -0
routes/main.py +58 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ venv
2	+ .env

.replit ADDED Viewed

	@@ -0,0 +1,20 @@

+# The Flask application lives in app.py; this commit has no top-level main.py.
+entrypoint = "app.py"
+modules = ["python-3.10"]
+[nix]
+channel = "stable-23_05"
+[unitTest]
+language = "python3"
+[gitHubImport]
+requiredFiles = [".replit", "replit.nix"]
+[deployment]
+# app.py only defines `app` and never calls app.run(), so serve it through the Flask CLI.
+run = ["python3", "-m", "flask", "--app", "app", "run", "--host", "0.0.0.0", "--port", "5000"]
+deploymentTarget = "cloudrun"
+[[ports]]
+localPort = 5000
+externalPort = 5000
+exposeLocalhost = true

README.md ADDED Viewed

File without changes

app.py ADDED Viewed

	@@ -0,0 +1,27 @@

+"""Module that initializes the Flask application."""
+import logging
+import os
+from dotenv import load_dotenv
+from flask import Flask
+from routes import main
+load_dotenv(".env")
+def create_app():
+    """
+    Creates and configures the Flask application.
+    Returns:
+        Flask: The configured Flask application.
+    """
+    flask_app = Flask(__name__)
+    logging.basicConfig(
+        format='%(asctime)s - %(levelname)s - %(funcName)s - %(message)s')
+    logging.getLogger().setLevel(os.environ.get("LOG_LEVEL", "INFO").upper())
+    flask_app.register_blueprint(main.bp)
+    return flask_app
+app = create_app()  # module-level application object, built when this module is imported

controllers/__init__.py ADDED Viewed

File without changes

controllers/article_query_service.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import os
+from dotenv import load_dotenv
+from pinecone import Pinecone
+from langchain_openai import OpenAIEmbeddings
+from langchain_pinecone import PineconeVectorStore
+from langchain_openai import ChatOpenAI
+from langchain.prompts import PromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import RunnablePassthrough
+# Load environment variables via python-dotenv, then read the API keys this module uses
+load_dotenv()
+PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')  # None when unset; passed to Pinecone() in article_agent
+OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')  # read here but not referenced explicitly by article_agent
+def article_agent(query, filter_params=None):
+    # Initialize Pinecone
+    try:
+        pc = Pinecone(api_key=PINECONE_API_KEY)
+        print('Pinecone Initialized')
+    except Exception as e:
+        print(f"Failed to initialize Pinecone: {e}")
+        return None
+    # Initialize embeddings
+    embeddings = OpenAIEmbeddings()
+    # Create vector store
+    try:
+        index_name = 'finfast-macro-china'
+        index = pc.Index(index_name)
+        vectorstore = PineconeVectorStore(index, embeddings)
+        print('Vector Store Created')
+    except Exception as e:
+        print(f"Failed to create vector store: {e}")
+        return None
+    # Validate and setup retriever with dynamic filtering based on IDs provided in filter_params
+    try:
+        if filter_params and isinstance(filter_params, list) and all(isinstance(id, str) for id in filter_params):
+            search_filter = {"id": {"$in": filter_params}}
+        else:
+            if filter_params is not None:
+                print("Invalid filter_params: must be a list of string IDs")
+                return None
+            search_filter = {}
+        retriever = vectorstore.as_retriever(search_kwargs={'filter': search_filter})
+        print('Retriever Initialized')
+    except Exception as e:
+        print(f"Error configuring the retriever: {e}")
+        return None
+    # Initialize LLM
+    try:
+        llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0.0)
+        print('LLM Initialized')
+    except Exception as e:
+        print(f"Failed to initialize LLM: {e}")
+        return None
+    # Setup the prompt template and RAG chain
+    try:
+        prompt_template = """
+        Assistant:
+        As an AI language model specialized in financial document search, your task is to assist users in finding relevant
+        financial documents. Your responses should be detailed, informative, and provide valuable insights. Please provide a
+        comprehensive answer based on the given query and context, going beyond basic information to offer deeper analysis and
+        explanations.
+        Query: {query}
+        Context:
+        {context}
+        Response:
+        """
+        prompt = PromptTemplate(input_variables=['context', 'query'], template=prompt_template)
+        rag_chain = ({"context": retriever, "query": RunnablePassthrough()} | prompt | llm | StrOutputParser())
+        return rag_chain.invoke(query)
+    except Exception as e:
+        print(f"Error during RAG chain setup or execution: {e}")
+        return None

controllers/article_search_service.py ADDED Viewed

	@@ -0,0 +1,75 @@

+import os
+import boto3
+import pandas as pd
+from boto3.dynamodb.conditions import Attr
+from dotenv import load_dotenv
+load_dotenv()
+AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID')  # None when unset; passed to boto3.resource in get_db_connection
+AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY')  # None when unset; passed to boto3.resource in get_db_connection
+def get_db_connection():
+    dynamodb = boto3.resource('dynamodb',
+                              aws_access_key_id=AWS_ACCESS_KEY_ID,
+                              aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
+                              region_name='us-east-1')
+    return dynamodb
+def article_search(titles = None, categories = None):
+    """Searches articles by titles and categories.
+    Args:
+        titles (list, optional): A list of title keywords to search for.
+        categories (list, optional): A list of category keywords to search for.
+    Returns:
+        pd.DataFrame: A DataFrame containing the IDs and contents of matching articles.
+    """
+    dynamodb = get_db_connection()
+    table = dynamodb.Table('article_china')
+    # Build the filter expression
+    filter_expression = None
+    if titles:
+        title_filters = [Attr('title').contains(title) for title in titles]
+        filter_expression = title_filters[0]
+        for title_filter in title_filters[1:]:
+            filter_expression = filter_expression | title_filter
+    if categories:
+        category_filters = [Attr('category').contains(category) for category in categories]
+        if filter_expression is None:
+            filter_expression = category_filters[0]
+        else:
+            filter_expression = filter_expression | category_filters[0]
+        for category_filter in category_filters[1:]:
+            filter_expression = filter_expression | category_filter
+    # Perform the scan operation with the filter expression
+    try:
+        print(f"Scanning table with filter expression: {filter_expression}")
+        scan_kwargs = {
+            'ProjectionExpression': "#id, content",
+            'ExpressionAttributeNames': {"#id": "id"}
+        }
+        if filter_expression is not None:
+            scan_kwargs['FilterExpression'] = filter_expression
+        response = table.scan(**scan_kwargs)
+        items = response['Items']
+        # Collect additional items if response is paginated
+        while 'LastEvaluatedKey' in response:
+            scan_kwargs['ExclusiveStartKey'] = response['LastEvaluatedKey']
+            response = table.scan(**scan_kwargs)
+            items.extend(response['Items'])
+    except Exception as e:
+        print(f"Error during scan operation: {e}")
+        return pd.DataFrame()
+    return pd.DataFrame(items)

poetry.lock ADDED Viewed

The diff for this file is too large to render. See raw diff

pyproject.toml ADDED Viewed

	@@ -0,0 +1,30 @@

+[tool.poetry]
+name = "python-template"
+version = "0.1.0"
+description = ""
+authors = ["Your Name <you@example.com>"]
+[tool.poetry.dependencies]
+python = ">=3.10.0,<3.12"
+langchain = "0.2.7"
+langchain-openai = "0.1.15"
+langchain-pinecone = "0.1.1"
+pinecone-client = "3.2.2"
+python-dotenv = "^1.0.1"
+langchain-core = "^0.2.16"
+flask = "^3.0.3"
+boto3 = "^1.34.144"
+[tool.pyright]
+# https://github.com/microsoft/pyright/blob/main/docs/configuration.md
+useLibraryCodeForTypes = true
+exclude = [".cache"]
+[tool.ruff]
+# https://beta.ruff.rs/docs/configuration/
+select = ['E', 'W', 'F', 'I', 'B', 'C4', 'ARG', 'SIM']
+ignore = ['W291', 'W292', 'W293']
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"

replit.nix ADDED Viewed

	@@ -0,0 +1,5 @@

+{pkgs}: {
+  deps = [
+    pkgs.bash
+  ];
+}

requirements.txt ADDED Viewed

Binary file (2.25 kB). View file

routes/__initi__.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ from flask import Blueprint
2	+
3	+ bp = Blueprint("main", __name__)  # blueprint that routes/main.py imports via `from . import bp`; NOTE(review): this file is named __initi__.py, so that import cannot resolve until it is renamed __init__.py

routes/main.py ADDED Viewed

	@@ -0,0 +1,58 @@

+from flask import request, jsonify
+from controllers.article_search_service import article_search
+from controllers.article_query_service import article_agent
+from . import bp
+@api_bp.route('/')
+def hello_world():
+    return "AI News Agent API"
+@api_bp.route('/search', methods=['POST'])
+def search_articles():
+    data = request.get_json() or {
+    }  # Ensure data is a dictionary even if None is returned
+    titles = data.get('titles', [])
+    categories = data.get('categories', [])
+    try:
+        # Assuming article_search returns a DataFrame with columns 'id' and 'content'
+        articles_df = article_search(titles, categories)
+        if not articles_df.empty:
+            # Create a list of dictionaries, each containing 'id' and 'content'
+            articles_list = articles_df.to_dict(orient='records')
+            return jsonify({'articles': articles_list})
+        else:
+            return jsonify({
+                'articles': [],
+                'message': 'No articles found.'
+            }), 200
+    except Exception as e:
+        print("Error processing articles:", e)
+        return jsonify({'error': 'Failed to process articles'}), 500
+@api_bp.route('/query', methods=['POST'])
+def handle_query():
+    data = request.get_json()
+    if not data:
+        return jsonify({'error': 'No data provided'}), 400
+    # Extract IDs and query from the POST data
+    ids = data.get('ids',
+                   [])  # Defaults to an empty list if 'ids' is not provided
+    query = data.get('query')
+    if not query:
+        return jsonify({'error': 'Query parameter is required'}), 400
+    # Call the article_agent function
+    try:
+        result = article_agent(query, filter_params=ids)
+        if result:
+            return jsonify({'response': result}), 200
+        else:
+            return jsonify({'error': 'Failed to process the query'}), 500
+    except Exception as e:
+        return jsonify({'error': str(e)}), 500