OxbridgeEconomics
committed on
Commit
•
ab30e46
0
Parent(s):
Initial commit
Browse files- .gitignore +2 -0
- .replit +20 -0
- README.md +0 -0
- app.py +27 -0
- controllers/__init__.py +0 -0
- controllers/article_query_service.py +85 -0
- controllers/article_search_service.py +75 -0
- poetry.lock +0 -0
- pyproject.toml +30 -0
- replit.nix +5 -0
- requirements.txt +0 -0
- routes/__initi__.py +3 -0
- routes/main.py +58 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
venv
|
2 |
+
.env
|
.replit
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
entrypoint = "main.py"
|
2 |
+
modules = ["python-3.10"]
|
3 |
+
|
4 |
+
[nix]
|
5 |
+
channel = "stable-23_05"
|
6 |
+
|
7 |
+
[unitTest]
|
8 |
+
language = "python3"
|
9 |
+
|
10 |
+
[gitHubImport]
|
11 |
+
requiredFiles = [".replit", "replit.nix"]
|
12 |
+
|
13 |
+
[deployment]
|
14 |
+
run = ["python3", "main.py"]
|
15 |
+
deploymentTarget = "cloudrun"
|
16 |
+
|
17 |
+
[[ports]]
|
18 |
+
localPort = 5000
|
19 |
+
externalPort = 5000
|
20 |
+
exposeLocalhost = true
|
README.md
ADDED
File without changes
|
app.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Module that initializes the Flask application."""
|
2 |
+
import logging
|
3 |
+
import os
|
4 |
+
|
5 |
+
from dotenv import load_dotenv
|
6 |
+
from flask import Flask
|
7 |
+
from routes import main
|
8 |
+
|
9 |
+
# Load variables from a local ".env" file into the process environment at
# import time, before create_app() reads LOG_LEVEL from os.environ.
load_dotenv(".env")
|
10 |
+
|
11 |
+
|
12 |
+
def create_app():
    """Build the Flask application used by this service.

    Configures root logging (a fixed message format plus a level taken from
    the ``LOG_LEVEL`` environment variable, defaulting to ``INFO``) and
    attaches the blueprint exposed by ``routes.main``.

    Returns:
        Flask: The application with logging configured and routes registered.
    """
    log_format = '%(asctime)s - %(levelname)s - %(funcName)s - %(message)s'
    level_name = os.environ.get("LOG_LEVEL", "INFO").upper()

    flask_app = Flask(__name__)

    logging.basicConfig(format=log_format)
    # The level is set on the root logger directly (not through basicConfig)
    # so it still takes effect when handlers were already installed and
    # basicConfig() therefore does nothing.
    root_logger = logging.getLogger()
    root_logger.setLevel(level_name)

    flask_app.register_blueprint(main.bp)
    return flask_app
|
26 |
+
|
27 |
+
# Module-level application instance, built once when this module is imported.
# NOTE(review): presumably this is the object the WSGI server / `flask run`
# imports -- confirm against the deployment entry point.
app = create_app()
|
controllers/__init__.py
ADDED
File without changes
|
controllers/article_query_service.py
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from dotenv import load_dotenv
|
3 |
+
from pinecone import Pinecone
|
4 |
+
from langchain_openai import OpenAIEmbeddings
|
5 |
+
from langchain_pinecone import PineconeVectorStore
|
6 |
+
from langchain_openai import ChatOpenAI
|
7 |
+
from langchain.prompts import PromptTemplate
|
8 |
+
from langchain_core.output_parsers import StrOutputParser
|
9 |
+
from langchain_core.runnables import RunnablePassthrough
|
10 |
+
|
11 |
+
# Load environment variables
|
12 |
+
load_dotenv()
|
13 |
+
# Pinecone credential read from the environment (None when the variable is
# unset); article_agent() passes it to the Pinecone client.
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
|
14 |
+
# OpenAI credential read from the environment (None when unset).
# NOTE(review): this constant is not referenced anywhere else in this module;
# presumably the langchain_openai clients pick up OPENAI_API_KEY from the
# environment on their own -- confirm, then pass it explicitly or drop it.
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
|
15 |
+
|
16 |
+
def article_agent(query, filter_params=None):
    """Answer ``query`` with a retrieval-augmented chain over the article index.

    The function builds a Pinecone-backed retriever on the
    ``finfast-macro-china`` index, optionally restricted to a set of document
    IDs, feeds the retrieved context and the query into a prompt, and runs it
    through ``gpt-3.5-turbo`` at temperature 0.

    Args:
        query: The user's question; passed unchanged into the prompt and used
            as the retrieval query.
        filter_params: Optional list of string IDs used to restrict retrieval
            to documents whose ``id`` metadata is in the list. ``None`` or an
            empty list means "no restriction".

    Returns:
        The chain output as a string, or ``None`` when ``filter_params`` is
        not a list of strings or when any setup/execution step fails (the
        failure is printed).
    """
    # Validate the filter before touching any external service so that bad
    # input is rejected without creating clients. An empty list is treated the
    # same as None: callers that default "ids" to [] mean "no filter", not
    # "invalid filter".
    if filter_params is None or (isinstance(filter_params, list) and not filter_params):
        search_filter = {}
    elif isinstance(filter_params, list) and all(
            isinstance(doc_id, str) for doc_id in filter_params):
        search_filter = {"id": {"$in": filter_params}}
    else:
        print("Invalid filter_params: must be a list of string IDs")
        return None

    # Initialize Pinecone
    try:
        pc = Pinecone(api_key=PINECONE_API_KEY)
        print('Pinecone Initialized')
    except Exception as exc:
        print(f"Failed to initialize Pinecone: {exc}")
        return None

    # Create vector store (embeddings are built inside the guarded section so
    # a configuration error is reported instead of propagating to the caller).
    try:
        embeddings = OpenAIEmbeddings()
        index_name = 'finfast-macro-china'
        index = pc.Index(index_name)
        vectorstore = PineconeVectorStore(index, embeddings)
        print('Vector Store Created')
    except Exception as exc:
        print(f"Failed to create vector store: {exc}")
        return None

    # Set up the retriever with the (possibly empty) ID filter
    try:
        retriever = vectorstore.as_retriever(search_kwargs={'filter': search_filter})
        print('Retriever Initialized')
    except Exception as exc:
        print(f"Error configuring the retriever: {exc}")
        return None

    # Initialize LLM
    try:
        llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0.0)
        print('LLM Initialized')
    except Exception as exc:
        print(f"Failed to initialize LLM: {exc}")
        return None

    # Set up the prompt template and RAG chain, then run it
    try:
        # The template text is written flush-left so the string sent to the
        # model carries no indentation from the surrounding code.
        prompt_template = """
Assistant:
As an AI language model specialized in financial document search, your task is to assist users in finding relevant
financial documents. Your responses should be detailed, informative, and provide valuable insights. Please provide a
comprehensive answer based on the given query and context, going beyond basic information to offer deeper analysis and
explanations.

Query: {query}

Context:
{context}

Response:
"""
        prompt = PromptTemplate(input_variables=['context', 'query'], template=prompt_template)
        # The retriever fills "context"; the raw query is passed through as "query".
        rag_chain = ({"context": retriever, "query": RunnablePassthrough()} | prompt | llm | StrOutputParser())

        return rag_chain.invoke(query)
    except Exception as exc:
        print(f"Error during RAG chain setup or execution: {exc}")
        return None
|
85 |
+
|
controllers/article_search_service.py
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import boto3
|
3 |
+
import pandas as pd
|
4 |
+
from boto3.dynamodb.conditions import Attr
|
5 |
+
from dotenv import load_dotenv
|
6 |
+
|
7 |
+
# Load variables from a .env file (if one is found) so the AWS credentials
# below can be read from the environment.
load_dotenv()
|
8 |
+
|
9 |
+
# AWS access key ID read from the environment (None when unset); passed to
# boto3 in get_db_connection().
AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID')
|
10 |
+
# AWS secret access key read from the environment (None when unset); passed
# to boto3 in get_db_connection().
AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY')
|
11 |
+
|
12 |
+
def get_db_connection():
    """Create a boto3 DynamoDB service resource.

    The resource is bound to the ``us-east-1`` region and authenticated with
    the module-level ``AWS_ACCESS_KEY_ID`` / ``AWS_SECRET_ACCESS_KEY`` values.

    Returns:
        The object returned by ``boto3.resource('dynamodb', ...)``.
    """
    return boto3.resource(
        'dynamodb',
        region_name='us-east-1',
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    )
|
18 |
+
|
19 |
+
def article_search(titles=None, categories=None):
    """Scan the ``article_china`` table for articles matching any keyword.

    An article matches when its ``title`` contains any entry of ``titles`` or
    its ``category`` contains any entry of ``categories`` (all conditions are
    OR-ed together). With no keywords at all the scan is unfiltered.

    Args:
        titles (list, optional): Title keywords to look for.
        categories (list, optional): Category keywords to look for.

    Returns:
        pd.DataFrame: One row per matching item, built from the projected
        ``id`` and ``content`` attributes. An empty DataFrame is returned if
        the scan raises.
    """
    table = get_db_connection().Table('article_china')

    # Gather every individual condition, titles first, then categories.
    conditions = []
    if titles:
        conditions.extend(Attr('title').contains(keyword) for keyword in titles)
    if categories:
        conditions.extend(Attr('category').contains(keyword) for keyword in categories)

    # Left-fold the conditions with OR; stays None when there are none.
    filter_expression = None
    for condition in conditions:
        if filter_expression is None:
            filter_expression = condition
        else:
            filter_expression = filter_expression | condition

    # "id" is aliased through ExpressionAttributeNames in the projection.
    scan_kwargs = {
        'ProjectionExpression': "#id, content",
        'ExpressionAttributeNames': {"#id": "id"}
    }

    try:
        print(f"Scanning table with filter expression: {filter_expression}")
        if filter_expression is not None:
            scan_kwargs['FilterExpression'] = filter_expression

        # Keep scanning until DynamoDB stops returning a continuation key.
        items = []
        while True:
            page = table.scan(**scan_kwargs)
            items.extend(page['Items'])
            if 'LastEvaluatedKey' not in page:
                break
            scan_kwargs['ExclusiveStartKey'] = page['LastEvaluatedKey']
    except Exception as e:
        print(f"Error during scan operation: {e}")
        return pd.DataFrame()

    return pd.DataFrame(items)
|
74 |
+
|
75 |
+
|
poetry.lock
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pyproject.toml
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[tool.poetry]
|
2 |
+
name = "python-template"
|
3 |
+
version = "0.1.0"
|
4 |
+
description = ""
|
5 |
+
authors = ["Your Name <you@example.com>"]
|
6 |
+
|
7 |
+
[tool.poetry.dependencies]
|
8 |
+
python = ">=3.10.0,<3.12"
|
9 |
+
langchain = "0.2.7"
|
10 |
+
langchain-openai = "0.1.15"
|
11 |
+
langchain-pinecone = "0.1.1"
|
12 |
+
pinecone-client = "3.2.2"
|
13 |
+
python-dotenv = "^1.0.1"
|
14 |
+
langchain-core = "^0.2.16"
|
15 |
+
flask = "^3.0.3"
|
16 |
+
boto3 = "^1.34.144"
|
17 |
+
|
18 |
+
[tool.pyright]
|
19 |
+
# https://github.com/microsoft/pyright/blob/main/docs/configuration.md
|
20 |
+
useLibraryCodeForTypes = true
|
21 |
+
exclude = [".cache"]
|
22 |
+
|
23 |
+
[tool.ruff]
|
24 |
+
# https://beta.ruff.rs/docs/configuration/
|
25 |
+
select = ['E', 'W', 'F', 'I', 'B', 'C4', 'ARG', 'SIM']
|
26 |
+
ignore = ['W291', 'W292', 'W293']
|
27 |
+
|
28 |
+
[build-system]
|
29 |
+
requires = ["poetry-core>=1.0.0"]
|
30 |
+
build-backend = "poetry.core.masonry.api"
|
replit.nix
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{pkgs}: {
|
2 |
+
deps = [
|
3 |
+
pkgs.bash
|
4 |
+
];
|
5 |
+
}
|
requirements.txt
ADDED
Binary file (2.25 kB). View file
|
|
routes/__initi__.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
from flask import Blueprint
|
2 |
+
|
3 |
+
# Blueprint named "main"; routes/main.py imports it with `from . import bp`
# and app.py registers it through `main.bp`.
# NOTE(review): the commit listing shows this file as `routes/__initi__.py`.
# Python only runs `__init__.py` as a package initialiser, so with that name
# `from . import bp` would fail -- confirm the file name and rename if needed.
bp = Blueprint("main", __name__)
|
routes/main.py
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from flask import request, jsonify
|
2 |
+
from controllers.article_search_service import article_search
|
3 |
+
from controllers.article_query_service import article_agent
|
4 |
+
from . import bp
|
5 |
+
|
6 |
+
|
7 |
+
# Registered on `bp`, the only blueprint name this module imports; the
# previous `api_bp` was never defined and raised NameError on import.
@bp.route('/')
def hello_world():
    """Return a plain-text banner identifying the service."""
    return "AI News Agent API"
|
10 |
+
|
11 |
+
|
12 |
+
# Registered on `bp`, the only blueprint name this module imports; the
# previous `api_bp` was never defined and raised NameError on import.
@bp.route('/search', methods=['POST'])
def search_articles():
    """Search articles by title and/or category keywords.

    Expects an optional JSON object body with ``titles`` and ``categories``
    lists; both default to empty lists.

    Returns:
        200 with ``{'articles': [...]}`` (records from the DataFrame returned
        by ``article_search``), 200 with an empty list and a message when
        nothing matched, 400 when the body is JSON but not an object, or 500
        when the search raises.
    """
    # silent=True makes get_json() return None for a missing or non-JSON body
    # instead of aborting the request, so the `or {}` fallback can apply.
    data = request.get_json(silent=True) or {}
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    titles = data.get('titles', [])
    categories = data.get('categories', [])

    try:
        # article_search is expected to return a DataFrame with 'id' and 'content' columns
        articles_df = article_search(titles, categories)
        if articles_df.empty:
            return jsonify({
                'articles': [],
                'message': 'No articles found.'
            }), 200

        # One dictionary per row
        articles_list = articles_df.to_dict(orient='records')
        return jsonify({'articles': articles_list})
    except Exception as e:
        print("Error processing articles:", e)
        return jsonify({'error': 'Failed to process articles'}), 500
|
34 |
+
|
35 |
+
|
36 |
+
# Registered on `bp`, the only blueprint name this module imports; the
# previous `api_bp` was never defined and raised NameError on import.
@bp.route('/query', methods=['POST'])
def handle_query():
    """Answer a free-text query, optionally restricted to given article IDs.

    Expects a JSON object body with a required ``query`` string and an
    optional ``ids`` list.

    Returns:
        200 with ``{'response': ...}`` on success; 400 when the body is
        missing, not a JSON object, or lacks ``query``; 500 when
        ``article_agent`` returns a falsy result or raises.
    """
    # silent=True makes get_json() return None for a missing or non-JSON body
    # instead of aborting, so the explicit 400 below is actually reachable.
    data = request.get_json(silent=True)
    if not data:
        return jsonify({'error': 'No data provided'}), 400
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    # Extract IDs and query from the POST data
    ids = data.get('ids', [])  # Defaults to an empty list if 'ids' is not provided
    query = data.get('query')

    if not query:
        return jsonify({'error': 'Query parameter is required'}), 400

    # Call the article_agent function. An empty ID list means "no filter", so
    # it is passed as None rather than as [] (which the agent's validation
    # does not accept as a filter).
    try:
        result = article_agent(query, filter_params=ids or None)
        if result:
            return jsonify({'response': result}), 200
        return jsonify({'error': 'Failed to process the query'}), 500
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|