OxbridgeEconomics committed on
Commit
ab30e46
0 Parent(s):

Initial commit

Browse files
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ venv
2
+ .env
.replit ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ entrypoint = "main.py"
2
+ modules = ["python-3.10"]
3
+
4
+ [nix]
5
+ channel = "stable-23_05"
6
+
7
+ [unitTest]
8
+ language = "python3"
9
+
10
+ [gitHubImport]
11
+ requiredFiles = [".replit", "replit.nix"]
12
+
13
+ [deployment]
14
+ run = ["python3", "main.py"]
15
+ deploymentTarget = "cloudrun"
16
+
17
+ [[ports]]
18
+ localPort = 5000
19
+ externalPort = 5000
20
+ exposeLocalhost = true
README.md ADDED
File without changes
app.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Module that initializes the Flask application."""
2
+ import logging
3
+ import os
4
+
5
+ from dotenv import load_dotenv
6
+ from flask import Flask
7
+ from routes import main
8
+
9
+ load_dotenv(".env")
10
+
11
+
12
def create_app():
    """
    Build the Flask application and set up root logging.

    The root logger level is read from the ``LOG_LEVEL`` environment
    variable (upper-cased before use) and falls back to ``INFO`` when the
    variable is not set.

    Returns:
        Flask: The application with the ``main`` blueprint registered.
    """
    application = Flask(__name__)

    # Install the root handler with the log format, then apply the level.
    log_format = '%(asctime)s - %(levelname)s - %(funcName)s - %(message)s'
    logging.basicConfig(format=log_format)
    requested_level = os.environ.get("LOG_LEVEL", "INFO").upper()
    root_logger = logging.getLogger()
    root_logger.setLevel(requested_level)

    application.register_blueprint(main.bp)
    return application
26
+
27
+ app = create_app()
controllers/__init__.py ADDED
File without changes
controllers/article_query_service.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+ from pinecone import Pinecone
4
+ from langchain_openai import OpenAIEmbeddings
5
+ from langchain_pinecone import PineconeVectorStore
6
+ from langchain_openai import ChatOpenAI
7
+ from langchain.prompts import PromptTemplate
8
+ from langchain_core.output_parsers import StrOutputParser
9
+ from langchain_core.runnables import RunnablePassthrough
10
+
11
+ # Load environment variables
12
+ load_dotenv()
13
+ PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
14
+ OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
15
+
16
def article_agent(query, filter_params=None):
    """
    Answer a query with a retrieval-augmented chain over the article index.

    Args:
        query: The user question; it is passed both to the retriever and
            into the prompt.
        filter_params: Optional list of article ID strings. When non-empty,
            retrieval is restricted to entries whose ``id`` metadata is in
            the list. ``None`` or an empty list means no restriction.

    Returns:
        The chain's string output, or ``None`` when ``filter_params`` is
        invalid or any step fails (the reason is printed).
    """
    # Validate first so a bad request fails before any client is created.
    # An empty list means "no filter" rather than an error: callers that
    # default the ID list to [] must still be able to query everything.
    if filter_params is None:
        filter_params = []
    if not isinstance(filter_params, list) or not all(
            isinstance(article_id, str) for article_id in filter_params):
        print("Invalid filter_params: must be a list of string IDs")
        return None

    # Initialize Pinecone
    try:
        pc = Pinecone(api_key=PINECONE_API_KEY)
        print('Pinecone Initialized')
    except Exception as e:
        print(f"Failed to initialize Pinecone: {e}")
        return None

    # Create embeddings and the vector store. The embeddings client is built
    # inside the guarded step so its failure is reported like the others.
    try:
        embeddings = OpenAIEmbeddings()
        index_name = 'finfast-macro-china'
        index = pc.Index(index_name)
        vectorstore = PineconeVectorStore(index, embeddings)
        print('Vector Store Created')
    except Exception as e:
        print(f"Failed to create vector store: {e}")
        return None

    # Set up the retriever; only send a filter when there is one to apply.
    try:
        search_kwargs = {}
        if filter_params:
            search_kwargs['filter'] = {"id": {"$in": filter_params}}
        retriever = vectorstore.as_retriever(search_kwargs=search_kwargs)
        print('Retriever Initialized')
    except Exception as e:
        print(f"Error configuring the retriever: {e}")
        return None

    # Initialize LLM
    try:
        llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0.0)
        print('LLM Initialized')
    except Exception as e:
        print(f"Failed to initialize LLM: {e}")
        return None

    # Set up the prompt template and RAG chain, then run it
    try:
        prompt_template = """
        Assistant:
        As an AI language model specialized in financial document search, your task is to assist users in finding relevant
        financial documents. Your responses should be detailed, informative, and provide valuable insights. Please provide a
        comprehensive answer based on the given query and context, going beyond basic information to offer deeper analysis and
        explanations.

        Query: {query}

        Context:
        {context}

        Response:
        """
        prompt = PromptTemplate(input_variables=['context', 'query'], template=prompt_template)
        # The retriever fills {context}; the raw query passes through to {query}.
        rag_chain = ({"context": retriever, "query": RunnablePassthrough()} | prompt | llm | StrOutputParser())

        return rag_chain.invoke(query)
    except Exception as e:
        print(f"Error during RAG chain setup or execution: {e}")
        return None
85
+
controllers/article_search_service.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import boto3
3
+ import pandas as pd
4
+ from boto3.dynamodb.conditions import Attr
5
+ from dotenv import load_dotenv
6
+
7
+ load_dotenv()
8
+
9
+ AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID')
10
+ AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY')
11
+
12
def get_db_connection(region_name='us-east-1'):
    """
    Create a boto3 DynamoDB service resource.

    Credentials are taken from the module-level ``AWS_ACCESS_KEY_ID`` and
    ``AWS_SECRET_ACCESS_KEY`` values, which are read from the environment.

    Args:
        region_name: AWS region to connect to. Defaults to ``'us-east-1'``,
            the region that was previously hard-coded, so existing callers
            are unaffected.

    Returns:
        The object returned by ``boto3.resource('dynamodb', ...)``.
    """
    return boto3.resource(
        'dynamodb',
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        region_name=region_name,
    )
18
+
19
def article_search(titles = None, categories = None):
    """Scan the ``article_china`` table for articles matching any keyword.

    An article matches when its ``title`` contains any of the given title
    keywords or its ``category`` contains any of the given category
    keywords. With no keywords at all, the scan is unfiltered.

    Args:
        titles (list, optional): Title keywords to search for.
        categories (list, optional): Category keywords to search for.

    Returns:
        pd.DataFrame: One row per matching item with the projected ``id``
        and ``content`` attributes; an empty DataFrame if the scan fails.
    """
    table = get_db_connection().Table('article_china')

    # Gather every "contains" condition, titles first, then OR them together
    # left to right.
    conditions = []
    if titles:
        conditions.extend(Attr('title').contains(keyword) for keyword in titles)
    if categories:
        conditions.extend(Attr('category').contains(keyword) for keyword in categories)

    filter_expression = None
    for condition in conditions:
        if filter_expression is None:
            filter_expression = condition
        else:
            filter_expression = filter_expression | condition

    try:
        print(f"Scanning table with filter expression: {filter_expression}")
        scan_args = {
            'ProjectionExpression': "#id, content",
            'ExpressionAttributeNames': {"#id": "id"},
        }
        if filter_expression is not None:
            scan_args['FilterExpression'] = filter_expression

        # Keep requesting pages until the response carries no continuation key.
        records = []
        while True:
            page = table.scan(**scan_args)
            records.extend(page['Items'])
            if 'LastEvaluatedKey' not in page:
                break
            scan_args['ExclusiveStartKey'] = page['LastEvaluatedKey']
    except Exception as e:
        print(f"Error during scan operation: {e}")
        return pd.DataFrame()

    return pd.DataFrame(records)
74
+
75
+
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ name = "python-template"
3
+ version = "0.1.0"
4
+ description = ""
5
+ authors = ["Your Name <you@example.com>"]
6
+
7
+ [tool.poetry.dependencies]
8
+ python = ">=3.10.0,<3.12"
9
+ langchain = "0.2.7"
10
+ langchain-openai = "0.1.15"
11
+ langchain-pinecone = "0.1.1"
12
+ pinecone-client = "3.2.2"
13
+ python-dotenv = "^1.0.1"
14
+ langchain-core = "^0.2.16"
15
+ flask = "^3.0.3"
16
+ boto3 = "^1.34.144"
17
+
18
+ [tool.pyright]
19
+ # https://github.com/microsoft/pyright/blob/main/docs/configuration.md
20
+ useLibraryCodeForTypes = true
21
+ exclude = [".cache"]
22
+
23
+ [tool.ruff]
24
+ # https://beta.ruff.rs/docs/configuration/
25
+ select = ['E', 'W', 'F', 'I', 'B', 'C4', 'ARG', 'SIM']
26
+ ignore = ['W291', 'W292', 'W293']
27
+
28
+ [build-system]
29
+ requires = ["poetry-core>=1.0.0"]
30
+ build-backend = "poetry.core.masonry.api"
replit.nix ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {pkgs}: {
2
+ deps = [
3
+ pkgs.bash
4
+ ];
5
+ }
requirements.txt ADDED
Binary file (2.25 kB). View file
 
routes/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from flask import Blueprint
2
+
3
+ bp = Blueprint("main", __name__)
routes/main.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import request, jsonify
2
+ from controllers.article_search_service import article_search
3
+ from controllers.article_query_service import article_agent
4
+ from . import bp
5
+
6
+
7
# The blueprint is imported into this module as ``bp``; ``api_bp`` was never
# defined, so the decorator raised NameError on import.
@bp.route('/')
def hello_world():
    """Return the plain-text banner served at the API root."""
    return "AI News Agent API"
10
+
11
+
12
# The blueprint is imported into this module as ``bp``; ``api_bp`` was never
# defined, so the decorator raised NameError on import.
@bp.route('/search', methods=['POST'])
def search_articles():
    """
    Search articles by title and category keywords.

    Expects a JSON object body with optional ``titles`` and ``categories``
    entries, which are forwarded to ``article_search``.

    Returns:
        200 with ``{'articles': [...]}`` (plus a ``message`` when nothing
        matched), 400 when the body is JSON but not an object, or 500 when
        the search or serialization fails.
    """
    # A falsy body (e.g. JSON ``null``) is treated as "no criteria".
    data = request.get_json() or {}
    if not isinstance(data, dict):
        # e.g. a JSON array: there is nothing to call .get() on.
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    titles = data.get('titles', [])
    categories = data.get('categories', [])

    try:
        # article_search is expected to return a DataFrame with 'id' and
        # 'content' columns.
        articles_df = article_search(titles, categories)
        if articles_df.empty:
            return jsonify({
                'articles': [],
                'message': 'No articles found.'
            }), 200

        # One dictionary per article row.
        articles_list = articles_df.to_dict(orient='records')
        return jsonify({'articles': articles_list})
    except Exception as e:
        print("Error processing articles:", e)
        return jsonify({'error': 'Failed to process articles'}), 500
34
+
35
+
36
# The blueprint is imported into this module as ``bp``; ``api_bp`` was never
# defined, so the decorator raised NameError on import.
@bp.route('/query', methods=['POST'])
def handle_query():
    """
    Answer a free-text query, optionally restricted to given article IDs.

    Expects a JSON object body with a required ``query`` string and an
    optional ``ids`` list.

    Returns:
        200 with ``{'response': ...}`` on success, 400 when the body is
        missing, is not a JSON object, or has no ``query``, and 500 when the
        agent fails or returns nothing.
    """
    data = request.get_json()
    if not data:
        return jsonify({'error': 'No data provided'}), 400
    if not isinstance(data, dict):
        # e.g. a JSON array: there is nothing to call .get() on.
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    # An omitted or empty ID list means "no restriction"; pass None so the
    # agent does not treat an empty list as an invalid filter.
    ids = data.get('ids') or None
    query = data.get('query')

    if not query:
        return jsonify({'error': 'Query parameter is required'}), 400

    try:
        result = article_agent(query, filter_params=ids)
    except Exception as e:
        # Log the details server-side; do not echo internal errors to clients.
        print("Error processing query:", e)
        return jsonify({'error': 'Failed to process the query'}), 500

    if result:
        return jsonify({'response': result}), 200
    return jsonify({'error': 'Failed to process the query'}), 500