Muhammad Abdur Rahman Saad committed on
Commit
c45e43e
1 Parent(s): ce74f9c

add policy analysis feature

Browse files
controllers/article_search_service.py CHANGED
@@ -72,4 +72,29 @@ def article_search(titles = None, categories = None):
72
 
73
  return pd.DataFrame(items)
74
 
75
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  return pd.DataFrame(items)
74
 
75
def article_search_by_id(id):
    """Fetch the content of a single article by its ID.

    Args:
        id (str): The ID of the article to search for.

    Returns:
        str | None: The article's ``content`` field, or None when the article
        is missing or the lookup fails.
    """
    table = get_db_connection().Table('article_china')
    try:
        # Only the 'content' attribute is needed, so project it explicitly.
        response = table.get_item(
            Key={'id': id},
            ProjectionExpression='content'
        )
    except Exception as e:
        print(f"Error fetching article content: {e}")
        return None
    item = response.get('Item')
    if item is None:
        print("No article found with the given ID.")
        return None
    return item.get('content', None)
controllers/policy_comparison_service.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts.chat import ChatPromptTemplate
from langchain_openai import AzureChatOpenAI
from textblob import TextBlob
import nltk
import os

# TextBlob tokenization requires this NLTK resource at runtime.
nltk.download('punkt_tab')

# SECURITY FIX: a live Azure OpenAI API key was previously hard-coded here and
# committed to source control; that key must be considered compromised and
# rotated. Secrets must come from the environment. Non-secret settings keep
# their previous values as defaults but may be overridden externally.
os.environ.setdefault("AZURE_OPENAI_ENDPOINT", "https://openai-oe.openai.azure.com/")
os.environ.setdefault("AZURE_OPENAI_API_VERSION", "2024-06-01")
os.environ.setdefault("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME", "gpt-4")
if "AZURE_OPENAI_API_KEY" not in os.environ:
    raise RuntimeError(
        "AZURE_OPENAI_API_KEY is not set; export it in the environment "
        "instead of committing it to source control.")

# Shared LLM client used by all analysis functions in this module.
llm = AzureChatOpenAI(
    deployment_name=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"],
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"])
21
def keyword_frequency(article_text, keyword):
    """Count case-sensitive occurrences of *keyword* among the article's tokens."""
    tokens = TextBlob(article_text).words
    return tokens.count(keyword, case_sensitive=True)
24
+
25
class Policy_tone(BaseModel):
    """
    Structured output schema for the policy-tone comparison.

    The Field description below is part of runtime behavior: it is embedded in
    the format instructions sent to the LLM. Do not edit it casually.

    Attributes:
        policies (list): Python list of dictionaries containing policy and its corresponding tone.
    """
    policies: list = Field(
        description="""
        Create a Python list of dictionaries where each dictionary represents a policy. Each dictionary should contain the following keys:
        1. policy: A brief header summarizing the overarching policy. This should be concise and to the point.
        2. tone: A dictionary mapping article names to brief descriptions of the tone used in discussing the policy within each respective article. The tone should succinctly describe the sentiment or style of the presentation.""")
38
def analyze_policy_tone(article1, article2):
    """
    Analyzes and compares policies based on their tone in two articles using a structured approach.

    Args:
        article1 (str): Content of the first article.
        article2 (str): Content of the second article.

    Returns:
        Policy_tone: Parsed structured result of the policy/tone comparison.
    """
    # Parser enforces the Policy_tone schema on the model's output.
    parser = PydanticOutputParser(pydantic_object=Policy_tone)
    prompt = ChatPromptTemplate.from_messages([
        ("system", "You are a Current Affairs Commentator.\
        Your task is to analyze and compare the policies listed in the two articles provided.\
        Extract and compare the list of (sub-)policies, after analyzing their corresponding tones"),
        ("system", "Here is the first article: {article1}"),
        ("system", "Here is the second article: {article2}"),
        ("human", "Format instructions: {format_instructions}"),
        # NOTE(review): an "agent_scratchpad" placeholder is only meaningful
        # for agent executors; it appears unused in this plain prompt|llm|parser
        # chain — confirm before removing.
        ("placeholder", "{agent_scratchpad}"),
    ])

    chain = prompt | llm | parser

    return chain.invoke({
        'article1': article1,
        'article2': article2,
        'format_instructions': parser.get_format_instructions()
    })
68
+
69
class Policy_desc(BaseModel):
    """
    Structured output schema for the policy-description comparison.

    The Field description below is part of runtime behavior: it is embedded in
    the format instructions sent to the LLM. Do not edit it casually.

    Attributes:
        policies (list): Python list of dictionaries containing policy and its corresponding tone , and differing descriptions.
    """
    policies: list = Field(
        description=
        '''Create a Python list of dictionaries where each dictionary represents a policy. Each dictionary should contain the following keys:
        1. policy: A brief header summarizing the overarching policy. This should be concise and to the point.
        2. descriptions: A dictionary mapping article names to detailed descriptions of the policy. These descriptions should not only provide in-depth information and context about the policy as discussed in each article but also include a comparison key. This key should detail the concrete policy changes, contrasting how each article addresses changes or continuity in the policy over time or across different contexts.''')
81
+
82
def analyze_policy_description(article1: str, article2: str):
    """
    Analyzes and compares the policy headers and descriptions in two articles.

    Args:
        article1 (str): Content of the first article.
        article2 (str): Content of the second article.

    Returns:
        Policy_desc: Parsed structured result of the policy/description comparison.
    """
    # Parser enforces the Policy_desc schema on the model's output.
    parser = PydanticOutputParser(pydantic_object=Policy_desc)
    prompt = ChatPromptTemplate.from_messages([
        ("system", "You are a Current Affairs Commentator.\
        Your task is to analyze and compare the policies listed in the two articles provided.\
        Extract and compare the list of (sub-)policies, analyze the corresponding descriptions, and quote the description of each policy from both articles as detailed as possible."),
        ("system", "Here is the first article: {article1}"),
        ("system", "Here is the second article: {article2}"),
        ("human", "Format instructions: {format_instructions}"),
        # NOTE(review): "agent_scratchpad" is only meaningful for agent
        # executors; it appears unused in this plain chain — confirm.
        ("placeholder", "{agent_scratchpad}"),
    ])

    chain = prompt | llm | parser

    result = chain.invoke({
        'article1': article1,
        'article2': article2,
        'format_instructions': parser.get_format_instructions()
    })

    return result
playground/policy_comparison.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts.chat import ChatPromptTemplate
from langchain_openai import AzureChatOpenAI
from textblob import TextBlob
import nltk
import os

# TextBlob tokenization requires this NLTK resource at runtime.
nltk.download('punkt_tab')

# SECURITY FIX: a live Azure OpenAI API key was previously hard-coded here and
# committed to source control; that key must be rotated. Secrets must come
# from the environment. Non-secret settings keep their previous values as
# defaults but may be overridden externally.
os.environ.setdefault("AZURE_OPENAI_ENDPOINT", "https://openai-oe.openai.azure.com/")
os.environ.setdefault("AZURE_OPENAI_API_VERSION", "2024-06-01")
os.environ.setdefault("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME", "gpt-4")
if "AZURE_OPENAI_API_KEY" not in os.environ:
    raise RuntimeError(
        "AZURE_OPENAI_API_KEY is not set; export it in the environment "
        "instead of committing it to source control.")

llm = AzureChatOpenAI(
    deployment_name=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"],
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"])


# Local playground fixture files with the two sample articles to compare.
with open('article1_2023.txt', 'r') as file:
    article_1 = file.read()
with open('article2_2022.txt', 'r') as file:
    article_2 = file.read()
26
+
27
class Policy_desc(BaseModel):
    """
    Structured output schema for the policy-description comparison.

    NOTE(review): this duplicates Policy_desc in
    controllers/policy_comparison_service.py — consider importing it from
    there instead of maintaining two copies.

    Attributes:
        policies (list): Python list of dictionaries containing policy and its corresponding tone , and differing descriptions.
    """
    policies: list = Field(
        description=
        '''Create a Python list of dictionaries where each dictionary represents a policy. Each dictionary should contain the following keys:
        1. policy: A brief header summarizing the overarching policy. This should be concise and to the point.
        2. descriptions: A dictionary mapping article names to detailed descriptions of the policy. These descriptions should not only provide in-depth information and context about the policy as discussed in each article but also include a comparison key. This key should detail the concrete policy changes, contrasting how each article addresses changes or continuity in the policy over time or across different contexts.''')
39
+
40
def analyze_policy_description(article1: str, article2: str):
    """
    Analyzes and compares the policy headers and descriptions in two articles.

    NOTE(review): duplicates the function of the same name in
    controllers/policy_comparison_service.py — consider importing it instead.

    Args:
        article1 (str): Content of the first article.
        article2 (str): Content of the second article.

    Returns:
        Policy_desc: Parsed structured result of the policy/description comparison.
    """
    # Parser enforces the Policy_desc schema on the model's output.
    parser = PydanticOutputParser(pydantic_object=Policy_desc)
    prompt = ChatPromptTemplate.from_messages([
        ("system", "You are a Current Affairs Commentator.\
        Your task is to analyze and compare the policies listed in the two articles provided.\
        Extract and compare the list of (sub-)policies, analyze the corresponding descriptions, and quote the description of each policy from both articles as detailed as possible."),
        ("system", "Here is the first article: {article1}"),
        ("system", "Here is the second article: {article2}"),
        ("human", "Format instructions: {format_instructions}"),
        # NOTE(review): "agent_scratchpad" is only meaningful for agent
        # executors; it appears unused in this plain chain — confirm.
        ("placeholder", "{agent_scratchpad}"),
    ])

    chain = prompt | llm | parser

    result = chain.invoke({
        'article1': article1,
        'article2': article2,
        'format_instructions': parser.get_format_instructions()
    })

    return result
71
+
72
+
73
# Ad-hoc smoke run: compare the two local sample articles and dump the result.
desc_comparison = analyze_policy_description(article_1, article_2)
print(desc_comparison)
playground/policy_extraction.py CHANGED
@@ -72,3 +72,5 @@ print(result)
72
  #print("Polarity score of the article:", analyze_polarity(article2))
73
  #print("Frequency of 'Xi':", keyword_frequency(article2, "Xi"))
74
  #print("Frequency of 'meeting':", keyword_frequency(article2, "meeting"))
 
 
 
72
  #print("Polarity score of the article:", analyze_polarity(article2))
73
  #print("Frequency of 'Xi':", keyword_frequency(article2, "Xi"))
74
  #print("Frequency of 'meeting':", keyword_frequency(article2, "meeting"))
75
+
76
+
playground/policy_tone.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts.chat import ChatPromptTemplate
from langchain_openai import AzureChatOpenAI
from textblob import TextBlob
import nltk
import os

# TextBlob tokenization requires this NLTK resource at runtime.
nltk.download('punkt_tab')

# SECURITY FIX: a live Azure OpenAI API key was previously hard-coded here and
# committed to source control; that key must be rotated. Secrets must come
# from the environment. Non-secret settings keep their previous values as
# defaults but may be overridden externally.
os.environ.setdefault("AZURE_OPENAI_ENDPOINT", "https://openai-oe.openai.azure.com/")
os.environ.setdefault("AZURE_OPENAI_API_VERSION", "2024-06-01")
os.environ.setdefault("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME", "gpt-4")
if "AZURE_OPENAI_API_KEY" not in os.environ:
    raise RuntimeError(
        "AZURE_OPENAI_API_KEY is not set; export it in the environment "
        "instead of committing it to source control.")

llm = AzureChatOpenAI(
    deployment_name=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"],
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"])

# Local playground fixture files with the two sample articles to compare.
with open('article1_2023.txt', 'r') as file:
    article1 = file.read()
with open('article2_2022.txt', 'r') as file:
    article2 = file.read()
25
+
26
class Policy_tone(BaseModel):
    """
    Structured output schema for the policy-tone comparison.

    NOTE(review): duplicates Policy_tone in
    controllers/policy_comparison_service.py — consider importing it instead.

    Attributes:
        policies (list): Python list of dictionaries containing policy and its corresponding tone.
    """
    policies: list = Field(
        description="""
        Create a Python list of dictionaries where each dictionary represents a policy. Each dictionary should contain the following keys:
        1. policy: A brief header summarizing the overarching policy. This should be concise and to the point.
        2. tone: A dictionary mapping article names to brief descriptions of the tone used in discussing the policy within each respective article. The tone should succinctly describe the sentiment or style of the presentation.""")
38
+
39
def analyze_policy_tone(article1, article2):
    """
    Analyzes and compares policies based on their tone in two articles using a structured approach.

    NOTE(review): duplicates the function of the same name in
    controllers/policy_comparison_service.py — consider importing it instead.

    Args:
        article1 (str): Content of the first article.
        article2 (str): Content of the second article.

    Returns:
        Policy_tone: Parsed structured result of the policy/tone comparison.
    """
    # Parser enforces the Policy_tone schema on the model's output.
    parser = PydanticOutputParser(pydantic_object=Policy_tone)
    prompt = ChatPromptTemplate.from_messages([
        ("system", "You are a Current Affairs Commentator.\
        Your task is to analyze and compare the policies listed in the two articles provided.\
        Extract and compare the list of (sub-)policies, after analyzing their corresponding tones"),
        ("system", "Here is the first article: {article1}"),
        ("system", "Here is the second article: {article2}"),
        ("human", "Format instructions: {format_instructions}"),
        # NOTE(review): "agent_scratchpad" is only meaningful for agent
        # executors; it appears unused in this plain chain — confirm.
        ("placeholder", "{agent_scratchpad}"),
    ])

    chain = prompt | llm | parser

    return chain.invoke({
        'article1': article1,
        'article2': article2,
        'format_instructions': parser.get_format_instructions()
    })
69
+
70
# Ad-hoc smoke run: analyze tone across the two sample articles and print it.
result = analyze_policy_tone(article1, article2)
print(result)
playground/test.py DELETED
@@ -1,29 +0,0 @@
1
- def generate_data_context(df):
2
- """
3
- Generate a data context string for a given dataframe.
4
-
5
- Parameters:
6
- df (pandas.DataFrame): The input dataframe.
7
-
8
- Returns:
9
- str: The data context string containing schema description, \
10
- summary statistics, and sample data.
11
- """
12
- try:
13
- schema_description = "This is a dataframe that will be passed internally. \
14
- The database has the following columns: "
15
-
16
- schema_description += ", ".join(
17
- [f"{col} ({dtype})" for col, dtype in df.dtypes.items()])
18
-
19
- summary_stats = df.describe().to_string()
20
-
21
- sample_data = df.head().to_string()
22
-
23
- data_context = f"{schema_description}.\n\
24
- Summary Statistics:\n{summary_stats}\n\
25
- Sample Data:\n{sample_data}"
26
-
27
- return data_context
28
- except Exception as e:
29
- raise e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
routes/main.py CHANGED
@@ -1,5 +1,6 @@
 
1
  from flask import request, jsonify
2
- from controllers.article_search_service import article_search
3
  from controllers.article_query_service import article_agent
4
  from . import bp
5
 
@@ -32,7 +33,6 @@ def search_articles():
32
  print("Error processing articles:", e)
33
  return jsonify({'error': 'Failed to process articles'}), 500
34
 
35
-
36
  @bp.route('/query', methods=['POST'])
37
  def handle_query():
38
  data = request.get_json()
@@ -56,3 +56,75 @@ def handle_query():
56
  return jsonify({'error': 'Failed to process the query'}), 500
57
  except Exception as e:
58
  return jsonify({'error': str(e)}), 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from controllers.policy_comparison_service import keyword_frequency, analyze_policy_tone, analyze_policy_description
2
  from flask import request, jsonify
3
+ from controllers.article_search_service import article_search, article_search_by_id
4
  from controllers.article_query_service import article_agent
5
  from . import bp
6
 
 
33
  print("Error processing articles:", e)
34
  return jsonify({'error': 'Failed to process articles'}), 500
35
 
 
36
  @bp.route('/query', methods=['POST'])
37
  def handle_query():
38
  data = request.get_json()
 
56
  return jsonify({'error': 'Failed to process the query'}), 500
57
  except Exception as e:
58
  return jsonify({'error': str(e)}), 500
59
+
60
@bp.route('/keyword', methods=['POST'])
def handle_keyword():
    """Count occurrences of a keyword in two articles.

    JSON body: source ('external' = raw article text, 'internal' = article IDs
    to resolve via DynamoDB), keyword, article1, article2.

    Returns:
        200 with per-article frequencies, 400 on bad input or failed lookup.
    """
    data = request.get_json()
    if not data:
        return jsonify({'error': 'No data provided'}), 400
    source = data.get('source')
    keyword = data.get('keyword')
    article1 = data.get('article1')
    article2 = data.get('article2')
    if not source or not keyword or not article1 or not article2:
        return jsonify({'error': 'Missing parameters'}), 400
    if source == 'internal':
        # For internal sources the payload carries article IDs, not text.
        article1 = article_search_by_id(article1)
        article2 = article_search_by_id(article2)
        # BUG FIX: check for failed lookups BEFORE counting. Previously
        # keyword_frequency was called on a possibly-None article, raising a
        # TypeError before the None check could run.
        if article1 is None or article2 is None:
            return jsonify({'error': 'failed to retrieve the article'}), 400
    elif source != 'external':
        # BUG FIX: an unknown source previously returned 0/0 with HTTP 200.
        return jsonify({'error': 'Invalid source'}), 400
    frequency_article1 = keyword_frequency(article1, keyword)
    frequency_article2 = keyword_frequency(article2, keyword)
    return jsonify({'frequency_article1': frequency_article1, 'frequency_article2': frequency_article2})
+
85
@bp.route('/tone', methods=['POST'])
def compare_tone():
    """Compare the tone of policies across two articles via the LLM service.

    JSON body: source ('internal' resolves article IDs via DynamoDB; otherwise
    article1/article2 are taken as raw text), article1, article2.

    Returns:
        200 with the comparison, 400/404 on bad input, 500 on analysis failure.
    """
    data = request.get_json()
    if not data:
        return jsonify({'error': 'No data provided'}), 400
    source = data.get('source')
    article1 = data.get('article1')
    article2 = data.get('article2')

    if not source or not article1 or not article2:
        return jsonify({'error': 'Missing parameters'}), 400

    try:
        if source == 'internal':
            article1 = article_search_by_id(article1)
            article2 = article_search_by_id(article2)
            if article1 is None or article2 is None:
                return jsonify({'error': 'Failed to retrieve articles'}), 404
        result = analyze_policy_tone(article1, article2)
        # BUG FIX: the output parser returns a Pydantic model, which jsonify
        # cannot serialize directly; convert it to a plain dict first.
        return jsonify(result.dict()), 200
    except Exception as e:
        return jsonify({'error': str(e)}), 500
+
108
+
109
@bp.route('/compare', methods=['POST'])
def compare_policy():
    """Compare policy descriptions across two articles via the LLM service.

    JSON body: source ('internal' resolves article IDs via DynamoDB; otherwise
    article1/article2 are taken as raw text), article1, article2.

    Returns:
        200 with the comparison, 400/404 on bad input, 500 on analysis failure.
    """
    data = request.get_json()
    if not data:
        return jsonify({'error': 'No data provided'}), 400
    source = data.get('source')
    article1 = data.get('article1')
    article2 = data.get('article2')

    if not source or not article1 or not article2:
        return jsonify({'error': 'Missing parameters'}), 400

    try:
        if source == 'internal':
            article1 = article_search_by_id(article1)
            article2 = article_search_by_id(article2)
            if article1 is None or article2 is None:
                return jsonify({'error': 'Failed to retrieve articles'}), 404
        result = analyze_policy_description(article1, article2)
        # BUG FIX: the output parser returns a Pydantic model, which jsonify
        # cannot serialize directly; convert it to a plain dict first.
        return jsonify(result.dict()), 200
    except Exception as e:
        return jsonify({'error': str(e)}), 500