Muhammad Abdur Rahman Saad committed on
Commit
c45e43e
1 Parent(s): ce74f9c

add policy analysis feature

Browse files
controllers/article_search_service.py CHANGED
@@ -72,4 +72,29 @@ def article_search(titles = None, categories = None):
72
 
73
  return pd.DataFrame(items)
74
 
75
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  return pd.DataFrame(items)
74
 
75
def article_search_by_id(id):
    """Fetch the content of a single article by its ID.

    Args:
        id (str): The ID of the article to search for.

    Returns:
        str | None: The article's ``content`` field, or None when the article
        is missing or the lookup fails.
    """
    table = get_db_connection().Table('article_china')
    try:
        # Only the 'content' attribute is needed, so project it explicitly.
        response = table.get_item(
            Key={'id': id},
            ProjectionExpression='content'
        )
    except Exception as e:
        print(f"Error fetching article content: {e}")
        return None
    item = response.get('Item')
    if item is None:
        print("No article found with the given ID.")
        return None
    return item.get('content', None)
controllers/policy_comparison_service.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts.chat import ChatPromptTemplate
from langchain_openai import AzureChatOpenAI
from textblob import TextBlob
import nltk
import os

# TextBlob tokenization requires this NLTK resource at runtime.
nltk.download('punkt_tab')

# SECURITY FIX: a live Azure OpenAI API key was previously hard-coded here and
# committed to source control; that key must be considered compromised and
# rotated. Secrets must come from the environment. Non-secret settings keep
# their previous values as defaults but may be overridden externally.
os.environ.setdefault("AZURE_OPENAI_ENDPOINT", "https://openai-oe.openai.azure.com/")
os.environ.setdefault("AZURE_OPENAI_API_VERSION", "2024-06-01")
os.environ.setdefault("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME", "gpt-4")
if "AZURE_OPENAI_API_KEY" not in os.environ:
    raise RuntimeError(
        "AZURE_OPENAI_API_KEY is not set; export it in the environment "
        "instead of committing it to source control.")

# Shared LLM client used by all analysis functions in this module.
llm = AzureChatOpenAI(
    deployment_name=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"],
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"])
21
def keyword_frequency(article_text, keyword):
    """Count case-sensitive occurrences of *keyword* among the article's tokens."""
    tokens = TextBlob(article_text).words
    return tokens.count(keyword, case_sensitive=True)
24
+
25
class Policy_tone(BaseModel):
    """
    Structured output schema for the policy-tone comparison.

    The Field description below is part of runtime behavior: it is embedded in
    the format instructions sent to the LLM. Do not edit it casually.

    Attributes:
        policies (list): Python list of dictionaries containing policy and its corresponding tone.
    """
    policies: list = Field(
        description="""
        Create a Python list of dictionaries where each dictionary represents a policy. Each dictionary should contain the following keys:
        1. policy: A brief header summarizing the overarching policy. This should be concise and to the point.
        2. tone: A dictionary mapping article names to brief descriptions of the tone used in discussing the policy within each respective article. The tone should succinctly describe the sentiment or style of the presentation.""")
38
def analyze_policy_tone(article1, article2):
    """
    Analyzes and compares policies based on their tone in two articles using a structured approach.

    Args:
        article1 (str): Content of the first article.
        article2 (str): Content of the second article.

    Returns:
        Policy_tone: Parsed structured result of the policy/tone comparison.
    """
    # Parser enforces the Policy_tone schema on the model's output.
    parser = PydanticOutputParser(pydantic_object=Policy_tone)
    prompt = ChatPromptTemplate.from_messages([
        ("system", "You are a Current Affairs Commentator.\
        Your task is to analyze and compare the policies listed in the two articles provided.\
        Extract and compare the list of (sub-)policies, after analyzing their corresponding tones"),
        ("system", "Here is the first article: {article1}"),
        ("system", "Here is the second article: {article2}"),
        ("human", "Format instructions: {format_instructions}"),
        # NOTE(review): an "agent_scratchpad" placeholder is only meaningful
        # for agent executors; it appears unused in this plain prompt|llm|parser
        # chain — confirm before removing.
        ("placeholder", "{agent_scratchpad}"),
    ])

    chain = prompt | llm | parser

    return chain.invoke({
        'article1': article1,
        'article2': article2,
        'format_instructions': parser.get_format_instructions()
    })
68
+
69
class Policy_desc(BaseModel):
    """
    Structured output schema for the policy-description comparison.

    The Field description below is part of runtime behavior: it is embedded in
    the format instructions sent to the LLM. Do not edit it casually.

    Attributes:
        policies (list): Python list of dictionaries containing policy and its corresponding tone , and differing descriptions.
    """
    policies: list = Field(
        description=
        '''Create a Python list of dictionaries where each dictionary represents a policy. Each dictionary should contain the following keys:
        1. policy: A brief header summarizing the overarching policy. This should be concise and to the point.
        2. descriptions: A dictionary mapping article names to detailed descriptions of the policy. These descriptions should not only provide in-depth information and context about the policy as discussed in each article but also include a comparison key. This key should detail the concrete policy changes, contrasting how each article addresses changes or continuity in the policy over time or across different contexts.''')
81
+
82
def analyze_policy_description(article1: str, article2: str):
    """
    Analyzes and compares the policy headers and descriptions in two articles.

    Args:
        article1 (str): Content of the first article.
        article2 (str): Content of the second article.

    Returns:
        Policy_desc: Parsed structured result of the policy/description comparison.
    """
    # Parser enforces the Policy_desc schema on the model's output.
    parser = PydanticOutputParser(pydantic_object=Policy_desc)
    prompt = ChatPromptTemplate.from_messages([
        ("system", "You are a Current Affairs Commentator.\
        Your task is to analyze and compare the policies listed in the two articles provided.\
        Extract and compare the list of (sub-)policies, analyze the corresponding descriptions, and quote the description of each policy from both articles as detailed as possible."),
        ("system", "Here is the first article: {article1}"),
        ("system", "Here is the second article: {article2}"),
        ("human", "Format instructions: {format_instructions}"),
        # NOTE(review): "agent_scratchpad" is only meaningful for agent
        # executors; it appears unused in this plain chain — confirm.
        ("placeholder", "{agent_scratchpad}"),
    ])

    chain = prompt | llm | parser

    result = chain.invoke({
        'article1': article1,
        'article2': article2,
        'format_instructions': parser.get_format_instructions()
    })

    return result
playground/policy_comparison.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts.chat import ChatPromptTemplate
from langchain_openai import AzureChatOpenAI
from textblob import TextBlob
import nltk
import os

# TextBlob tokenization requires this NLTK resource at runtime.
nltk.download('punkt_tab')

# SECURITY FIX: a live Azure OpenAI API key was previously hard-coded here and
# committed to source control; that key must be rotated. Secrets must come
# from the environment. Non-secret settings keep their previous values as
# defaults but may be overridden externally.
os.environ.setdefault("AZURE_OPENAI_ENDPOINT", "https://openai-oe.openai.azure.com/")
os.environ.setdefault("AZURE_OPENAI_API_VERSION", "2024-06-01")
os.environ.setdefault("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME", "gpt-4")
if "AZURE_OPENAI_API_KEY" not in os.environ:
    raise RuntimeError(
        "AZURE_OPENAI_API_KEY is not set; export it in the environment "
        "instead of committing it to source control.")

llm = AzureChatOpenAI(
    deployment_name=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"],
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"])


# Local playground fixture files with the two sample articles to compare.
with open('article1_2023.txt', 'r') as file:
    article_1 = file.read()
with open('article2_2022.txt', 'r') as file:
    article_2 = file.read()
26
+
27
class Policy_desc(BaseModel):
    """
    Structured output schema for the policy-description comparison.

    NOTE(review): this duplicates Policy_desc in
    controllers/policy_comparison_service.py — consider importing it from
    there instead of maintaining two copies.

    Attributes:
        policies (list): Python list of dictionaries containing policy and its corresponding tone , and differing descriptions.
    """
    policies: list = Field(
        description=
        '''Create a Python list of dictionaries where each dictionary represents a policy. Each dictionary should contain the following keys:
        1. policy: A brief header summarizing the overarching policy. This should be concise and to the point.
        2. descriptions: A dictionary mapping article names to detailed descriptions of the policy. These descriptions should not only provide in-depth information and context about the policy as discussed in each article but also include a comparison key. This key should detail the concrete policy changes, contrasting how each article addresses changes or continuity in the policy over time or across different contexts.''')
39
+
40
def analyze_policy_description(article1: str, article2: str):
    """
    Analyzes and compares the policy headers and descriptions in two articles.

    NOTE(review): duplicates the function of the same name in
    controllers/policy_comparison_service.py — consider importing it instead.

    Args:
        article1 (str): Content of the first article.
        article2 (str): Content of the second article.

    Returns:
        Policy_desc: Parsed structured result of the policy/description comparison.
    """
    # Parser enforces the Policy_desc schema on the model's output.
    parser = PydanticOutputParser(pydantic_object=Policy_desc)
    prompt = ChatPromptTemplate.from_messages([
        ("system", "You are a Current Affairs Commentator.\
        Your task is to analyze and compare the policies listed in the two articles provided.\
        Extract and compare the list of (sub-)policies, analyze the corresponding descriptions, and quote the description of each policy from both articles as detailed as possible."),
        ("system", "Here is the first article: {article1}"),
        ("system", "Here is the second article: {article2}"),
        ("human", "Format instructions: {format_instructions}"),
        # NOTE(review): "agent_scratchpad" is only meaningful for agent
        # executors; it appears unused in this plain chain — confirm.
        ("placeholder", "{agent_scratchpad}"),
    ])

    chain = prompt | llm | parser

    result = chain.invoke({
        'article1': article1,
        'article2': article2,
        'format_instructions': parser.get_format_instructions()
    })

    return result
71
+
72
+
73
# Ad-hoc smoke run: compare the two local sample articles and dump the result.
desc_comparison = analyze_policy_description(article_1, article_2)
print(desc_comparison)
playground/policy_extraction.py CHANGED
@@ -72,3 +72,5 @@ print(result)
72
  #print("Polarity score of the article:", analyze_polarity(article2))
73
  #print("Frequency of 'Xi':", keyword_frequency(article2, "Xi"))
74
  #print("Frequency of 'meeting':", keyword_frequency(article2, "meeting"))
 
 
 
72
  #print("Polarity score of the article:", analyze_polarity(article2))
73
  #print("Frequency of 'Xi':", keyword_frequency(article2, "Xi"))
74
  #print("Frequency of 'meeting':", keyword_frequency(article2, "meeting"))
75
+
76
+
playground/policy_tone.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts.chat import ChatPromptTemplate
from langchain_openai import AzureChatOpenAI
from textblob import TextBlob
import nltk
import os

# TextBlob tokenization requires this NLTK resource at runtime.
nltk.download('punkt_tab')

# SECURITY FIX: a live Azure OpenAI API key was previously hard-coded here and
# committed to source control; that key must be rotated. Secrets must come
# from the environment. Non-secret settings keep their previous values as
# defaults but may be overridden externally.
os.environ.setdefault("AZURE_OPENAI_ENDPOINT", "https://openai-oe.openai.azure.com/")
os.environ.setdefault("AZURE_OPENAI_API_VERSION", "2024-06-01")
os.environ.setdefault("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME", "gpt-4")
if "AZURE_OPENAI_API_KEY" not in os.environ:
    raise RuntimeError(
        "AZURE_OPENAI_API_KEY is not set; export it in the environment "
        "instead of committing it to source control.")

llm = AzureChatOpenAI(
    deployment_name=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"],
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"])

# Local playground fixture files with the two sample articles to compare.
with open('article1_2023.txt', 'r') as file:
    article1 = file.read()
with open('article2_2022.txt', 'r') as file:
    article2 = file.read()
25
+
26
class Policy_tone(BaseModel):
    """
    Structured output schema for the policy-tone comparison.

    NOTE(review): duplicates Policy_tone in
    controllers/policy_comparison_service.py — consider importing it instead.

    Attributes:
        policies (list): Python list of dictionaries containing policy and its corresponding tone.
    """
    policies: list = Field(
        description="""
        Create a Python list of dictionaries where each dictionary represents a policy. Each dictionary should contain the following keys:
        1. policy: A brief header summarizing the overarching policy. This should be concise and to the point.
        2. tone: A dictionary mapping article names to brief descriptions of the tone used in discussing the policy within each respective article. The tone should succinctly describe the sentiment or style of the presentation.""")
38
+
39
def analyze_policy_tone(article1, article2):
    """
    Analyzes and compares policies based on their tone in two articles using a structured approach.

    NOTE(review): duplicates the function of the same name in
    controllers/policy_comparison_service.py — consider importing it instead.

    Args:
        article1 (str): Content of the first article.
        article2 (str): Content of the second article.

    Returns:
        Policy_tone: Parsed structured result of the policy/tone comparison.
    """
    # Parser enforces the Policy_tone schema on the model's output.
    parser = PydanticOutputParser(pydantic_object=Policy_tone)
    prompt = ChatPromptTemplate.from_messages([
        ("system", "You are a Current Affairs Commentator.\
        Your task is to analyze and compare the policies listed in the two articles provided.\
        Extract and compare the list of (sub-)policies, after analyzing their corresponding tones"),
        ("system", "Here is the first article: {article1}"),
        ("system", "Here is the second article: {article2}"),
        ("human", "Format instructions: {format_instructions}"),
        # NOTE(review): "agent_scratchpad" is only meaningful for agent
        # executors; it appears unused in this plain chain — confirm.
        ("placeholder", "{agent_scratchpad}"),
    ])

    chain = prompt | llm | parser

    return chain.invoke({
        'article1': article1,
        'article2': article2,
        'format_instructions': parser.get_format_instructions()
    })
69
+
70
# Ad-hoc smoke run: analyze tone across the two sample articles and print it.
result = analyze_policy_tone(article1, article2)
print(result)
playground/test.py DELETED
@@ -1,29 +0,0 @@
1
- def generate_data_context(df):
2
- """
3
- Generate a data context string for a given dataframe.
4
-
5
- Parameters:
6
- df (pandas.DataFrame): The input dataframe.
7
-
8
- Returns:
9
- str: The data context string containing schema description, \
10
- summary statistics, and sample data.
11
- """
12
- try:
13
- schema_description = "This is a dataframe that will be passed internally. \
14
- The database has the following columns: "
15
-
16
- schema_description += ", ".join(
17
- [f"{col} ({dtype})" for col, dtype in df.dtypes.items()])
18
-
19
- summary_stats = df.describe().to_string()
20
-
21
- sample_data = df.head().to_string()
22
-
23
- data_context = f"{schema_description}.\n\
24
- Summary Statistics:\n{summary_stats}\n\
25
- Sample Data:\n{sample_data}"
26
-
27
- return data_context
28
- except Exception as e:
29
- raise e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
routes/main.py CHANGED
@@ -1,5 +1,6 @@
 
1
  from flask import request, jsonify
2
- from controllers.article_search_service import article_search
3
  from controllers.article_query_service import article_agent
4
  from . import bp
5
 
@@ -32,7 +33,6 @@ def search_articles():
32
  print("Error processing articles:", e)
33
  return jsonify({'error': 'Failed to process articles'}), 500
34
 
35
-
36
  @bp.route('/query', methods=['POST'])
37
  def handle_query():
38
  data = request.get_json()
@@ -56,3 +56,75 @@ def handle_query():
56
  return jsonify({'error': 'Failed to process the query'}), 500
57
  except Exception as e:
58
  return jsonify({'error': str(e)}), 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from controllers.policy_comparison_service import keyword_frequency, analyze_policy_tone, analyze_policy_description
2
  from flask import request, jsonify
3
+ from controllers.article_search_service import article_search, article_search_by_id
4
  from controllers.article_query_service import article_agent
5
  from . import bp
6
 
 
33
  print("Error processing articles:", e)
34
  return jsonify({'error': 'Failed to process articles'}), 500
35
 
 
36
  @bp.route('/query', methods=['POST'])
37
  def handle_query():
38
  data = request.get_json()
 
56
  return jsonify({'error': 'Failed to process the query'}), 500
57
  except Exception as e:
58
  return jsonify({'error': str(e)}), 500
59
+
60
@bp.route('/keyword', methods=['POST'])
def handle_keyword():
    """Count occurrences of a keyword in two articles.

    JSON body: source ('external' = raw article text, 'internal' = article IDs
    to resolve via DynamoDB), keyword, article1, article2.

    Returns:
        200 with per-article frequencies, 400 on bad input or failed lookup.
    """
    data = request.get_json()
    if not data:
        return jsonify({'error': 'No data provided'}), 400
    source = data.get('source')
    keyword = data.get('keyword')
    article1 = data.get('article1')
    article2 = data.get('article2')
    if not source or not keyword or not article1 or not article2:
        return jsonify({'error': 'Missing parameters'}), 400
    if source == 'internal':
        # For internal sources the payload carries article IDs, not text.
        article1 = article_search_by_id(article1)
        article2 = article_search_by_id(article2)
        # BUG FIX: check for failed lookups BEFORE counting. Previously
        # keyword_frequency was called on a possibly-None article, raising a
        # TypeError before the None check could run.
        if article1 is None or article2 is None:
            return jsonify({'error': 'failed to retrieve the article'}), 400
    elif source != 'external':
        # BUG FIX: an unknown source previously returned 0/0 with HTTP 200.
        return jsonify({'error': 'Invalid source'}), 400
    frequency_article1 = keyword_frequency(article1, keyword)
    frequency_article2 = keyword_frequency(article2, keyword)
    return jsonify({'frequency_article1': frequency_article1, 'frequency_article2': frequency_article2})
+
85
@bp.route('/tone', methods=['POST'])
def compare_tone():
    """Compare the tone of policies across two articles via the LLM service.

    JSON body: source ('internal' resolves article IDs via DynamoDB; otherwise
    article1/article2 are taken as raw text), article1, article2.

    Returns:
        200 with the comparison, 400/404 on bad input, 500 on analysis failure.
    """
    data = request.get_json()
    if not data:
        return jsonify({'error': 'No data provided'}), 400
    source = data.get('source')
    article1 = data.get('article1')
    article2 = data.get('article2')

    if not source or not article1 or not article2:
        return jsonify({'error': 'Missing parameters'}), 400

    try:
        if source == 'internal':
            article1 = article_search_by_id(article1)
            article2 = article_search_by_id(article2)
            if article1 is None or article2 is None:
                return jsonify({'error': 'Failed to retrieve articles'}), 404
        result = analyze_policy_tone(article1, article2)
        # BUG FIX: the output parser returns a Pydantic model, which jsonify
        # cannot serialize directly; convert it to a plain dict first.
        return jsonify(result.dict()), 200
    except Exception as e:
        return jsonify({'error': str(e)}), 500
+
108
+
109
@bp.route('/compare', methods=['POST'])
def compare_policy():
    """Compare policy descriptions across two articles via the LLM service.

    JSON body: source ('internal' resolves article IDs via DynamoDB; otherwise
    article1/article2 are taken as raw text), article1, article2.

    Returns:
        200 with the comparison, 400/404 on bad input, 500 on analysis failure.
    """
    data = request.get_json()
    if not data:
        return jsonify({'error': 'No data provided'}), 400
    source = data.get('source')
    article1 = data.get('article1')
    article2 = data.get('article2')

    if not source or not article1 or not article2:
        return jsonify({'error': 'Missing parameters'}), 400

    try:
        if source == 'internal':
            article1 = article_search_by_id(article1)
            article2 = article_search_by_id(article2)
            if article1 is None or article2 is None:
                return jsonify({'error': 'Failed to retrieve articles'}), 404
        result = analyze_policy_description(article1, article2)
        # BUG FIX: the output parser returns a Pydantic model, which jsonify
        # cannot serialize directly; convert it to a plain dict first.
        return jsonify(result.dict()), 200
    except Exception as e:
        return jsonify({'error': str(e)}), 500