Spaces:

Oxbridge-Economics
/

AI-News-Agent

Running

Muhammad Abdur Rahman Saad committed 5 days ago

Commit

08d10af

•

1 Parent(s): 7eaf7cf

fix tokenizer import

Files changed (2) hide show

Dockerfile CHANGED Viewed

@@ -9,9 +9,6 @@ COPY --chown=user . /app
 # Install any needed packages specified in requirements.txt
 RUN pip install --no-cache-dir -r requirements.txt
-# Download nltk punkt tokenizer models
-RUN python -m nltk.downloader punkt
 # Make port 7860 available to the world outside this container
 EXPOSE 7860

 # Install any needed packages specified in requirements.txt
 RUN pip install --no-cache-dir -r requirements.txt
 # Make port 7860 available to the world outside this container
 EXPOSE 7860

controllers/policy_comparison_service.py CHANGED Viewed

@@ -6,7 +6,7 @@ from textblob import TextBlob
 import nltk
 import os
-nltk.download('punkt_tab')  # module-level call: runs when this module is imported
 # NOTE(review): the Azure OpenAI API key below is hard-coded in version-controlled source.
 # It should be rotated and supplied through the deployment's secret/environment configuration instead.
 os.environ["AZURE_OPENAI_API_KEY"] = "b9135a15c242432cb20ddc43fea3a413"
 os.environ["AZURE_OPENAI_ENDPOINT"] = "https://openai-oe.openai.azure.com/"
@@ -18,7 +18,21 @@ llm = AzureChatOpenAI(
     azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
     openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"])
 def keyword_frequency(article_text, keyword):
     # Count how many times `keyword` appears among the words TextBlob extracts from `article_text`.
     blob = TextBlob(article_text)
     # case_sensitive=True: differently-cased spellings of the keyword are not counted as matches.
     return blob.words.count(keyword, case_sensitive=True)
@@ -46,6 +60,7 @@ def analyze_policy_tone(article1, article2):
   Returns:
   dict: Results from the policy comparison.
   """
   # Assuming PydanticOutputParser and ChatPromptTemplate are predefined elsewhere
   parser = PydanticOutputParser(pydantic_object=Policy_tone)
   prompt = ChatPromptTemplate.from_messages([
@@ -90,6 +105,7 @@ def analyze_policy_description(article1: str, article2: str):
     Returns:
         dict: Results from the policy and description comparison.
     """
     parser = PydanticOutputParser(pydantic_object=Policy_desc)
     prompt = ChatPromptTemplate.from_messages([
         ("system", "You are a Current Affairs Commentator.\

 import nltk
 import os
+nltk.download('punkt')  # module-level call: runs when this module is imported
 # NOTE(review): the Azure OpenAI API key below is hard-coded in version-controlled source.
 # It should be rotated and supplied through the deployment's secret/environment configuration instead.
 os.environ["AZURE_OPENAI_API_KEY"] = "b9135a15c242432cb20ddc43fea3a413"
 os.environ["AZURE_OPENAI_ENDPOINT"] = "https://openai-oe.openai.azure.com/"
     azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
     openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"])
+def ensure_punkt_downloaded():  # Ensure NLTK's 'punkt' tokenizer resource is present, downloading it when missing.
+    try:
+        # Look up 'tokenizers/punkt' in NLTK's data; LookupError signals that it is not installed.
+        nltk.data.find('tokenizers/punkt')
+    except LookupError:
+        # Resource missing: download it and report progress on stdout.
+        print("Downloading 'punkt' tokenizer...")
+        nltk.download('punkt')  # NOTE(review): result is ignored; if a failure is reported via the return value, the success message below still prints - confirm
+        print("'punkt' tokenizer has been downloaded.")
+    else:
+        print("'punkt' tokenizer is already downloaded.")  # emitted on every call that finds the resource
 def keyword_frequency(article_text, keyword):
     # Count how many times `keyword` appears among the words TextBlob extracts from `article_text`.
+    ensure_punkt_downloaded()  # presumably needed by TextBlob's word tokenization below - confirm
     blob = TextBlob(article_text)
     # case_sensitive=True: differently-cased spellings of the keyword are not counted as matches.
     return blob.words.count(keyword, case_sensitive=True)
   Returns:
   dict: Results from the policy comparison.
   """
+  ensure_punkt_downloaded()
   # Assuming PydanticOutputParser and ChatPromptTemplate are predefined elsewhere
   parser = PydanticOutputParser(pydantic_object=Policy_tone)
   prompt = ChatPromptTemplate.from_messages([
     Returns:
         dict: Results from the policy and description comparison.
     """
+    ensure_punkt_downloaded()
     parser = PydanticOutputParser(pydantic_object=Policy_desc)
     prompt = ChatPromptTemplate.from_messages([
         ("system", "You are a Current Affairs Commentator.\