Muhammad Abdur Rahman Saad committed on
Commit
08d10af
1 Parent(s): 7eaf7cf

fix tokenizer import

Browse files
Dockerfile CHANGED
@@ -9,9 +9,6 @@ COPY --chown=user . /app
9
  # Install any needed packages specified in requirements.txt
10
  RUN pip install --no-cache-dir -r requirements.txt
11
 
12
- # Download nltk punkt tokenizer models
13
- RUN python -m nltk.downloader punkt
14
-
15
  # Make port 7860 available to the world outside this container
16
  EXPOSE 7860
17
 
 
9
  # Install any needed packages specified in requirements.txt
10
  RUN pip install --no-cache-dir -r requirements.txt
11
 
 
 
 
12
  # Make port 7860 available to the world outside this container
13
  EXPOSE 7860
14
 
controllers/policy_comparison_service.py CHANGED
@@ -6,7 +6,7 @@ from textblob import TextBlob
6
  import nltk
7
  import os
8
 
9
- nltk.download('punkt_tab')
10
 
11
  os.environ["AZURE_OPENAI_API_KEY"] = "b9135a15c242432cb20ddc43fea3a413"
12
  os.environ["AZURE_OPENAI_ENDPOINT"] = "https://openai-oe.openai.azure.com/"
@@ -18,7 +18,21 @@ llm = AzureChatOpenAI(
18
  azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
19
  openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"])
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  def keyword_frequency(article_text, keyword):
 
22
  blob = TextBlob(article_text)
23
  return blob.words.count(keyword, case_sensitive=True)
24
 
@@ -46,6 +60,7 @@ def analyze_policy_tone(article1, article2):
46
  Returns:
47
  dict: Results from the policy comparison.
48
  """
 
49
  # Assuming PydanticOutputParser and ChatPromptTemplate are predefined elsewhere
50
  parser = PydanticOutputParser(pydantic_object=Policy_tone)
51
  prompt = ChatPromptTemplate.from_messages([
@@ -90,6 +105,7 @@ def analyze_policy_description(article1: str, article2: str):
90
  Returns:
91
  dict: Results from the policy and description comparison.
92
  """
 
93
  parser = PydanticOutputParser(pydantic_object=Policy_desc)
94
  prompt = ChatPromptTemplate.from_messages([
95
  ("system", "You are a Current Affairs Commentator.\
 
6
  import nltk
7
  import os
8
 
9
+ nltk.download('punkt')
10
 
11
  os.environ["AZURE_OPENAI_API_KEY"] = "b9135a15c242432cb20ddc43fea3a413"
12
  os.environ["AZURE_OPENAI_ENDPOINT"] = "https://openai-oe.openai.azure.com/"
 
18
  azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
19
  openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"])
20
 
21
def ensure_punkt_downloaded():
    """Make sure the NLTK 'punkt' tokenizer data can be found locally.

    Looks the resource up with ``nltk.data.find`` and only triggers a
    download when that lookup raises ``LookupError``. Progress is reported
    on stdout.

    The function is best-effort: a failed download is reported but not
    raised, so callers behave as before and any missing-resource error
    surfaces at the point where the tokenizer is actually used.
    """
    try:
        # Check if 'punkt' tokenizer is available
        nltk.data.find('tokenizers/punkt')
    except LookupError:
        # If not present, download it
        print("Downloading 'punkt' tokenizer...")
        # nltk.download reports success/failure through its return value;
        # do not claim success when the download did not go through.
        if nltk.download('punkt'):
            print("'punkt' tokenizer has been downloaded.")
        else:
            print("Failed to download 'punkt' tokenizer.")
    else:
        print("'punkt' tokenizer is already downloaded.")
32
+
33
+
34
  def keyword_frequency(article_text, keyword):
35
+ ensure_punkt_downloaded()
36
  blob = TextBlob(article_text)
37
  return blob.words.count(keyword, case_sensitive=True)
38
 
 
60
  Returns:
61
  dict: Results from the policy comparison.
62
  """
63
+ ensure_punkt_downloaded()
64
  # Assuming PydanticOutputParser and ChatPromptTemplate are predefined elsewhere
65
  parser = PydanticOutputParser(pydantic_object=Policy_tone)
66
  prompt = ChatPromptTemplate.from_messages([
 
105
  Returns:
106
  dict: Results from the policy and description comparison.
107
  """
108
+ ensure_punkt_downloaded()
109
  parser = PydanticOutputParser(pydantic_object=Policy_desc)
110
  prompt = ChatPromptTemplate.from_messages([
111
  ("system", "You are a Current Affairs Commentator.\