Muhammad Abdur Rahman Saad
committed on
Commit
•
08d10af
1
Parent(s):
7eaf7cf
fix tokenizer import
Browse files- Dockerfile +0 -3
- controllers/policy_comparison_service.py +17 -1
Dockerfile
CHANGED
@@ -9,9 +9,6 @@ COPY --chown=user . /app
|
|
9 |
# Install any needed packages specified in requirements.txt
|
10 |
RUN pip install --no-cache-dir -r requirements.txt
|
11 |
|
12 |
-
# Download nltk punkt tokenizer models
|
13 |
-
RUN python -m nltk.downloader punkt
|
14 |
-
|
15 |
# Make port 7860 available to the world outside this container
|
16 |
EXPOSE 7860
|
17 |
|
|
|
9 |
# Install any needed packages specified in requirements.txt
|
10 |
RUN pip install --no-cache-dir -r requirements.txt
|
11 |
|
|
|
|
|
|
|
12 |
# Make port 7860 available to the world outside this container
|
13 |
EXPOSE 7860
|
14 |
|
controllers/policy_comparison_service.py
CHANGED
@@ -6,7 +6,7 @@ from textblob import TextBlob
|
|
6 |
import nltk
|
7 |
import os
|
8 |
|
9 |
-
nltk.download('
|
10 |
|
11 |
os.environ["AZURE_OPENAI_API_KEY"] = "b9135a15c242432cb20ddc43fea3a413"
|
12 |
os.environ["AZURE_OPENAI_ENDPOINT"] = "https://openai-oe.openai.azure.com/"
|
@@ -18,7 +18,21 @@ llm = AzureChatOpenAI(
|
|
18 |
azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
|
19 |
openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"])
|
20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
def keyword_frequency(article_text, keyword):
|
|
|
22 |
blob = TextBlob(article_text)
|
23 |
return blob.words.count(keyword, case_sensitive=True)
|
24 |
|
@@ -46,6 +60,7 @@ def analyze_policy_tone(article1, article2):
|
|
46 |
Returns:
|
47 |
dict: Results from the policy comparison.
|
48 |
"""
|
|
|
49 |
# Assuming PydanticOutputParser and ChatPromptTemplate are predefined elsewhere
|
50 |
parser = PydanticOutputParser(pydantic_object=Policy_tone)
|
51 |
prompt = ChatPromptTemplate.from_messages([
|
@@ -90,6 +105,7 @@ def analyze_policy_description(article1: str, article2: str):
|
|
90 |
Returns:
|
91 |
dict: Results from the policy and description comparison.
|
92 |
"""
|
|
|
93 |
parser = PydanticOutputParser(pydantic_object=Policy_desc)
|
94 |
prompt = ChatPromptTemplate.from_messages([
|
95 |
("system", "You are a Current Affairs Commentator.\
|
|
|
6 |
import nltk
|
7 |
import os
|
8 |
|
9 |
+
nltk.download('punkt')
|
10 |
|
11 |
os.environ["AZURE_OPENAI_API_KEY"] = "b9135a15c242432cb20ddc43fea3a413"
|
12 |
os.environ["AZURE_OPENAI_ENDPOINT"] = "https://openai-oe.openai.azure.com/"
|
|
|
18 |
azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
|
19 |
openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"])
|
20 |
|
21 |
+
def ensure_punkt_downloaded():
|
22 |
+
try:
|
23 |
+
# Check if 'punkt' tokenizer is available
|
24 |
+
nltk.data.find('tokenizers/punkt')
|
25 |
+
except LookupError:
|
26 |
+
# If not present, download it
|
27 |
+
print("Downloading 'punkt' tokenizer...")
|
28 |
+
nltk.download('punkt')
|
29 |
+
print("'punkt' tokenizer has been downloaded.")
|
30 |
+
else:
|
31 |
+
print("'punkt' tokenizer is already downloaded.")
|
32 |
+
|
33 |
+
|
34 |
def keyword_frequency(article_text, keyword):
|
35 |
+
ensure_punkt_downloaded()
|
36 |
blob = TextBlob(article_text)
|
37 |
return blob.words.count(keyword, case_sensitive=True)
|
38 |
|
|
|
60 |
Returns:
|
61 |
dict: Results from the policy comparison.
|
62 |
"""
|
63 |
+
ensure_punkt_downloaded()
|
64 |
# Assuming PydanticOutputParser and ChatPromptTemplate are predefined elsewhere
|
65 |
parser = PydanticOutputParser(pydantic_object=Policy_tone)
|
66 |
prompt = ChatPromptTemplate.from_messages([
|
|
|
105 |
Returns:
|
106 |
dict: Results from the policy and description comparison.
|
107 |
"""
|
108 |
+
ensure_punkt_downloaded()
|
109 |
parser = PydanticOutputParser(pydantic_object=Policy_desc)
|
110 |
prompt = ChatPromptTemplate.from_messages([
|
111 |
("system", "You are a Current Affairs Commentator.\
|