# nextchat / ai /extract.py
# servionsoft's picture
# Update ai/extract.py
# be22b50 verified
import re
import openai
import os
from dotenv import load_dotenv
from utils.db import MongoDBUtil
from bson import ObjectId
from .moderation import check_moderation_text
# Load environment variables
load_dotenv()
# Hand the OpenAI SDK the key read from the OPENAI_API_KEY environment
# variable (os.getenv returns None when the variable is unset).
openai.api_key = os.getenv("OPENAI_API_KEY")
# Module-wide MongoDBUtil instance; extract_sentence uses it to reach the
# "users" collection.
mongo_util = MongoDBUtil()
# Function to use GPT for extracting details
def gpt_extract_trip_details(sentence):
    """
    Use OpenAI GPT to extract destination and number of days.

    Args:
        sentence: Free-text message describing a trip.

    Returns:
        The model's reply as a stripped string, expected to contain a
        "Destination: ..." line and a "Number of Days: ..." line, or None
        when the sentence is empty/blank, the reply carries no content, or
        the API call raises.
    """
    # Nothing can be extracted from an empty message, so avoid a needless
    # API round-trip and let the caller fall back.
    if sentence is None or not str(sentence).strip():
        return None

    prompt = (
        "Extract the following details from this sentence:\n"
        f"Sentence: '{sentence}'\n\n"
        "Details to extract:\n"
        "1. Destination: (The place the user wants to travel to, None if not mentioned)\n"
        "2. Number of Days: (Always convert written numbers like 'ten', 'fifteen', 'twenty five' into digits like 10, 15, 25. Return None if not explicitly mentioned)\n\n"
        "Format the response as:\n"
        "Destination: [destination]\n"
        "Number of Days: [days]\n"
    )
    try:
        client = openai.OpenAI()
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are an assistant that extracts structured details from sentences."},
                {"role": "user", "content": prompt},
            ],
            max_tokens=150,
            temperature=0.5,
        )
        content = response.choices[0].message.content
        # The message content can be None; report that as "no reply" directly
        # instead of relying on an AttributeError reaching the handler below.
        return content.strip() if content is not None else None
    except Exception as e:
        # Best-effort by design: any failure (network, auth, malformed reply)
        # is reported and turned into None so the caller can fall back.
        print(f"OpenAI API Error: {e}")
        return None
# Function to parse GPT response
def parse_gpt_response(response):
    """
    Parse GPT response into a structured dictionary and handle missing values.

    The reply is read line by line as "key: value" pairs. Keys are matched
    case-insensitively after stripping list numbering / bullets and markdown
    emphasis, so "1. Destination: Goa" and "**Destination:** Goa" are both
    understood.

    Args:
        response: Raw reply text from the model, or None/empty.

    Returns:
        A dict with:
          - "destination": the stated destination, or None when it is missing,
            empty, or literally "none";
          - "num_days": an int between 1 and 31 inclusive, or None when the
            value is missing, not a plain integer, or out of that range.
    """
    result = {"destination": None, "num_days": None}
    if not response or not isinstance(response, str):
        return result

    for line in response.splitlines():
        raw_key, separator, raw_value = line.partition(":")
        if not separator:
            continue

        # Drop leading "1. " / "- " / "**" decorations and trailing "**".
        key = raw_key.strip().lstrip("0123456789.)-*# \t").rstrip("* \t").lower()
        # "**Destination:** Goa" leaves the closing "**" on the value side.
        value = raw_value.strip().strip("*").strip()

        if key == "destination":
            result["destination"] = value if value and value.lower() != "none" else None
        elif key == "number of days":
            # str.isdigit() alone also accepts characters such as "²" that
            # int() rejects; requiring ASCII keeps int() from raising and
            # throwing away fields parsed from earlier lines.
            num_days = int(value) if value.isascii() and value.isdigit() else None
            if num_days is not None and 1 <= num_days <= 31:
                result["num_days"] = num_days
            else:
                result["num_days"] = None
    return result
# Regex-based extraction
def extract_trip_details(sentence):
    """
    Extract destination and number of days using regex.

    Heuristics:
      - The destination is the run of words following a standalone "to" or
        "for". The run stops before a number or a following preposition
        (for, from, in, on, at, with, by, during), so "to paris for 5 days"
        yields "paris" rather than "paris for 5 days".
      - The number of days is the first integer followed by "day"/"days"
        (also "5-day").
      - If the word "from" appears anywhere, the destination is discarded:
        the sentence is treated as naming an origin rather than a destination.

    Args:
        sentence: Free-text message describing a trip.

    Returns:
        A dict {"destination": str | None, "num_days": int | None}. Empty or
        non-string input yields both values as None.
    """
    if not sentence or not isinstance(sentence, str):
        return {"destination": None, "num_days": None}

    destination_pattern = (
        r'\b(?:to|for)\s+'            # standalone preposition, not the "to" in "photo"
        r'((?!\d)\w+'                 # first word must not be a number ("for 5 days")
        r'(?:\s+(?!(?:for|from|in|on|at|with|by|during)\b)(?!\d)\w+)*)'
    )
    days_pattern = r'\b(\d+)[\s-]*days?\b'

    destination_match = re.search(destination_pattern, sentence, re.IGNORECASE)
    destination = destination_match.group(1).strip() if destination_match else None

    days_match = re.search(days_pattern, sentence, re.IGNORECASE)
    # NOTE(review): unlike parse_gpt_response, no 1-31 range check is applied
    # here; confirm whether callers expect the same bound on this path.
    num_days = int(days_match.group(1)) if days_match else None

    # If "from" is present, it indicates origin, not destination.
    # Matched as a whole word so words merely containing "from" do not count.
    if re.search(r'\bfrom\b', sentence, re.IGNORECASE):
        destination = None

    return {"destination": destination, "num_days": num_days}
# Main processing function
def extract_sentence(sentence, user=None):
    """
    Complete pipeline for extracting travel details with fallback to regex if GPT fails.

    Args:
        sentence: Free-text message describing a trip. It is lower-cased and
            stripped before any extraction, so returned destinations are
            lower-case on the regex path.
        user: Optional mapping with an "_id" key identifying the requesting
            user. When given and GPT returns a reply, the "ai_gpt" field of
            that document in the "users" collection is decremented by 1
            (presumably the user's remaining GPT quota - confirm with the
            schema owner). When None, no database update is made, which lets
            the function run from the command line.

    Returns:
        A dict {"destination": ..., "num_days": ...} produced by
        parse_gpt_response when GPT replied, otherwise by
        extract_trip_details.
    """
    sentence = sentence.lower().strip()  # Convert the sentence to lowercase before processing
    print("Using GPT to extract travel details...")
    gpt_response = gpt_extract_trip_details(sentence)
    if gpt_response:
        # Only a non-empty GPT reply is counted against the user.
        if user is not None:
            mongo_util.get_collection("users").update_one(
                {"_id": ObjectId(user["_id"])},
                {"$inc": {"ai_gpt": -1}},
            )
        # A GPT reply is authoritative even if it parsed to no details; the
        # regex fallback below runs only when there was no reply at all.
        return parse_gpt_response(gpt_response)
    print("GPT extraction failed or returned no details. Falling back to regex extraction...")
    return extract_trip_details(sentence)


# Main entry point
if __name__ == "__main__":
    sentence = input("Enter a trip-related sentence: ")
    # No user is supplied for manual runs, so no usage counter is touched.
    result = extract_sentence(sentence)
    print("Extracted Travel Details:")
    print(result)