"""NLP text summarization helpers and their SQLite persistence functions.

Contains only the text summarization feature: input validation, the
summarization call itself, and read/write access to the
"TextSummarization" table.
"""

import re  # Python's built-in library for regular expressions (or Regex)
import sqlite3
from contextlib import closing

from flask import g
from transformers import pipeline

# SQLite database file shared by the functions below.
DB_PATH = 'Refineverse.db'

# Model used by the summarization pipeline.
SUMMARIZATION_MODEL = "facebook/bart-large-cnn"

# Inputs shorter than this many characters are rejected by summarize().
MIN_STORY_LENGTH = 200

# English user-story format: "As a <role>, I want to <goal>, so that <benefit>".
# NOTE: the "." inside "(,|.)+" is unescaped, so it matches any character
# (including newlines under DOTALL). It is kept that way on purpose so the set
# of accepted stories stays as permissive as the pattern was written.
_STORY_PATTERN = re.compile(
    r'As a (?P<role>[^,.]+), I want to (?P<goal>[^,.]+)(,|.)+so that (?P<benefit>.+)',
    flags=re.DOTALL,
)

# Lazily created pipeline, reused across calls (see _get_summarizer).
_summarizer = None


def _get_summarizer():
    """Return the shared summarization pipeline, creating it on first use.

    Building the pipeline loads the model, so it is done once and cached
    instead of being repeated on every summarize() call.
    """
    global _summarizer
    if _summarizer is None:
        # Set the pipeline to use the correct NLP type and model
        _summarizer = pipeline("summarization", model=SUMMARIZATION_MODEL)
    return _summarizer


def summarize(Entered_story: str) -> str:
    """Summarize a user story and return the summary text.

    Args:
        Entered_story: The user story to summarize.

    Returns:
        The "summary_text" of the first result produced by the pipeline.

    Raises:
        ValueError: "Empty input!" if the story is None, empty or whitespace;
            "Incorrect format!" if it fails validate_story();
            "Invalid length!" if it is shorter than MIN_STORY_LENGTH characters.
    """
    # Check if the input is empty (None is treated the same as an empty string)
    if Entered_story is None or not Entered_story.strip():
        raise ValueError("Empty input!")

    # Validate that the input is in the correct format
    if not validate_story(Entered_story):
        raise ValueError("Incorrect format!")

    # Before we do anything, make sure the input is long enough for summarization.
    if len(Entered_story) < MIN_STORY_LENGTH:
        raise ValueError("Invalid length!")

    summarizer = _get_summarizer()

    # Take note: The max_length & min_length variables refer to the OUTPUT length!
    # NOTE(review): very long inputs may exceed the model's input limit; consider
    # passing truncation=True after confirming against the pipeline docs.
    results = summarizer(Entered_story, max_length=100, min_length=30, do_sample=False)
    return results[0]["summary_text"]


# User Input Format Validation Function for english only
def validate_story(Entered_story: str) -> bool:
    """Return True if the story contains the English user-story format.

    The expected shape is "As a <role>, I want to <goal>, so that <benefit>".
    The pattern is searched for anywhere in the text, not anchored to the start.
    """
    return _STORY_PATTERN.search(Entered_story) is not None


# User Input Format Validation Function for all 5 languages (currently disabled)
#def validate_story(Entered_story):
    # Pattern for English language
    #pattern_en = r'As a (?P<role>[^,.]+), I want to (?P<goal>[^,.]+)(,|.)+so that (?P<benefit>.+)'

    # Pattern for Chinese language
    #pattern_zh = r'作为(?P<role>[^,.]+),我想要(?P<goal>[^,.]+)(,|。)+以便(?P<benefit>.+)'

    # Pattern for Malay language
    #pattern_ms = r'Sebagai(?P<role>[^,.]+), saya mahu(?P<goal>[^,.]+)(,|.)+supaya(?P<benefit>.+)'

    # Pattern for Tamil language
    #pattern_ta = r'என(?P<role>[^,.]+) எனக்கு வேண்டும்(?P<goal>[^,.]+)(,|.)+அதனால்(?P<benefit>.+) பயன்படுத்தி வைக்கும்'

    # Pattern for Thai language
    #pattern_th = r'ในฐานะ(?P<role>[^,.]+) ฉันต้องการ(?P<goal>[^,.]+)(,|.)+เพื่อที่ฉัน(?P<benefit>.+)'

    # Try each pattern to see if there is a match
    #match_en = re.search(pattern_en, Entered_story, flags=re.DOTALL)
    #match_zh = re.search(pattern_zh, Entered_story, flags=re.DOTALL)
    #match_ms = re.search(pattern_ms, Entered_story, flags=re.DOTALL)
    #match_ta = re.search(pattern_ta, Entered_story, flags=re.DOTALL)
    #match_th = re.search(pattern_th, Entered_story, flags=re.DOTALL)

    # Return True if at least one pattern matches, otherwise False
    #return bool(match_en or match_zh or match_ms or match_ta or match_th)


# Function to grab all contents in the "TextSummarization" table (except for unique ids)
# If adding any additional attributes to the table, this has to be updated accordingly
def getTextSumContents() -> list:
    """Return every (Entered_story, summary) row of the "TextSummarization" table.

    The connection is stored on flask's `g` object as `_database` and reused
    if it is already there; this function does not close it.
    """
    # Gets the _database attribute from the 'g' object. If it does not exist, returns 'None'
    db = getattr(g, '_database', None)
    if db is None:
        # If db is None, create a new connection for db and g._database.
        db = g._database = sqlite3.connect(DB_PATH)

    # The cursor is only needed for this query, so release it when done.
    with closing(db.cursor()) as cursor:
        cursor.execute("SELECT Entered_story, summary FROM TextSummarization")
        return cursor.fetchall()


# Function to insert a new row into the "TextSummarization" table
def insertTextSumRow(Entered_story, summary) -> None:
    """Insert one (Entered_story, summary) row into the "TextSummarization" table.

    Opens its own short-lived connection instead of the one cached on `g`.
    (The original author noted that not doing so resulted in an exception;
    the exact cause is not visible here.)
    """
    # sqlite3's connection context manager only commits (or rolls back on an
    # exception); it does NOT close the connection. closing() guarantees the
    # connection is released once the transaction has finished.
    with closing(sqlite3.connect(DB_PATH)) as conn:
        with conn:
            conn.execute(
                "INSERT INTO TextSummarization (Entered_story, summary) VALUES (?, ?)",
                (Entered_story, summary),
            )