Spaces:
Sleeping
Sleeping
# Contains only the NLP text summarization functions and their database helpers.
import re # Python's built-in library for regular expressions (or Regex) | |
import sqlite3 | |
from flask import g | |
from transformers import pipeline | |
# Lazily created summarization pipeline, shared by every call to summarize().
_SUMMARIZER = None


def _get_summarizer():
    """Return the shared summarization pipeline, building it on first use."""
    global _SUMMARIZER
    if _SUMMARIZER is None:
        # Constructing the pipeline loads the model, so do it once instead of
        # once per request.
        _SUMMARIZER = pipeline("summarization", model="facebook/bart-large-cnn")
    return _SUMMARIZER


def summarize(Entered_story):
    """Validate a user story and return its generated summary.

    Args:
        Entered_story: The user story text to summarize.

    Returns:
        The ``summary_text`` of the first result produced by the pipeline.

    Raises:
        ValueError: "Empty input!" if the text is empty or whitespace only,
            "Incorrect format!" if ``validate_story`` rejects it, or
            "Invalid length!" if it is shorter than 200 characters.
    """
    # Check if the input is empty
    if not Entered_story.strip():
        raise ValueError("Empty input!")

    # Validate that the input is in the correct format
    if not validate_story(Entered_story):
        raise ValueError("Incorrect format!")

    # Before we do anything, make sure the input is long enough for summarization.
    if len(Entered_story) < 200:
        raise ValueError("Invalid length!")

    summarizer = _get_summarizer()

    # Take note: max_length & min_length refer to the OUTPUT length!
    result = summarizer(Entered_story, max_length=100, min_length=30, do_sample=False)
    return result[0]["summary_text"]
# User Input Format Validation Function for english only | |
def validate_story(Entered_story):
    """Return True if the text contains an English-format user story.

    The expected shape is
    ``As a <role>, I want to <goal> ... so that <benefit>``, where role and
    goal contain no comma or full stop, and at least one character separates
    the goal from ``so that``. The pattern is searched for anywhere in the
    text, and ``.`` also matches newlines (``re.DOTALL``).

    Args:
        Entered_story: The text to check.

    Returns:
        bool: True when the pattern is found, otherwise False.
    """
    # The separator is written as ``.+`` rather than ``(,|.)+``. Under DOTALL
    # both accept exactly the same strings, but the alternation lets a comma
    # match either branch, which makes failing inputs with many commas
    # backtrack exponentially.
    pattern = r'As a (?P<role>[^,.]+), I want to (?P<goal>[^,.]+).+so that (?P<benefit>.+)'
    return re.search(pattern, Entered_story, flags=re.DOTALL) is not None
# User Input Format Validation Function for all 5 languages (English, Chinese, Malay, Tamil, Thai) - currently disabled
#def validate_story(Entered_story): | |
# Pattern for English language | |
#pattern_en = r'As a (?P<role>[^,.]+), I want to (?P<goal>[^,.]+)(,|.)+so that (?P<benefit>.+)' | |
# Pattern for Chinese language | |
#pattern_zh = r'作为(?P<role>[^,.]+),我想要(?P<goal>[^,.]+)(,|。)+以便(?P<benefit>.+)' | |
# Pattern for Malay language | |
#pattern_ms = r'Sebagai(?P<role>[^,.]+), saya mahu(?P<goal>[^,.]+)(,|.)+supaya(?P<benefit>.+)' | |
# Pattern for Tamil language | |
#pattern_ta = r'என(?P<role>[^,.]+) எனக்கு வேண்டும்(?P<goal>[^,.]+)(,|.)+அதனால்(?P<benefit>.+) பயன்படுத்தி வைக்கும்' | |
# Pattern for Thai language | |
#pattern_th = r'ในฐานะ(?P<role>[^,.]+) ฉันต้องการ(?P<goal>[^,.]+)(,|.)+เพื่อที่ฉัน(?P<benefit>.+)' | |
# Try each pattern to see if there is a match | |
#match_en = re.search(pattern_en, Entered_story, flags=re.DOTALL) | |
#match_zh = re.search(pattern_zh, Entered_story, flags=re.DOTALL) | |
#match_ms = re.search(pattern_ms, Entered_story, flags=re.DOTALL) | |
#match_ta = re.search(pattern_ta, Entered_story, flags=re.DOTALL) | |
#match_th = re.search(pattern_th, Entered_story, flags=re.DOTALL) | |
# Return True if at least one pattern matches, otherwise False | |
#return bool(match_en or match_zh or match_ms or match_ta or match_th) | |
# Function to grab all contents in the "TextSummarization" table (except for unique ids) | |
# If adding any additional attributes to the table, this has to be updated accordingly | |
def getTextSumContents():
    """Fetch every (Entered_story, summary) pair from the TextSummarization table.

    Reuses the connection stored as ``g._database`` when one is present;
    otherwise opens ``Refineverse.db`` and stores the new connection there.

    Returns:
        The list of rows returned by ``fetchall()``.
    """
    connection = getattr(g, '_database', None)
    if connection is None:
        # Nothing stored on 'g' yet: open the database and remember the connection.
        connection = sqlite3.connect('Refineverse.db')
        g._database = connection

    query = "SELECT Entered_story, summary FROM TextSummarization"
    return connection.cursor().execute(query).fetchall()
# Function to insert a new row into the "TextSummarization" table | |
# Using "with" for the connection here seems important, as otherwise it results in an exception | |
def insertTextSumRow(Entered_story, summary):
    """Insert one (Entered_story, summary) row into the TextSummarization table.

    Opens its own connection to ``Refineverse.db``, commits the insert, and
    closes the connection before returning.

    Args:
        Entered_story: The original user story text.
        summary: The generated summary for that story.
    """
    from contextlib import closing

    # A sqlite3 connection used as a context manager only commits or rolls
    # back; it does not close. closing() guarantees the connection is released.
    with closing(sqlite3.connect('Refineverse.db')) as conn:
        with conn:  # commit on success, roll back if the insert raises
            conn.execute(
                "INSERT INTO TextSummarization (Entered_story, summary) VALUES (?, ?)",
                (Entered_story, summary),
            )