Spaces:

awacke1
/

RescuerOfStolenBikes

Running

App Files Files Community

awacke1 committed 10 days ago

Commit

1857a5e

•

1 Parent(s): bc2a176

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -87

app.py CHANGED Viewed

@@ -91,28 +91,8 @@ FILE_EMOJIS = {
     "mp3": "🎵",
 }
-def get_high_info_terms(text: str, prioritize_start=True) -> list:
-    """🧠 #1 - The Neural Network for Filenames (but way simpler and probably underpaid)
-    Scans text like a caffeinated librarian on a mission, hunting for words that actually
-    mean something. Filters out boring words like 'the' and 'and' (sorry old friends),
-    while preserving the good stuff like 'quantum' and 'neural' (party time! 🎉).
-    Think of it as a bouncer for your filenames - if a word isn't cool enough,
-    it's not getting in. But key phrases? VIP access, baby! 🎭
-    Args:
-        text (str): The text to strip mine for linguistic gold
-        prioritize_start (bool): If True, treats the start like the cool kids' table
-                               (default behavior because we're not monsters)
-    Returns:
-        list: The VIP list of words that made the cut. Maximum of 8 terms if we're
-              prioritizing the start (because YOLO), 5 otherwise (because sanity).
-    Warning: May occasionally let through a word that sounds smart but is actually
-            just showing off. We're working on its ego. 🎭
-    """
     stop_words = set([
         'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with',
         'by', 'from', 'up', 'about', 'into', 'over', 'after', 'is', 'are', 'was', 'were',
@@ -120,9 +100,7 @@ def get_high_info_terms(text: str, prioritize_start=True) -> list:
         'should', 'could', 'might', 'must', 'shall', 'can', 'may', 'this', 'that', 'these',
         'those', 'i', 'you', 'he', 'she', 'it', 'we', 'they', 'what', 'which', 'who',
         'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most',
-        'other', 'some', 'such', 'than', 'too', 'very', 'just', 'there', 'please', 'tell',
-        'explain', 'show', 'give', 'write', 'provide', 'need', 'want', 'would', 'could',
-        'lets', 'let', 'try', 'use', 'make', 'help'
     ])
     key_phrases = [
@@ -135,48 +113,23 @@ def get_high_info_terms(text: str, prioritize_start=True) -> list:
         'research paper', 'scientific study', 'empirical analysis'
     ]
-    # First check for key phrases at the start of the text
-    start_phrases = []
-    lower_text = text.lower()
-    text_start = lower_text[:100]  # Look at first 100 chars for starting phrases
-    for phrase in key_phrases:
-        if text_start.startswith(phrase) or text_start.find(f" {phrase}") >= 0:
-            start_phrases.append(phrase)
-            text = text.replace(phrase, '')
-    # Then check for key phrases in the rest of the text
     preserved_phrases = []
     for phrase in key_phrases:
-        if phrase in lower_text and phrase not in start_phrases:
             preserved_phrases.append(phrase)
             text = text.replace(phrase, '')
-    # Get the first ~50 words to analyze the start more carefully
-    start_words = text.split()[:50]
-    words_with_pos = []
-    for pos, word in enumerate(start_words):
-        word = re.sub(r'[^\w\s-]', '', word.lower())
-        if (len(word) > 3 and
-            word not in stop_words and
-            not word.isdigit() and
-            any(c.isalpha() for c in word)):
-            words_with_pos.append((pos, word))
-    # Get remaining high-info words from the rest of the text
-    remaining_words = re.findall(r'\b\w+(?:-\w+)*\b', text[100:])
     high_info_words = [
-        word.lower() for word in remaining_words
         if len(word) > 3
         and word.lower() not in stop_words
         and not word.isdigit()
         and any(c.isalpha() for c in word)
     ]
-    # Combine terms prioritizing start content
-    start_terms = [word for _, word in sorted(words_with_pos, key=lambda x: x[0])][:3]
-    all_terms = (start_phrases + start_terms + preserved_phrases + high_info_words)
-    # Remove duplicates while preserving order
     seen = set()
     unique_terms = []
     for term in all_terms:
@@ -184,50 +137,22 @@ def get_high_info_terms(text: str, prioritize_start=True) -> list:
             seen.add(term)
             unique_terms.append(term)
-    max_terms = 8 if prioritize_start else 5
     return unique_terms[:max_terms]
 def generate_filename(content, file_type="md"):
-    """🎯 #2 - The File Naming Sommelier (pairs well with frustrated developers)
-    Takes your content and turns it into a filename that's actually readable by humans!
-    A revolutionary concept, we know. Combines timestamps with meaningful words,
-    because '20231218_quantum_research' beats 'asdfg123.md' any day of the week.
-    Think of it as your personal file naming barista - takes your raw content beans
-    and turns them into a smooth, well-crafted filename. No foam art though, sorry! ☕
-    Args:
-        content (str): Your beautiful text that needs a home(name)
-        file_type (str): The file extension (defaults to "md" because we're markdown
-                        hipsters at heart)
-    Returns:
-        str: A filename that won't make you question your life choices when you see it
-            in 6 months. Limited to 120 chars because we're not writing a novel here.
-    Pro Tip: If your filename ends up being just 'file.md', either your content was
-            empty or we've failed spectacularly. Please file a bug report or just
-            laugh it off. 🎪
-    """
     prefix = datetime.now().strftime("%y%m_%H%M") + "_"
-    # Get high-info terms with start prioritization
-    info_terms = get_high_info_terms(content, prioritize_start=True)
-    # Create filename with terms
     name_text = '_'.join(term.replace(' ', '-') for term in info_terms) if info_terms else 'file'
-    # Ensure reasonable length
-    max_length = 120  # Increased to allow more meaningful content
     if len(name_text) > max_length:
         name_text = name_text[:max_length]
     filename = f"{prefix}{name_text}.{file_type}"
     return filename
 # 7. Audio Processing
 def clean_for_speech(text: str) -> str:
     text = text.replace("\n", " ")

     "mp3": "🎵",
 }
+# 5. High-Information Content Extraction
+def get_high_info_terms(text: str) -> list:
     stop_words = set([
         'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with',
         'by', 'from', 'up', 'about', 'into', 'over', 'after', 'is', 'are', 'was', 'were',
         'should', 'could', 'might', 'must', 'shall', 'can', 'may', 'this', 'that', 'these',
         'those', 'i', 'you', 'he', 'she', 'it', 'we', 'they', 'what', 'which', 'who',
         'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most',
+        'other', 'some', 'such', 'than', 'too', 'very', 'just', 'there'
     ])
     key_phrases = [
         'research paper', 'scientific study', 'empirical analysis'
     ]
     preserved_phrases = []
+    lower_text = text.lower()
     for phrase in key_phrases:
+        if phrase in lower_text:
             preserved_phrases.append(phrase)
             text = text.replace(phrase, '')
+    words = re.findall(r'\b\w+(?:-\w+)*\b', text)
     high_info_words = [
+        word.lower() for word in words
         if len(word) > 3
         and word.lower() not in stop_words
         and not word.isdigit()
         and any(c.isalpha() for c in word)
     ]
+    all_terms = preserved_phrases + high_info_words
     seen = set()
     unique_terms = []
     for term in all_terms:
             seen.add(term)
             unique_terms.append(term)
+    max_terms = 5
     return unique_terms[:max_terms]
+# 6. Filename Generation
 def generate_filename(content, file_type="md"):
     prefix = datetime.now().strftime("%y%m_%H%M") + "_"
+    info_terms = get_high_info_terms(content)
     name_text = '_'.join(term.replace(' ', '-') for term in info_terms) if info_terms else 'file'
+    max_length = 100
     if len(name_text) > max_length:
         name_text = name_text[:max_length]
     filename = f"{prefix}{name_text}.{file_type}"
     return filename
 # 7. Audio Processing
 def clean_for_speech(text: str) -> str:
     text = text.replace("\n", " ")