Spaces:

Dimitre
/

Gemma-Hangman

Runtime error

App Files Files Community

Dimitre committed on Mar 31

Commit

d8e827d

•

1 Parent(s): 1586c56

refactoring app

Browse files

Files changed (3) hide show

app.py +3 -137
hangman.py +35 -0
hf_utils.py +109 -0

app.py CHANGED Viewed

@@ -1,28 +1,19 @@
 import logging
 import os
-import string
-import re
 import streamlit as st
-from streamlit import session_state
 import torch
 from dotenv import load_dotenv
-from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
-# from common import CATEGORIES, MAX_TRIES, configs
-# from hangman import guess_letter
-# from hf_utils import query_hint, query_word
 CONFIGS_PATH = "configs.yaml"
 MAX_TRIES = 6
 CATEGORIES = ["Country", "Animal", "Food", "Movie"]
-GEMMA_WORD_PATTERNS = [
-    "(?<=\*)(.*?)(?=\*)",
-    '(?<=")(.*?)(?=")',
-]
 configs = {
     "os_model": "google/gemma-2b-it",
     "device": "cpu",
@@ -35,131 +26,6 @@ configs = {
 }
-def guess_letter(letter: str, session: session_state) -> session_state:
-    """Take a letter and evaluate if it is part of the hangman puzzle
-    then updates the session object accordingly.
-    Args:Chosen letter
-        letter (str): Streamlit session object
-        session (session_state): _description_
-    Returns:
-        session_state: Updated session
-    """
-    logger.info(f"Letter '{letter}' picked")
-    if letter in session["word"]:
-        session["correct_letters"].append(letter)
-    else:
-        session["missed_letters"].append(letter)
-    hangman = "".join(
-        [
-            (letter if letter in session["correct_letters"] else "_")
-            for letter in session["word"]
-        ]
-    )
-    session["hangman"] = hangman
-    logger.info("Session state updated")
-    return session
-def query_hf(
-    query: str,
-    model: AutoModelForCausalLM,
-    tokenizer: AutoTokenizer,
-    generation_config: dict,
-    device: str,
-) -> str:
-    """Queries an LLM model using the Vertex AI API.
-    Args:
-        query (str): Query sent to the Vertex API
-        model (str): Model target by Vertex
-        generation_config (dict): Configurations used by the model
-    Returns:
-        str: Vertex AI text response
-    """
-    generation_config = GenerationConfig(
-        do_sample=True,
-        max_new_tokens=generation_config["max_output_tokens"],
-        top_k=generation_config["top_k"],
-        top_p=generation_config["top_p"],
-        temperature=generation_config["temperature"],
-    )
-    input_ids = tokenizer(query, return_tensors="pt").to(device)
-    outputs = model.generate(**input_ids, generation_config=generation_config)
-    outputs = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    outputs = outputs.replace(query, "")
-    return outputs
-def query_word(
-    category: str,
-    model: AutoModelForCausalLM,
-    tokenizer: AutoTokenizer,
-    generation_config: dict,
-    device: str,
-) -> str:
-    """Queries a word to be used for the hangman game.
-    Args:
-        category (str): Category used as source sample a word
-        model (str): Model target by Vertex
-        generation_config (dict): Configurations used by the model
-    Returns:
-        str: Queried word
-    """
-    logger.info(f"Quering word for category: '{category}'...")
-    query = f"Name a single existing {category}."
-    matched_word = ""
-    while not matched_word:
-        # word = query_hf(query, model, tokenizer, generation_config, device)
-        word = "placeholder word"
-        # Extract word of interest from Gemma's output
-        for pattern in GEMMA_WORD_PATTERNS:
-            matched_words = re.findall(rf"{pattern}", word)
-            matched_words = [x for x in matched_words if x != ""]
-            if matched_words:
-                matched_word = matched_words[-1]
-    matched_word = matched_word.translate(str.maketrans("", "", string.punctuation))
-    matched_word = matched_word.lower()
-    logger.info("Word queried successful")
-    return matched_word
-def query_hint(
-    word: str,
-    model: AutoModelForCausalLM,
-    tokenizer: AutoTokenizer,
-    generation_config: dict,
-    device: str,
-) -> str:
-    """Queries a hint for the hangman game.
-    Args:
-        word (str): Word used as source to create the hint
-        model (str): Model target by Vertex
-        generation_config (dict): Configurations used by the model
-    Returns:
-        str: Queried hint
-    """
-    logger.info(f"Quering hint for word: '{word}'...")
-    query = f"Describe the word '{word}' without mentioning it."
-    # hint = query_hf(query, model, tokenizer, generation_config, device)
-    hint = "placeholder hint"
-    hint = re.sub(re.escape(word), "***", hint, flags=re.IGNORECASE)
-    logger.info("Hint queried successful")
-    return hint
 @st.cache_resource()
 def setup(model_id: str, device: str) -> None:
     """Initializes the model and tokenizer.

 import logging
 import os
 import streamlit as st
 import torch
 from dotenv import load_dotenv
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from hangman import guess_letter
+from hf_utils import query_hint, query_word
 CONFIGS_PATH = "configs.yaml"
 MAX_TRIES = 6
 CATEGORIES = ["Country", "Animal", "Food", "Movie"]
 configs = {
     "os_model": "google/gemma-2b-it",
     "device": "cpu",
 }
 @st.cache_resource()
 def setup(model_id: str, device: str) -> None:
     """Initializes the model and tokenizer.

hangman.py ADDED Viewed

	@@ -0,0 +1,35 @@

+import logging
+from streamlit import session_state
+def guess_letter(letter: str, session: session_state) -> session_state:
+    """Take a letter and evaluate if it is part of the hangman puzzle
+    then updates the session object accordingly.
+    Args:
+        letter (str): Chosen letter
+        session (session_state): Streamlit session object; its "word",
+            "correct_letters" and "missed_letters" entries are read and
+            its "hangman" entry is written
+    Returns:
+        session_state: Updated session
+    """
+    logger.info(f"Letter '{letter}' picked")
+    if letter in session["word"]:
+        session["correct_letters"].append(letter)
+    else:
+        session["missed_letters"].append(letter)
+    # Rebuild the masked word: guessed letters are shown, the rest become "_"
+    hangman = "".join(
+        [
+            (letter if letter in session["correct_letters"] else "_")
+            for letter in session["word"]
+        ]
+    )
+    session["hangman"] = hangman
+    logger.info("Session state updated")
+    return session
+# Configure logging at INFO level and create the logger used by this module
+# (named after the module's file path).
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__file__)

hf_utils.py ADDED Viewed

	@@ -0,0 +1,109 @@

+import logging
+import re
+import string
+from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
+# Regex patterns used to extract the word of interest from Gemma's output:
+# text enclosed in asterisks, then text enclosed in double quotes.
+GEMMA_WORD_PATTERNS = [
+    "(?<=\*)(.*?)(?=\*)",
+    '(?<=")(.*?)(?=")',
+]
+def query_hf(
+    query: str,
+    model: AutoModelForCausalLM,
+    tokenizer: AutoTokenizer,
+    generation_config: dict,
+    device: str,
+) -> str:
+    """Queries an LLM model through the given tokenizer and model objects.
+    Args:
+        query (str): Prompt sent to the model
+        model (AutoModelForCausalLM): Model used to generate the response
+        tokenizer (AutoTokenizer): Tokenizer used to encode the prompt and
+            decode the generated tokens
+        generation_config (dict): Configurations used by the model; the keys
+            "max_output_tokens", "top_k", "top_p" and "temperature" are read
+        device (str): Device the tokenized inputs are moved to
+    Returns:
+        str: Decoded model output with the prompt text removed
+    """
+    # Sampling is always enabled; the remaining settings come from the dict
+    generation_config = GenerationConfig(
+        do_sample=True,
+        max_new_tokens=generation_config["max_output_tokens"],
+        top_k=generation_config["top_k"],
+        top_p=generation_config["top_p"],
+        temperature=generation_config["temperature"],
+    )
+    input_ids = tokenizer(query, return_tensors="pt").to(device)
+    outputs = model.generate(**input_ids, generation_config=generation_config)
+    outputs = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    # Remove every occurrence of the prompt text from the decoded output
+    outputs = outputs.replace(query, "")
+    return outputs
+def query_word(
+    category: str,
+    model: AutoModelForCausalLM,
+    tokenizer: AutoTokenizer,
+    generation_config: dict,
+    device: str,
+) -> str:
+    """Queries a word to be used for the hangman game.
+    Args:
+        category (str): Category used as source to sample a word
+        model (AutoModelForCausalLM): Model passed through to query_hf
+        tokenizer (AutoTokenizer): Tokenizer passed through to query_hf
+        generation_config (dict): Configurations used by the model
+        device (str): Device passed through to query_hf
+    Returns:
+        str: Queried word, lowercased and with punctuation removed
+    """
+    logger.info(f"Quering word for category: '{category}'...")
+    query = f"Name a single existing {category}."
+    matched_word = ""
+    # NOTE(review): there is no retry limit; the model is queried again until
+    # at least one pattern yields a non-empty match.
+    while not matched_word:
+        word = query_hf(query, model, tokenizer, generation_config, device)
+        # Extract word of interest from Gemma's output
+        for pattern in GEMMA_WORD_PATTERNS:
+            matched_words = re.findall(rf"{pattern}", word)
+            matched_words = [x for x in matched_words if x != ""]
+            # Every pattern is tried; a later pattern's match overrides an
+            # earlier one, and the last match of a pattern is the one kept.
+            if matched_words:
+                matched_word = matched_words[-1]
+    matched_word = matched_word.translate(str.maketrans("", "", string.punctuation))
+    matched_word = matched_word.lower()
+    logger.info("Word queried successful")
+    return matched_word
+def query_hint(
+    word: str,
+    model: AutoModelForCausalLM,
+    tokenizer: AutoTokenizer,
+    generation_config: dict,
+    device: str,
+) -> str:
+    """Queries a hint for the hangman game.
+    Args:
+        word (str): Word used as source to create the hint
+        model (AutoModelForCausalLM): Model passed through to query_hf
+        tokenizer (AutoTokenizer): Tokenizer passed through to query_hf
+        generation_config (dict): Configurations used by the model
+        device (str): Device passed through to query_hf
+    Returns:
+        str: Queried hint with occurrences of the word replaced by "***"
+    """
+    logger.info(f"Quering hint for word: '{word}'...")
+    query = f"Describe the word '{word}' without mentioning it."
+    hint = query_hf(query, model, tokenizer, generation_config, device)
+    # Mask the word itself (case-insensitively) so the hint does not reveal it
+    hint = re.sub(re.escape(word), "***", hint, flags=re.IGNORECASE)
+    logger.info("Hint queried successful")
+    return hint
+# Configure logging at INFO level and create the logger used by this module
+# (named after the module's file path).
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__file__)