Update app.py
app.py CHANGED

@@ -14,7 +14,7 @@ from dotenv import load_dotenv
 import huggingface_hub
 from threading import Thread
 from typing import AsyncIterator, List, Dict
-from transformers import StoppingCriteria, StoppingCriteriaList
+from transformers.stopping_criteria import StoppingCriteria, StoppingCriteriaList
 import torch
 
 load_dotenv()
@@ -135,7 +135,7 @@ model_loader = GCSModelLoader(bucket)
 @app.post("/generate")
 async def generate(request: GenerateRequest):
     model_name = request.model_name
-    input_text = request.input_text
+    input_text = request.input_text  # Initialize input_text here
     task_type = request.task_type
     requested_max_new_tokens = request.max_new_tokens
     generation_params = request.model_dump(
@@ -153,12 +153,10 @@ async def generate(request: GenerateRequest):
     config = AutoConfig.from_pretrained(model_name, token=HUGGINGFACE_HUB_TOKEN)
     stopping_criteria_list = StoppingCriteriaList()
 
-    # Add user-defined stopping strings if provided
     if user_defined_stopping_strings:
         stop_words_ids = [tokenizer.encode(stop_string, add_special_tokens=False) for stop_string in user_defined_stopping_strings]
         stopping_criteria_list.append(StopOnKeywords(stop_words_ids))
 
-    # Automatically add EOS token as a stopping criterion
     if config.eos_token_id is not None:
         eos_token_ids = [config.eos_token_id]
         if isinstance(config.eos_token_id, int):
@@ -172,10 +170,11 @@ async def generate(request: GenerateRequest):
         stopping_criteria_list.append(StopOnKeywords(stop_words_ids_eos))
 
     async def generate_responses() -> AsyncIterator[Dict[str, List[Dict[str, str]]]]:
+        nonlocal input_text  # Allow modification of the outer scope variable
         all_generated_text = ""
         stop_reason = None
 
         while True:
             text_pipeline = pipeline(
                 task_type,
                 model=model_name,
@@ -183,11 +182,11 @@ async def generate(request: GenerateRequest):
                 token=HUGGINGFACE_HUB_TOKEN,
                 stopping_criteria=stopping_criteria_list,
                 **generation_params,
                 max_new_tokens=requested_max_new_tokens
             )
 
-            def generate_on_thread(pipeline, input_text, output_queue):
-                result = pipeline(input_text)
+            def generate_on_thread(pipeline, current_input_text, output_queue):
+                result = pipeline(current_input_text)
                 output_queue.put_nowait(result)
 
             output_queue = asyncio.Queue()
@@ -199,12 +198,11 @@ async def generate(request: GenerateRequest):
             newly_generated_text = result[0]['generated_text'][len(all_generated_text):]
 
             if not newly_generated_text:
                 break
 
             all_generated_text += newly_generated_text
             yield {"response": [{'generated_text': newly_generated_text}]}
 
-            # Check if any stopping criteria was met
             if stopping_criteria_list:
                 for criteria in stopping_criteria_list:
                     if isinstance(criteria, StopOnKeywords) and criteria.current_encounters > 0:
@@ -213,7 +211,6 @@ async def generate(request: GenerateRequest):
             if stop_reason:
                 break
 
-            # If the generated text seems to match the EOS token, stop
             if config.eos_token_id is not None:
                 eos_tokens = [config.eos_token_id]
                 if isinstance(config.eos_token_id, int):
@@ -230,7 +227,6 @@ async def generate(request: GenerateRequest):
                     stop_reason = "eos_token"
                     break
 
-            # Update input text for the next iteration
             input_text = all_generated_text
 
     async def text_stream():
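
A note on the main fix: inside `generate_responses`, the loop ends with `input_text = all_generated_text`, and in Python any assignment inside a function makes that name local to the function. Without the new `nonlocal input_text` declaration, every earlier read of `input_text` in `generate_responses` would raise `UnboundLocalError`. A minimal standalone illustration of the rule (names invented for the example):

```python
def outer():
    text = "seed"

    def inner():
        # Assigning to `text` below makes it local to inner() unless it is
        # declared nonlocal; without this line, print(text) would raise
        # UnboundLocalError because the local name is not yet bound.
        nonlocal text
        print(text)          # reads outer()'s binding: "seed"
        text = text + "!"    # rebinds the outer variable

    inner()
    return text              # -> "seed!"
```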
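`StopOnKeywords` is referenced but not defined in this diff; the generation loop only relies on its `current_encounters` attribute. A minimal sketch of what such a `StoppingCriteria` subclass could look like, assuming the constructor receives lists of token ids (as produced by `tokenizer.encode` above) and the counter records how many keyword matches have occurred; the actual class in app.py may differ:

```python
import torch
from transformers import StoppingCriteria

class StopOnKeywords(StoppingCriteria):
    """Stop generation once any keyword token sequence ends the output."""

    def __init__(self, stop_words_ids):
        self.stop_words_ids = stop_words_ids  # list of token-id lists
        self.current_encounters = 0           # inspected by the caller after each run

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop_ids in self.stop_words_ids:
            n = len(stop_ids)
            # Match when the keyword equals the tail of the generated sequence.
            if n and input_ids.shape[1] >= n and input_ids[0, -n:].tolist() == stop_ids:
                self.current_encounters += 1
                return True
        return False
```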
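One caveat on the threading pattern in the diff: `generate_on_thread` calls `output_queue.put_nowait(result)` from a worker thread, but `asyncio.Queue` is documented as not thread-safe, so this can race with the event loop. A common alternative is to hand the result back through `loop.call_soon_threadsafe`; a sketch of that variant, with `run_blocking_pipeline` and its arguments invented for illustration:

```python
import asyncio
from threading import Thread

async def run_blocking_pipeline(text_pipeline, current_input_text):
    loop = asyncio.get_running_loop()
    output_queue: asyncio.Queue = asyncio.Queue()

    def generate_on_thread():
        result = text_pipeline(current_input_text)  # blocking pipeline call
        # Schedule the put on the event-loop thread rather than calling
        # output_queue.put_nowait(result) directly from this worker thread.
        loop.call_soon_threadsafe(output_queue.put_nowait, result)

    Thread(target=generate_on_thread, daemon=True).start()
    return await output_queue.get()  # yields to the loop until the thread finishes
```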