cahya committed
Commit e84c607
Parent: faf39cc

update the generate param

Files changed (1):
  1. app/api.py +20 -7
app/api.py CHANGED
@@ -70,20 +70,33 @@ async def websocket_endpoint(websocket: WebSocket):
 @app.post("/api/indochat/v1")
 async def indochat(
         text: str = Form(default="", description="The Prompt"),
+        decoding_method: str = Form(default="Sampling", description="Decoding method"),
+        min_length: int = Form(default=50, description="Minimal length of the generated text"),
         max_length: int = Form(default=250, description="Maximal length of the generated text"),
-        do_sample: bool = Form(default=True, description="Whether to use sampling; use greedy decoding otherwise"),
+        num_beams: int = Form(default=5, description="Beams number"),
         top_k: int = Form(default=30, description="The number of highest probability vocabulary tokens to keep "
                                                   "for top-k-filtering"),
         top_p: float = Form(default=0.95, description="If set to float < 1, only the most probable tokens with "
                                                       "probabilities that add up to top_p or higher are kept "
                                                       "for generation"),
         temperature: float = Form(default=0.5, description="The Temperature of the softmax distribution"),
-        penalty_alpha: float = Form(default=0.0, description="Penalty alpha"),
+        penalty_alpha: float = Form(default=0.5, description="Penalty alpha"),
         repetition_penalty: float = Form(default=1.2, description="Repetition penalty"),
-        seed: int = Form(default=42, description="Random Seed"),
+        seed: int = Form(default=-1, description="Random Seed"),
         max_time: float = Form(default=60.0, description="Maximal time in seconds to generate the text")
 ):
-    set_seed(seed)
+    if seed >= 0:
+        set_seed(seed)
+    if decoding_method == "Beam Search":
+        do_sample = False
+        penalty_alpha = 0
+    elif decoding_method == "Sampling":
+        do_sample = True
+        penalty_alpha = 0
+        num_beams = 1
+    else:
+        do_sample = False
+        num_beams = 1
     if repetition_penalty == 0.0:
         min_penalty = 1.05
         max_penalty = 1.5
@@ -98,7 +111,8 @@ async def indochat(
     sample_outputs = model.generate(input_ids,
                                     penalty_alpha=penalty_alpha,
                                     do_sample=do_sample,
-                                    min_length=200,
+                                    num_beams=num_beams,
+                                    min_length=min_length,
                                     max_length=max_length,
                                     top_k=top_k,
                                     top_p=top_p,
@@ -108,11 +122,10 @@ async def indochat(
                                     max_time=max_time
                                     )
     result = tokenizer.decode(sample_outputs[0], skip_special_tokens=True)
-    # result = result[len(prompt) + 1:]
     time_end = time.time()
     time_diff = time_end - time_start
     print(f"result:\n{result}")
-    generated_text = result[len(prompt):]
+    generated_text = result[len(prompt)+1:]
     return {"generated_text": generated_text, "processing_time": time_diff}
 
 
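The net effect of the commit: do_sample is no longer a form field but is derived from the new decoding_method field, and a negative seed (the new default) skips set_seed() so generation stays non-deterministic unless the caller pins a seed. A minimal client sketch against the updated endpoint follows; the host and port are assumptions, and the form fields simply mirror the parameters above:

import requests

# Host/port are assumptions; the form fields mirror the endpoint's parameters.
response = requests.post(
    "http://localhost:8000/api/indochat/v1",
    data={
        "text": "Siapa presiden pertama Indonesia?",
        "decoding_method": "Sampling",  # "Beam Search" disables sampling; any other value falls through
        "min_length": 50,
        "max_length": 250,
        "top_k": 30,
        "top_p": 0.95,
        "temperature": 0.5,
        "repetition_penalty": 1.2,
        "seed": -1,        # negative seed skips set_seed(), so output varies between calls
        "max_time": 60.0,
    },
)
print(response.json()["generated_text"])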
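On the fall-through branch (any decoding_method other than "Beam Search" or "Sampling"), penalty_alpha keeps its submitted value, and in Hugging Face transformers (v4.24+) generate() switches to contrastive search whenever penalty_alpha > 0 and top_k > 1. A standalone sketch of that mode, where the "gpt2" checkpoint is only a placeholder and not the model this API serves:

from transformers import AutoModelForCausalLM, AutoTokenizer

# "gpt2" is a placeholder checkpoint; the API's actual model is loaded elsewhere in app/api.py.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

input_ids = tokenizer("The weather today is", return_tensors="pt").input_ids
outputs = model.generate(
    input_ids,
    penalty_alpha=0.5,  # degeneration penalty; 0.0 disables contrastive search
    top_k=4,            # size of the candidate set re-ranked by the contrastive objective
    max_length=60,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))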