aka7774 committed on
Commit 0be9b92
1 Parent(s): 7b90ef7

Upload 2 files

Files changed (2)
  1. fn.py +35 -37
  2. main.py +1 -1
fn.py CHANGED
@@ -156,6 +156,19 @@ def chatinterface_to_messages(message, history):
 
     return messages
 
+def apply_template(messages):
+    global tokenizer, cfg
+
+    if cfg['chat_template']:
+        tokenizer.chat_template = cfg['chat_template']
+
+    if type(messages) is str:
+        if cfg['inst_template']:
+            return cfg['inst_template'].format(instruction=cfg['instruction'], input=messages)
+        return cfg['instruction'].format(input=messages)
+    if type(messages) is list:
+        return tokenizer.apply_chat_template(conversation=messages, add_generation_prompt=True, tokenize=False)
+
 def chat(message, history = [], instruction = None, args = {}):
     global tokenizer, model, cfg
 
@@ -168,20 +181,17 @@ def chat(message, history = [], instruction = None, args = {}):
 
     model_inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
 
-    if 'fastapi' not in args or 'stream' in args and args['stream']:
-        streamer = TextIteratorStreamer(
-            tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True,
-        )
+    streamer = TextIteratorStreamer(
+        tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True,
+    )
 
     generate_kwargs = dict(
         model_inputs,
         do_sample=True,
+        streamer=streamer,
+        num_beams=1,
     )
 
-    if 'fastapi' not in args or 'stream' in args and args['stream']:
-        generate_kwargs['streamer'] = streamer
-        generate_kwargs['num_beams'] = 1
-
     for k in [
         'max_new_tokens',
         'temperature',
@@ -192,33 +202,21 @@ def chat(message, history = [], instruction = None, args = {}):
         if cfg[k]:
             generate_kwargs[k] = cfg[k]
 
-    if 'fastapi' not in args or 'stream' in args and args['stream']:
-        t = Thread(target=model.generate, kwargs=generate_kwargs)
-        t.start()
-
-        model_output = ""
-        for new_text in streamer:
-            model_output += new_text
-            if 'fastapi' in args:
-                # fastapi should receive only the newly generated delta
-                yield new_text
-            else:
-                # gradio should always receive the full text so far
-                yield model_output
-
-    outputs = model.generate(**generate_kwargs)
-    content = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    t = Thread(target=model.generate, kwargs=generate_kwargs)
+    t.start()
+
+    model_output = ""
+    for new_text in streamer:
+        model_output += new_text
+        if 'fastapi' in args:
+            # fastapi should receive only the newly generated delta
+            yield new_text
+        else:
+            # gradio should always receive the full text so far
+            yield model_output
+
+def infer(message, history = [], instruction = None, args = {}):
+    content = ''
+    for s in chat(message, history, instruction, args):
+        content += s
    return content
-
-def apply_template(messages):
-    global tokenizer, cfg
-
-    if cfg['chat_template']:
-        tokenizer.chat_template = cfg['chat_template']
-
-    if type(messages) is str:
-        if cfg['inst_template']:
-            return cfg['inst_template'].format(instruction=cfg['instruction'], input=messages)
-        return cfg['instruction'].format(input=messages)
-    if type(messages) is list:
-        return tokenizer.apply_chat_template(conversation=messages, add_generation_prompt=True, tokenize=False)
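
After this change, chat() always generates on a background Thread and yields from a TextIteratorStreamer, which is why num_beams=1 is forced (transformers streamers do not support beam search). Note that even the old version contained yield, so calling fn.chat() always returned a generator, never a string; the new infer() drains that generator for non-streaming callers. A minimal consumption sketch follows; the prompt text and args values are illustrative assumptions, not part of the commit:

# Consumption sketch for the reworked fn.py (assumes tokenizer, model and
# cfg have already been loaded by the repo's setup code).
import fn

# SSE-style caller: with 'fastapi' in args, chat() yields only the text
# generated since the previous iteration.
for delta in fn.chat("Hello", [], None, {'fastapi': True}):
    print(delta, end="", flush=True)

# Gradio-style caller: without 'fastapi', each yield is the cumulative
# output so far, so the UI can redraw the whole message at every step.
for partial in fn.chat("Hello"):
    pass

# Non-streaming caller: infer() concatenates the yields, which equals the
# full generation only when chat() is in delta mode ('fastapi' in args).
content = fn.infer("Hello", [], None, {'fastapi': True})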
main.py CHANGED
@@ -40,5 +40,5 @@ async def api_infer(args: dict):
             media_type="text/event-stream",
         )
     else:
-        content = fn.chat(args['input'], [], args['instruct'], args)
+        content = fn.infer(args['input'], [], args['instruct'], args)
         return {'content': content}
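
For context, a hedged reconstruction of how the whole api_infer endpoint plausibly reads after this commit: only the hunk lines above come from the diff; the route path, the stream-flag check, and the StreamingResponse wiring are assumptions inferred from the visible media_type="text/event-stream" and from how fn.chat() behaves.

# Hypothetical endpoint shape; everything outside the hunk is assumed.
from fastapi import FastAPI
from fastapi.responses import StreamingResponse

import fn

app = FastAPI()

@app.post("/infer")  # route path is an assumption
async def api_infer(args: dict):
    if args.get('stream'):
        # chat() is a sync generator; Starlette iterates it in a
        # threadpool, so it can back an SSE response directly.
        return StreamingResponse(
            fn.chat(args['input'], [], args['instruct'], args),
            media_type="text/event-stream",
        )
    else:
        # This commit's change: drain the generator into one string
        # instead of returning the generator object itself.
        content = fn.infer(args['input'], [], args['instruct'], args)
        return {'content': content}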