curiousily
/

falcon-7b-qlora-chat-support-bot-faq-merged

Text Generation

RefinedWebModel

text-generation-inference

Inference Endpoints

Model card | Files and versions | Community

curiousily committed on Jun 19, 2023

Commit

bbf6392

•

1 Parent(s): 43005e8

Update handler.py

Files changed (1) hide show

handler.py +6 -4

handler.py CHANGED Viewed

@@ -5,9 +5,9 @@ import torch
 dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16
 class EndpointHandler:
     def __init__(self, path=""):
         tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
         model = AutoModelForCausalLM.from_pretrained(
             path,
@@ -27,10 +27,12 @@ class EndpointHandler:
         self.generation_config = generation_config
         self.pipeline = transformers.pipeline(
-            "text-generation", model=model, tokenizer=tokenizer
         )
     def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
-        prompt = data.pop("prompt", data)
         result = self.pipeline(prompt, generation_config=self.generation_config)
-        return result

 dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16
 class EndpointHandler:
     def __init__(self, path=""):
         tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
         model = AutoModelForCausalLM.from_pretrained(
             path,
         self.generation_config = generation_config
         self.pipeline = transformers.pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer
         )
     def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
+        prompt = data.pop("inputs", data)
         result = self.pipeline(prompt, generation_config=self.generation_config)
+        return result