llmware
/

bling-falcon-1b-0.1

Text Generation

text-generation-inference

Model card | Files and versions | Community

doberst committed on Nov 14, 2023

Commit

487689f

•

1 Parent(s): bb0787c

Upload generation_test_hf_script.py

Files changed (1) hide show

generation_test_hf_script.py +7 -4

generation_test_hf_script.py CHANGED Viewed

@@ -27,10 +27,14 @@ def load_rag_benchmark_tester_ds():
 def run_test(model_name, test_ds):
     model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
-    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-    device = "cuda" if torch.cuda.is_available() else "cpu"
     for i, entries in enumerate(test_ds):
@@ -63,7 +67,7 @@ def run_test(model_name, test_ds):
         bot = output_only.find("<bot>:")
         if bot > -1:
             output_only = output_only[bot+len("<bot>:"):]
         # end - post-processing
         print("\n")
@@ -78,7 +82,6 @@ if __name__ == "__main__":
     test_ds = load_rag_benchmark_tester_ds()
     model_name = "llmware/bling-falcon-1b-0.1"
     output = run_test(model_name,test_ds)

 def run_test(model_name, test_ds):
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    print("update: model will be loaded on device - ", device)
     model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
+    model.to(device)
+    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
     for i, entries in enumerate(test_ds):
         bot = output_only.find("<bot>:")
         if bot > -1:
             output_only = output_only[bot+len("<bot>:"):]
         # end - post-processing
         print("\n")
     test_ds = load_rag_benchmark_tester_ds()
     model_name = "llmware/bling-falcon-1b-0.1"
     output = run_test(model_name,test_ds)