Spaces:
Sleeping
Sleeping
app.py
CHANGED
@@ -26,24 +26,22 @@ def get_completion(prompt, model="dolly-v0-70m"):
|
|
26 |
)
|
27 |
|
28 |
# Examples from https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html
|
29 |
-
instructions = [
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
if response:
|
46 |
-
print(f"Instruction: {instruction}\n\n{response}\n\n-----------\n")
|
47 |
|
48 |
return response
|
49 |
|
|
|
26 |
)
|
27 |
|
28 |
# Examples from https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html
|
29 |
+
instructions = [prompt]
|
30 |
+
|
31 |
+
# set some additional pipeline args
|
32 |
+
pipeline_kwargs = {'torch_dtype': "auto"}
|
33 |
+
#if gpu_family == "v100":
|
34 |
+
#pipeline_kwargs['torch_dtype'] = "float16"
|
35 |
+
#elif gpu_family == "a10" or gpu_family == "a100":
|
36 |
+
#pipeline_kwargs['torch_dtype'] = "bfloat16"
|
37 |
+
|
38 |
+
pipeline_kwargs['max_new_tokens'] = 300
|
39 |
+
|
40 |
+
# Use the model to generate responses for each of the instructions above.
|
41 |
+
for instruction in instructions:
|
42 |
+
response = generate_response(instruction, model=model, tokenizer=tokenizer, **pipeline_kwargs)
|
43 |
+
if response:
|
44 |
+
print(f"Instruction: {instruction}\n\n{response}\n\n-----------\n")
|
|
|
|
|
45 |
|
46 |
return response
|
47 |
|