Spaces:

debisoft
/

dolly-v0-70m

Sleeping

App Files Files Community

debisoft committed on Dec 3, 2023

Commit

7d68ac7

•

1 Parent(s): 2c7ad90

prompt only

Browse files

Files changed (1) hide show

app.py +30 -41

app.py CHANGED Viewed

@@ -1,53 +1,42 @@
 import os
-import re
-from datetime import datetime
 import gradio as gr
 import json
 from dotenv import load_dotenv, find_dotenv
 _ = load_dotenv(find_dotenv())
-from training.consts import DEFAULT_INPUT_MODEL, SUGGESTED_INPUT_MODELS
-from training.trainer import load_training_dataset, load_tokenizer
-from training.generate import generate_response, load_model_tokenizer_for_generate
-gpu_family = "a100"
-model_dir = "model"
-model, tokenizer = load_model_tokenizer_for_generate(model_dir)
-def get_completion(prompt, model_name="dolly-v0-70m"):
-	# Examples from https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html
-	instructions = [prompt]
-	# set some additional pipeline args
-	pipeline_kwargs = {'torch_dtype': "auto"}
-	#if gpu_family == "v100":
-	#pipeline_kwargs['torch_dtype'] = "float16"
-	#elif gpu_family == "a10" or gpu_family == "a100":
-	#pipeline_kwargs['torch_dtype'] = "bfloat16"
-	pipeline_kwargs['max_new_tokens'] = 100
-	#pipeline_kwargs['temperature'] = float("inf")
-	#pipeline_kwargs['top_k'] = 1
-	pipeline_kwargs['top_p'] = 0.01
-	# Use the model to generate responses for each of the instructions above.
-	for instruction in instructions:
-		response = generate_response(instruction, model=model, tokenizer=tokenizer, **pipeline_kwargs)
-	if response:
-		print(f"Instruction: {instruction}\n\n{response}\n\n-----------\n")
-	return response
 def greet(input):
-	prompt = f"""
-Text: ```{input}```
-    """
-	response = get_completion(prompt)
-	return response
-#iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-#iface.launch()
-#iface = gr.Interface(fn=greet, inputs=[gr.Textbox(label="Text to find entities", lines=2)], outputs=[gr.HighlightedText(label="Text with entities")], title="NER with dslim/bert-base-NER", description="Find entities using the `dslim/bert-base-NER` model under the hood!", allow_flagging="never", examples=["My name is Andrew and I live in California", "My name is Poli and work at HuggingFace"])
 iface = gr.Interface(fn=greet, inputs=[gr.Textbox(label="Prompt")], outputs="text")
 iface.launch()

+import numpy as np
+import pandas as pd
+import requests
 import os
 import gradio as gr
 import json
 from dotenv import load_dotenv, find_dotenv
 _ = load_dotenv(find_dotenv())
+# Bearer token for the Databricks serving endpoint, read from the process
+# environment (after load_dotenv above); os.getenv yields None when it is unset.
+databricks_token  = os.getenv('DATABRICKS_TOKEN')
+# Invocation URL of the serving endpoint that prompts are POSTed to.
+model_uri  = "https://dbc-eb788f31-6c73.cloud.databricks.com/serving-endpoints/Mpt-7b-tester/invocations"
+def score_model(model_uri, databricks_token, prompt, timeout=300):
+  """POST a single prompt to a model serving endpoint and return its JSON reply.
+
+  The prompt is wrapped in a one-row DataFrame together with fixed sampling
+  settings (temperature 0.5, max_tokens 1500) and sent as a
+  ``dataframe_split`` JSON payload.
+
+  Args:
+    model_uri: Full invocation URL of the serving endpoint.
+    databricks_token: Token sent in the ``Authorization: Bearer`` header.
+    prompt: Prompt text to score.
+    timeout: Seconds to wait for the endpoint before requests gives up.
+      Pass None to wait indefinitely (the previous behaviour).
+
+  Returns:
+    The response body decoded from JSON.
+
+  Raises:
+    ValueError: If ``databricks_token`` is empty or None.
+    Exception: If the endpoint answers with a status code other than 200.
+    requests.exceptions.RequestException: On connection failure or timeout.
+  """
+  # Fail early with a clear message instead of sending "Bearer None".
+  if not databricks_token:
+    raise ValueError("databricks_token is not set (check DATABRICKS_TOKEN)")
+  dataset = pd.DataFrame({
+      "prompt": [prompt],
+      "temperature": [0.5],
+      "max_tokens": [1500],
+  })
+  headers = {
+    "Authorization": f"Bearer {databricks_token}",
+    "Content-Type": "application/json",
+  }
+  # dataset is always a DataFrame here, so the payload is always the
+  # dataframe_split form; the old fallback called an undefined helper.
+  ds_dict = {'dataframe_split': dataset.to_dict(orient='split')}
+  data_json = json.dumps(ds_dict, allow_nan=True)
+  print("***ds_dict: ")
+  print(ds_dict)
+  print("***data_json: ")
+  print(data_json)
+  # A timeout keeps a stalled endpoint from blocking the UI worker forever.
+  response = requests.request(
+      method='POST',
+      headers=headers,
+      url=model_uri,
+      data=data_json,
+      timeout=timeout,
+  )
+  if response.status_code != 200:
+    raise Exception(f"Request failed with status {response.status_code}, {response.text}")
+  return response.json()
+def get_completion(prompt):
+    """Score ``prompt`` against the module-level endpoint and token."""
+    return score_model(
+        model_uri=model_uri,
+        databricks_token=databricks_token,
+        prompt=prompt,
+    )
 def greet(input):
+	"""Gradio callback: return the completion for ``input`` as a JSON string."""
+	return json.dumps(get_completion(input))
 # Gradio UI with one "Prompt" textbox as input and plain text as output;
 # each submission is passed to greet(), and the app is started immediately.
 iface = gr.Interface(fn=greet, inputs=[gr.Textbox(label="Prompt")], outputs="text")
 iface.launch()