Spaces:

chansung
/

llama2-with-gradio-chat

Running on T4

chansung committed on Aug 23, 2023

Commit

a5ad5a5

1 Parent(s): 1466f9d

Update llama2.py

Files changed (1) hide show

llama2.py CHANGED Viewed

@@ -93,4 +93,41 @@ async def gen_text(
   client = sseclient.SSEClient(r)
   for event in client.events():
-    yield json.loads(event.data)['token']['text']

   client = sseclient.SSEClient(r)
   for event in client.events():
+    yield json.loads(event.data)['token']['text']
+def gen_text_none_stream(
+    prompt,
+    hf_model='meta-llama/Llama-2-70b-chat-hf',
+    hf_token=None,
+):
+    parameters = {
+        'max_new_tokens': 64,
+        'do_sample': True,
+        'return_full_text': False,
+        'temperature': 0.7,
+        'top_k': 10,
+        # 'top_p': 1.0,
+        'repetition_penalty': 1.2
+    }
+    url = f'https://api-inference.huggingface.co/models/{hf_model}'
+    headers={
+        'Authorization': f'Bearer {hf_token}',
+        'Content-type': 'application/json'
+    }
+    data = {
+        'inputs': prompt,
+        'stream': False,
+        'options': {
+            'use_cache': False,
+        },
+        'parameters': parameters
+    }
+    r = requests.post(
+        url,
+        headers=headers,
+        data=json.dumps(data),
+    )
+    return json.loads(r.text)[0]["generated_text"]