Spaces:
Sleeping
Sleeping
Update llama2.py
Browse files
llama2.py
CHANGED
@@ -93,4 +93,41 @@ async def gen_text(
|
|
93 |
|
94 |
client = sseclient.SSEClient(r)
|
95 |
for event in client.events():
|
96 |
-
yield json.loads(event.data)['token']['text']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
|
94 |
client = sseclient.SSEClient(r)
|
95 |
for event in client.events():
|
96 |
+
yield json.loads(event.data)['token']['text']
|
97 |
+
|
98 |
+
def gen_text_none_stream(
    prompt,
    hf_model='meta-llama/Llama-2-70b-chat-hf',
    hf_token=None,
    timeout=120,
):
    """Generate text for ``prompt`` with one blocking, non-streaming request.

    Posts the prompt to the Hugging Face Inference API endpoint of
    ``hf_model`` using a fixed set of sampling parameters and returns the
    generated continuation.

    Args:
        prompt: Text sent as the ``inputs`` field of the request body.
        hf_model: Model repository id appended to the Inference API URL.
        hf_token: API token sent as a bearer token. When ``None`` no
            ``Authorization`` header is sent at all (instead of the
            literal string ``"Bearer None"``).
        timeout: Seconds passed to ``requests.post`` as its timeout;
            ``None`` waits indefinitely.

    Returns:
        The ``generated_text`` value of the first item in the JSON response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    parameters = {
        'max_new_tokens': 64,
        'do_sample': True,
        # Ask for the completion only, without the prompt echoed back.
        'return_full_text': False,
        'temperature': 0.7,
        'top_k': 10,
        # 'top_p': 1.0,
        'repetition_penalty': 1.2,
    }

    url = f'https://api-inference.huggingface.co/models/{hf_model}'
    headers = {'Content-type': 'application/json'}
    if hf_token:
        # Only authenticate when a token was actually supplied.
        headers['Authorization'] = f'Bearer {hf_token}'

    data = {
        'inputs': prompt,
        'stream': False,
        'options': {
            'use_cache': False,
        },
        'parameters': parameters,
    }

    r = requests.post(
        url,
        headers=headers,
        data=json.dumps(data),
        timeout=timeout,
    )
    # Error responses carry a JSON object rather than a list; surface the
    # HTTP failure here instead of an opaque KeyError on the lookup below.
    r.raise_for_status()

    return json.loads(r.text)[0]["generated_text"]
|