ethanwinters1907 committed on
Commit 8d4a6a3 · verified · 1 Parent(s): 5cb50ed

Update app.py

Files changed (1)
  1. app.py +21 -35
app.py CHANGED
@@ -1,35 +1,21 @@
- from flask import Flask, request, jsonify
- from transformers import AutoTokenizer, AutoModelForCausalLM
- import torch
-
- app = Flask(__name__)
-
- # Load tokenizer and model once when the server starts
- tokenizer = AutoTokenizer.from_pretrained("openai/gpt-oss-20b")
- model = AutoModelForCausalLM.from_pretrained("openai/gpt-oss-20b")
-
- # Move model to GPU if available, else CPU
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
- model.to(device)
-
- @app.route('/generate', methods=['POST'])
- def generate_text():
-     data = request.get_json()
-     prompt = data.get('prompt')
-
-     if not prompt:
-         return jsonify({'error': 'No prompt provided'}), 400
-
-     # Tokenize input and move tensors to device
-     inputs = tokenizer.encode(prompt, return_tensors="pt").to(device)
-
-     # Generate output tokens (you can tweak max_length)
-     outputs = model.generate(inputs, max_length=50, do_sample=True, top_k=50, top_p=0.95)
-
-     # Decode tokens to string
-     generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-     return jsonify({'generated_text': generated_text})
-
- if __name__ == '__main__':
-     app.run(debug=True)
 
+ import os
+ from openai import OpenAI
+
+ client = OpenAI(
+     base_url="https://router.huggingface.co/v1",
+     api_key=os.environ["HF_TOKEN"],
+ )
+
+ stream = client.chat.completions.create(
+     model="openai/gpt-oss-20b",
+     messages=[
+         {
+             "role": "user",
+             "content": "What is the capital of France?"
+         }
+     ],
+     stream=True,
+ )
+
+ for chunk in stream:
+     print(chunk.choices[0].delta.content, end="")
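
Note on the change: the commit swaps the self-hosted Flask + transformers server (which would have to load the 20B model locally) for a request to the Hugging Face Inference Router through the OpenAI-compatible client. One caveat with the new streaming loop: in the OpenAI Python client, delta.content can be None on some chunks (typically the first and last), so printing it unguarded can emit a literal "None". A minimal defensive sketch of the same loop, assuming HF_TOKEN is exported in the environment:

import os
from openai import OpenAI

# Same client setup as the committed code; assumes HF_TOKEN is set.
client = OpenAI(
    base_url="https://router.huggingface.co/v1",
    api_key=os.environ["HF_TOKEN"],
)

stream = client.chat.completions.create(
    model="openai/gpt-oss-20b",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    stream=True,
)

for chunk in stream:
    # delta.content is None on some chunks, so guard before printing.
    text = chunk.choices[0].delta.content
    if text:
        print(text, end="", flush=True)
print()  # finish with a newline once the stream ends

This keeps the printed output clean without changing anything about the request itself.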