Spaces:
Paused
Paused
fixes
Browse files
app.py
CHANGED
@@ -5,8 +5,8 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
5 |
|
6 |
app = FastAPI()
|
7 |
|
8 |
-
tokenizer = AutoTokenizer.from_pretrained("Intel/neural-chat-7b-v3-1")
|
9 |
-
model = AutoModelForCausalLM.from_pretrained("Intel/neural-chat-7b-v3-1")
|
10 |
|
11 |
class ChatInput(BaseModel):
|
12 |
system_input: str
|
@@ -20,9 +20,9 @@ async def generate_response(chat_input: ChatInput):
|
|
20 |
|
21 |
# Tokenize and encode the prompt
|
22 |
inputs = tokenizer.encode(prompt, return_tensors="pt", add_special_tokens=False)
|
23 |
-
|
24 |
# Generate a response
|
25 |
-
outputs = model.generate(inputs, max_length=1000, num_return_sequences=1)
|
26 |
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
27 |
|
28 |
# Extract only the assistant's response
|
|
|
5 |
|
6 |
app = FastAPI()
|
7 |
|
8 |
+
tokenizer = AutoTokenizer.from_pretrained("Intel/neural-chat-7b-v3-1", local_files_only=True)
|
9 |
+
model = AutoModelForCausalLM.from_pretrained("Intel/neural-chat-7b-v3-1", local_files_only=True)
|
10 |
|
11 |
class ChatInput(BaseModel):
|
12 |
system_input: str
|
|
|
20 |
|
21 |
# Tokenize and encode the prompt
|
22 |
inputs = tokenizer.encode(prompt, return_tensors="pt", add_special_tokens=False)
|
23 |
+
|
24 |
# Generate a response
|
25 |
+
outputs = model.generate(inputs, max_length=1000, num_return_sequences=1, pad_token_id=tokenizer.eos_token_id)
|
26 |
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
27 |
|
28 |
# Extract only the assistant's response
|