aleyfin committed on
Commit
affe825
1 Parent(s): 33debf2
Files changed (1)
  1. app.py +4 -4
app.py CHANGED
@@ -5,8 +5,8 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 
 app = FastAPI()
 
-tokenizer = AutoTokenizer.from_pretrained("Intel/neural-chat-7b-v3-1")
-model = AutoModelForCausalLM.from_pretrained("Intel/neural-chat-7b-v3-1")
+tokenizer = AutoTokenizer.from_pretrained("Intel/neural-chat-7b-v3-1", local_files_only=True)
+model = AutoModelForCausalLM.from_pretrained("Intel/neural-chat-7b-v3-1", local_files_only=True)
 
 class ChatInput(BaseModel):
     system_input: str
@@ -20,9 +20,9 @@ async def generate_response(chat_input: ChatInput):
 
     # Tokenize and encode the prompt
     inputs = tokenizer.encode(prompt, return_tensors="pt", add_special_tokens=False)
-
+
     # Generate a response
-    outputs = model.generate(inputs, max_length=1000, num_return_sequences=1)
+    outputs = model.generate(inputs, max_length=1000, num_return_sequences=1, pad_token_id=tokenizer.eos_token_id)
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
     # Extract only the assistant's response
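
For context, a minimal sketch of what the full app.py might look like after this commit. Only the two hunks above are actually in the diff; the imports, the route decorator, the second ChatInput field (user_input), the prompt template (following the "### System / ### User / ### Assistant" format documented for Intel/neural-chat-7b-v3-1), and the final extraction step are all assumptions.

from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM

app = FastAPI()

# local_files_only=True makes loading fail fast if the weights are not
# already cached locally, instead of downloading from the Hugging Face Hub.
tokenizer = AutoTokenizer.from_pretrained("Intel/neural-chat-7b-v3-1", local_files_only=True)
model = AutoModelForCausalLM.from_pretrained("Intel/neural-chat-7b-v3-1", local_files_only=True)

class ChatInput(BaseModel):
    system_input: str
    user_input: str  # assumed field name; not visible in the diff

@app.post("/generate")  # assumed route; the decorator sits outside both hunks
async def generate_response(chat_input: ChatInput):
    # Assumed prompt template for this model family
    prompt = (
        f"### System:\n{chat_input.system_input}\n"
        f"### User:\n{chat_input.user_input}\n"
        f"### Assistant:\n"
    )

    # Tokenize and encode the prompt
    inputs = tokenizer.encode(prompt, return_tensors="pt", add_special_tokens=False)

    # Generate a response; passing pad_token_id explicitly silences the
    # "Setting pad_token_id to eos_token_id" warning for models with no pad token
    outputs = model.generate(inputs, max_length=1000, num_return_sequences=1,
                             pad_token_id=tokenizer.eos_token_id)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Extract only the assistant's response (assumed split logic)
    return {"response": response.split("### Assistant:\n")[-1]}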
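
And a hypothetical client call against that endpoint; the host, port, and /generate route are assumptions, since none of them appear in the diff:

import requests

resp = requests.post(
    "http://localhost:8000/generate",  # assumed uvicorn default port and route
    json={
        "system_input": "You are a helpful assistant.",
        "user_input": "Summarize what local_files_only=True does.",
    },
)
print(resp.json())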