novamysticX committed on
Commit 4c5cefd · verified · 1 Parent(s): 74d0bae

Update app.py

Files changed (1)
  1. app.py +16 -78
app.py CHANGED
@@ -1,86 +1,24 @@
- import os
- import torch
- from fastapi import FastAPI, HTTPException
- from pydantic import BaseModel
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
- from threading import Thread

- app = FastAPI()

- # Model Settings
- MODEL_ID = "mistralai/Mistral-Nemo-Instruct-2407"  # Update if needed

- # Load model and tokenizer
- device = "cpu"  # Ensure it's on CPU
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
- model = AutoModelForCausalLM.from_pretrained(
-     MODEL_ID,
-     torch_dtype=torch.float32,  # Ensure compatibility with CPU
-     device_map="cpu",  # Make sure model runs on CPU
-     ignore_mismatched_sizes=True
- )

- # Request model
- class RequestModel(BaseModel):
-     message: str
-     history: list = []
-     temperature: float = 0.3
-     max_new_tokens: int = 1024
-     top_p: float = 1.0
-     top_k: int = 20
-     penalty: float = 1.2
-
- @app.post("/generate")
- async def generate_text(request: RequestModel):
-     try:
-         # Prepare conversation
-         conversation = []
-         for prompt, answer in request.history:
-             conversation.extend([
-                 {"role": "user", "content": prompt},
-                 {"role": "assistant", "content": answer},
-             ])
-         conversation.append({"role": "user", "content": request.message})
-
-         # Tokenize input
-         input_text = tokenizer.apply_chat_template(conversation, tokenize=False)
-         inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
-
-         # Streaming setup
-         streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
-
-         # Generation parameters
-         generate_kwargs = dict(
-             input_ids=inputs,
-             max_new_tokens=request.max_new_tokens,
-             do_sample=False if request.temperature == 0 else True,
-             top_p=request.top_p,
-             top_k=request.top_k,
-             temperature=request.temperature,
-             streamer=streamer,
-             repetition_penalty=request.penalty,
-             pad_token_id=tokenizer.pad_token_id
-         )
-
-         # Start model generation
-         with torch.no_grad():
-             thread = Thread(target=model.generate, kwargs=generate_kwargs)
-             thread.start()
-
-         # Stream output
-         buffer = ""
-         for new_text in streamer:
-             buffer += new_text
-
-         # Return response
-         return {"response": buffer}

-     except Exception as e:
-         raise HTTPException(status_code=500, detail=str(e))

- # Root endpoint
- @app.get("/")
- def root():
-     return {"message": "Welcome to the Mistral-Nemo text generation API"}

+ from fastapi import FastAPI
+ from transformers import pipeline

+ # Create a new FastAPI app instance
+ app = FastAPI()

+ # Initialize the text generation pipeline
+ pipe = pipeline("text-generation", model="Qwen/Qwen2.5-1.5B-Instruct")

+ @app.get("/")
+ def home():
+     return {"message": "Hello World"}

+ # Define a function to handle the GET request at `/generate`
+ @app.get("/generate")
+ def generate(text: str):
+     # Use the pipeline to generate text from the given input text
+     output = pipe(text)
+
+     # Return the generated text in a JSON response
+     return {"output": output[0]['generated_text']}