saifeddinemk committed • Commit 7d0de60
1 Parent(s): 5aaa320
Fixed app v2
app.py CHANGED
@@ -1,78 +1,43 @@
-import torch
-import json
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
-from transformers import AutoModelForCausalLM, AutoTokenizer
 from typing import List
 
 # Initialize the FastAPI app
 app = FastAPI()
 
-#
-
-
-
-model = AutoModelForCausalLM.from_pretrained(
-    model_path,
-    torch_dtype=torch.float16,
-    device_map="auto",
-    load_in_4bit=False,
-    trust_remote_code=False,
-)
-
-tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
-
-# Function to generate text
-def generate_text(instruction):
-    tokens = tokenizer.encode(instruction)
-    tokens = torch.LongTensor(tokens).unsqueeze(0)
-    tokens = tokens.to("cuda")
-
-    instance = {
-        "input_ids": tokens,
-        "top_p": 1.0,
-        "temperature": 0.75,
-        "generate_len": 2048,
-        "top_k": 50,
-    }
-
-    length = len(tokens[0])
-    with torch.no_grad():
-        rest = model.generate(
-            input_ids=tokens,
-            max_length=length + instance["generate_len"],
-            use_cache=True,
-            do_sample=True,
-            top_p=instance["top_p"],
-            temperature=instance["temperature"],
-            top_k=instance["top_k"],
-            num_return_sequences=1,
-            pad_token_id=tokenizer.eos_token_id,
-        )
-    output = rest[0][length:]
-    string = tokenizer.decode(output, skip_special_tokens=True)
-    return f"{string}"
 
 # Data model for FastAPI input
 class UserInput(BaseModel):
     conversation: str
     user_input: str
 
 @app.post("/generate/")
 async def generate_response(user_input: UserInput):
     try:
-        # Construct the prompt
-
-
-
         # Generate response
-
-
-
-
 
         return {
-            "
             "updated_conversation": updated_conversation
         }
     except Exception as e:
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
+from transformers import pipeline, set_seed
 from typing import List
 
 # Initialize the FastAPI app
 app = FastAPI()
 
+# Initialize the generator pipeline
+generator = pipeline('text-generation', model='gpt2-medium')
+set_seed(42)
 
 # Data model for FastAPI input
 class UserInput(BaseModel):
     conversation: str
     user_input: str
+    max_length: int = 50  # default length
+    num_return_sequences: int = 1  # default number of sequences
 
 @app.post("/generate/")
 async def generate_response(user_input: UserInput):
     try:
+        # Construct the prompt from the conversation and user input
+        prompt = f"{user_input.conversation}{user_input.user_input}"
+
         # Generate response
+        responses = generator(
+            prompt,
+            max_length=user_input.max_length,
+            num_return_sequences=user_input.num_return_sequences
+        )
+
+        # Extract text from each generated sequence
+        generated_texts = [response["generated_text"] for response in responses]
 
+        # Update conversation with the last generated text
+        updated_conversation = f"{prompt}\n{generated_texts[-1]}"
+
         return {
+            "responses": generated_texts,
             "updated_conversation": updated_conversation
         }
     except Exception as e:
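For reference, a minimal sketch of calling the updated `/generate/` endpoint. The host, port, and use of `uvicorn` are assumptions for illustration, not part of this commit; the request body follows the `UserInput` model defined above.

```python
# Sketch: exercising the /generate/ endpoint added in this commit.
# Assumes the app is running locally, e.g. `uvicorn app:app --port 8000`
# (host and port are assumptions, not part of the commit).
import requests

payload = {
    "conversation": "User: Hello!\nBot: Hi, how can I help?\n",
    "user_input": "User: Tell me a short story.\nBot:",
    "max_length": 80,           # optional, defaults to 50 in UserInput
    "num_return_sequences": 1,  # optional, defaults to 1 in UserInput
}

resp = requests.post("http://localhost:8000/generate/", json=payload)
resp.raise_for_status()
data = resp.json()

print(data["responses"][0])          # text produced by the gpt2-medium pipeline
print(data["updated_conversation"])  # prompt plus the last generated text
```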