phuongnv committed on
Commit
e4fefdd
1 Parent(s): 36d332d

Update main.py

Files changed (1)
main.py +40 -17
main.py CHANGED
@@ -1,22 +1,45 @@
-from ctransformers import AutoModelForCausalLM
-from fastapi import FastAPI, Form
+from ctransformers import AutoModelForCausalLM, AutoTokenizer
+from fastapi import FastAPI, Form, HTTPException
 from pydantic import BaseModel
+import torch
+import selfies as sf
 
-#Model loading
-llm = AutoModelForCausalLM.from_pretrained("model.gguf", max_new_tokens = 512)
-
+app = FastAPI()
+
+# Load the model and tokenizer
+model_name = "model.gguf" # Replace with your model name
+test_model = AutoModelForCausalLM.from_pretrained(model_name)
+test_tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-#Pydantic object
-class validation(BaseModel):
+class RequestBody(BaseModel):
     prompt: str
-
-#Fast API
-app = FastAPI()
 
-#Zephyr completion
-@app.post("/llm_on_cpu")
-async def stream(item: validation):
-    E_INST = "</s>"
-    user, assistant = "<|user|>", "<|assistant|>"
-    prompt = f"{E_INST}\n{user}\n{item.prompt.strip()}{E_INST}\n{assistant}\n"
-    return llm(prompt)
+@app.post("/generate/")
+async def generate_text(request: RequestBody):
+    try:
+        prompt = request.prompt
+        input_ids = test_tokenizer(prompt, return_tensors='pt', truncation=False).input_ids
+        outputs = test_model.generate(
+            input_ids=input_ids,
+            max_new_tokens=512,
+            num_beams=10,
+            early_stopping=True,
+            num_return_sequences=10,
+            do_sample=True
+        )
+
+        result = {'input': prompt}
+        for i in range(10):
+            output1 = test_tokenizer.batch_decode(outputs.detach().numpy(), skip_special_tokens=True)[i][len(prompt):]
+            first_inst_index = output1.find("[/INST]")
+            second_inst_index = output1.find("[/IN", first_inst_index + len("[/INST]") + 1)
+            predicted_selfies = output1[first_inst_index + len("[/INST]"):second_inst_index].strip()
+            result[f'predict_{i+1}'] = predicted_selfies
+
+        return result
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.get("/")
+async def read_root():
+    return {"message": "Welcome to the LLM FastAPI application!"}