PawinC committed
Commit f5fdf38
1 Parent(s): 4d3709e

Upload 4 files

Files changed (4):
  1. Dockerfile +9 -2
  2. app/main.py +61 -71
  3. pythainlp-data/gitkeep +0 -0
  4. requirements.txt +7 -1
Dockerfile CHANGED
@@ -8,11 +8,18 @@ COPY requirements.txt /requirements.txt
 
 RUN pip install -r requirements.txt
 
-COPY app /app
+RUN useradd -m -u 1000 user
+USER user
 
-COPY models /models
+COPY --chown=user:user app /app
+
+COPY --chown=user:user models /models
 #DO NOT FORGET TO UNCOMMENT THE ABOVE WHEN PUSHING TO HF!!!!
 
+COPY --chown=user:user pythainlp-data /pythainlp-data
+
+RUN sha256sum /models/final-Physics_llama3.gguf
+
 # EXPOSE 7860
 
 ENV PYTHONUNBUFFERED=1
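
The new RUN sha256sum /models/final-Physics_llama3.gguf layer simply prints the model's checksum into the build log so the uploaded GGUF can be sanity-checked. A minimal sketch of reproducing that digest locally with Python's hashlib (the local path is an assumption; compare the output against the value printed during the Docker build):

import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    # Stream in 1 MiB chunks so a multi-gigabyte GGUF never has to fit in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Hypothetical local copy of the model file; the digest should match the
# line that `sha256sum` prints in the build log.
print(sha256_of("models/final-Physics_llama3.gguf"))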
app/main.py CHANGED
@@ -8,51 +8,47 @@ from llama_cpp import Llama
 
 from pydantic import BaseModel
 from enum import Enum
-from typing import Optional
+from typing import Optional, Literal, Dict, List
 
 # MODEL LOADING, FUNCTIONS, AND TESTING
 
 print("Loading model...")
-SAllm = Llama(model_path="/models/final-gemma2b_SA-Q8_0.gguf", use_mmap=False, use_mlock=True)
-FIllm = Llama(model_path="/models/final-gemma7b_FI-Q8_0.gguf", use_mmap=False, use_mlock=True)
-# WIllm = Llama(model_path="/models/final-GemmaWild7b-Q8_0.gguf", use_mmap=False, use_mlock=True)
+PHllm = Llama(model_path="/models/final-Physics_llama3.gguf", use_mmap=False, use_mlock=True)
+# MIllm = Llama(model_path="/models/final-LlamaTuna_Q8_0.gguf", use_mmap=False, use_mlock=True)
 # n_gpu_layers=28, # Uncomment to use GPU acceleration
 # seed=1337, # Uncomment to set a specific seed
 # n_ctx=2048, # Uncomment to increase the context window
 #)
 
-def extract_restext(response):
-    return response['choices'][0]['text'].strip()
+print("Loading Translators.")
+from pythainlp.translate.en_th import EnThTranslator, ThEnTranslator
+t = EnThTranslator()
+e = ThEnTranslator()
 
-def ask_llm(llm, question, max_new_tokens=200, temperature=0.5):
-    prompt = f"""###User: {question}\n###Assistant:"""
-    result = extract_restext(llm(prompt, max_tokens=max_new_tokens, temperature=temperature, stop=["###User:", "###Assistant:"], echo=False))
+def extract_restext(response, is_chat=False):
+    return response['choices'][0]['text' if is_chat else 'message'].strip()
+
+def ask_llama(llm: Llama, question: str, max_new_tokens=200, temperature=0.5, repeat_penalty=2.0):
+    result = extract_restext(llm.create_chat_completion({"role": "user", "content": question}, max_tokens=max_new_tokens, temperature=temperature, repeat_penalty=repeat_penalty, stop=["<|eot_id|>", "<|end_of_text|>"]), is_chat=True)
+    return result
+
+def chat_llama(llm: Llama, chat_history: dict, max_new_tokens=200, temperature=0.5, repeat_penalty=2.0):
+    result = extract_restext(llm.create_chat_completion(chat_history, max_tokens=max_new_tokens, temperature=temperature, repeat_penalty=repeat_penalty, stop=["<|eot_id|>", "<|end_of_text|>"]), is_chat=True)
     return result
-
-def check_sentiment(text):
-    prompt = f'Analyze the sentiment of the tweet enclosed in square brackets, determine if it is positive or negative, and return the answer as the corresponding sentiment label "positive" or "negative" [{text}] ='
-    response = SAllm(prompt, max_tokens=3, stop=["\n"], echo=False, temperature=0.5)
-    # print(response)
-    result = extract_restext(response)
-    if "positive" in result:
-        return "positive"
-    elif "negative" in result:
-        return "negative"
-    else:
-        return "unknown"
 
 # TESTING THE MODEL
 print("Testing model...")
-assert "positive" in check_sentiment("ดอกไม้ร้านนี้สวยจัง")
-assert ask_llm(FIllm, "Hello!, How are you today?", max_new_tokens=1) #Just checking that it can run
-# assert ask_llm(WIllm, "Hello!, How are you today?", max_new_tokens=1) #Just checking that it can run
+assert ask_llama(PHllm, ["Hello!, How are you today?"], max_new_tokens=5) #Just checking that it can run
+print("Checking Translators.")
+assert t.translate("Hello!") == "สวัสดี!"
+assert e.translate("สวัสดี!") == "Hello!"
 print("Ready.")
 
 
 # START OF FASTAPI APP
 app = FastAPI(
     title = "Gemma Finetuned API",
-    description="Gemma Finetuned API for Sentiment Analysis and Finance Questions.",
+    description="Gemma Finetuned API for Thai Open-ended question answering.",
     version="1.0.0",
 )
 
@@ -67,22 +63,22 @@ app.add_middleware(
 
 
 # API DATA CLASSES
-class SA_Result(str, Enum):
-    positive = "positive"
-    negative = "negative"
-    unknown = "unknown"
-
-class SAResponse(BaseModel):
-    code: int = 200
-    text: Optional[str] = None
-    result: SA_Result = None
-
 class QuestionResponse(BaseModel):
     code: int = 200
     question: Optional[str] = None
     answer: str = None
     config: Optional[dict] = None
 
+class ChatHistoryResponse(BaseModel):
+    code: int = 200
+    chat_history: Dict[str] = None
+    answer: str = None
+    config: Optional[dict] = None
+
+class LlamaChatMessage(BaseModel):
+    role: Literal["user", "assistant"]
+    content: str
+
 
 # API ROUTES
 @app.get('/')
@@ -90,60 +86,54 @@ def docs():
     "Redirects the user from the main page to the docs."
     return responses.RedirectResponse('./docs')
 
-@app.post('/classifications/sentiment')
-async def perform_sentiment_analysis(prompt: str = Body(..., embed=True, example="I like eating fried chicken")) -> SAResponse:
-    """Performs a sentiment analysis using a finetuned version of Gemma-7b"""
-    if prompt:
-        try:
-            print(f"Checking sentiment for {prompt}")
-            result = check_sentiment(prompt)
-            print(f"Result: {result}")
-            return SAResponse(result=result, text=prompt)
-        except Exception as e:
-            return HTTPException(500, SAResponse(code=500, result=str(e), text=prompt))
-    else:
-        return HTTPException(400, SAResponse(code=400, result="Request argument 'prompt' not provided."))
-
-
-@app.post('/questions/finance')
-async def ask_gemmaFinanceTH(
-    prompt: str = Body(..., embed=True, example="What's the best way to invest my money"),
+@app.post('/questions/physics')
+async def ask_gemmaPhysics(
+    prompt: str = Body(..., embed=True, example="Why do ice cream melt so fast?"),
     temperature: float = Body(0.5, embed=True),
-    max_new_tokens: int = Body(200, embed=True)
+    repeat_penalty: float = Body(1.0, embed=True),
+    max_new_tokens: int = Body(200, embed=True),
+    translate_from_thai: bool = Body(False, embed=True)
 ) -> QuestionResponse:
     """
-    Ask a finetuned Gemma a finance-related question, just for fun.
-    NOTICE: IT MAY PRODUCE RANDOM/INACCURATE ANSWERS. PLEASE SEEK PROFESSIONAL ADVICE BEFORE DOING ANYTHING SERIOUS.
+    Ask a finetuned Gemma an physics question.
+    NOTICE: Answers may be random / inaccurate. Always do your research & confirm its responses before doing anything.
     """
     if prompt:
         try:
-            print(f'Asking GemmaFinance with the question "{prompt}"')
-            result = ask_llm(FIllm, prompt, max_new_tokens=max_new_tokens, temperature=temperature)
+            print(f'Asking LlamaPhysics with the question "{prompt}", translation is {"enabled" if translate_from_thai else "disabled"}')
+            if translate_from_thai:
+                prompt = e.translate(prompt)
+            result = ask_llama(PHllm, prompt, max_new_tokens=max_new_tokens, temperature=temperature, repeat_penalty=repeat_penalty)
             print(f"Result: {result}")
-            return QuestionResponse(answer=result, question=prompt, config={"temperature": temperature, "max_new_tokens": max_new_tokens})
+            if translate_from_thai:
+                result = t.translate(result)
+            return QuestionResponse(answer=result, question=prompt, config={"temperature": temperature, "max_new_tokens": max_new_tokens, "repeat_penalty": repeat_penalty})
         except Exception as e:
             return HTTPException(500, QuestionResponse(code=500, answer=str(e), question=prompt))
    else:
         return HTTPException(400, QuestionResponse(code=400, answer="Request argument 'prompt' not provided."))
-
 
-# @app.post('/questions/open-ended')
-# async def ask_gemmaWild(
-#     prompt: str = Body(..., embed=True, example="Why is ice cream so delicious?"),
+
+# @app.post('/chat/multiturn')
+# async def ask_llama3_Tuna(
+#     chat_history: List[LlamaChatMessage] = Body(..., embed=True),
 #     temperature: float = Body(0.5, embed=True),
+#     repeat_penalty: float = Body(2.0, embed=True),
 #     max_new_tokens: int = Body(200, embed=True)
-# ) -> QuestionResponse:
+# ) -> ChatHistoryResponse:
 #     """
-#     Ask a finetuned Gemma an open-ended question..
-#     NOTICE: IT MAY PRODUCE RANDOM/INACCURATE ANSWERS. PLEASE SEEK PROFESSIONAL ADVICE BEFORE DOING ANYTHING SERIOUS.
+#     Chat with a finetuned Llama-3 model (in Thai).
+#     Answers may be random / inaccurate. Always do your research & confirm its responses before doing anything.
+#     NOTICE: YOU MUST APPLY THE LLAMA3 PROMPT YOURSELF!
 #     """
-#     if prompt:
+#     if chat_history:
 #         try:
-#             print(f'Asking GemmaWild with the question "{prompt}"')
-#             result = ask_llm(WIllm, prompt, max_new_tokens=max_new_tokens, temperature=temperature)
+#             print(f'Asking Llama3Tuna with the question "{chat_history}"')
+#             result = chat_llama(MIllm, chat_history, max_new_tokens=max_new_tokens, temperature=temperature, repeat_penalty=repeat_penalty)
 #             print(f"Result: {result}")
-#             return QuestionResponse(answer=result, question=prompt, config={"temperature": temperature, "max_new_tokens": max_new_tokens})
+#             return ChatHistoryResponse(answer=result, config={"temperature": temperature, "max_new_tokens": max_new_tokens, "repeat_penalty": repeat_penalty})
#         except Exception as e:
-#             return HTTPException(500, QuestionResponse(code=500, answer=str(e), question=prompt))
+#             return HTTPException(500, QuestionResponse(code=500, answer=str(e), question=chat_history))
 #     else:
 #         return HTTPException(400, QuestionResponse(code=400, answer="Request argument 'prompt' not provided."))
+
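
The new helpers lean on llama-cpp-python's OpenAI-style chat interface. As a point of reference (this is my reading of that library's API, not part of the commit), create_chat_completion takes messages as a list of role/content dicts and returns the reply under choices[0]["message"]["content"], so a stand-alone version of ask_llama would look roughly like the sketch below. Read against that shape, the committed extract_restext (which indexes 'message' directly and flips the is_chat branch) and the startup assert that passes a list where a string is expected are worth double-checking.

from llama_cpp import Llama

def ask_llama(llm: Llama, question: str, max_new_tokens: int = 200,
              temperature: float = 0.5, repeat_penalty: float = 2.0) -> str:
    # Chat completions take a list of messages; completion-style calls return
    # choices[0]["text"], chat-style calls return choices[0]["message"]["content"].
    response = llm.create_chat_completion(
        messages=[{"role": "user", "content": question}],
        max_tokens=max_new_tokens,
        temperature=temperature,
        repeat_penalty=repeat_penalty,
        stop=["<|eot_id|>", "<|end_of_text|>"],
    )
    return response["choices"][0]["message"]["content"].strip()

# Example usage (assumes the model file from the Dockerfile is present):
# PHllm = Llama(model_path="/models/final-Physics_llama3.gguf", use_mmap=False, use_mlock=True)
# print(ask_llama(PHllm, "Hello! How are you today?", max_new_tokens=5))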
pythainlp-data/gitkeep ADDED
File without changes
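
The empty pythainlp-data directory (kept in the repo via gitkeep and copied into the image with user ownership) presumably gives PyThaiNLP a writable place to cache the downloaded translation models under the non-root user. If I recall correctly, PyThaiNLP honors a PYTHAINLP_DATA_DIR environment variable for this; a sketch under that assumption, pointing the cache at the baked-in directory before the translators are imported:

import os

# Assumption: PyThaiNLP reads PYTHAINLP_DATA_DIR to decide where to store
# downloaded resources; point it at the directory shipped in the image.
os.environ.setdefault("PYTHAINLP_DATA_DIR", "/pythainlp-data")

from pythainlp.translate.en_th import EnThTranslator, ThEnTranslator  # noqa: E402

t = EnThTranslator()
e = ThEnTranslator()
print(t.translate("Hello!"))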
requirements.txt CHANGED
@@ -1,3 +1,9 @@
 uvicorn[standard]
 fastapi
-llama-cpp-python
+llama-cpp-python
+pythainlp
+pandas
+fairseq
+sacremoses
+sentencepiece
+transformers
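
With those dependencies installed and the container running, the new /questions/physics route takes an embedded JSON body, since every parameter is declared with Body(..., embed=True). A minimal client sketch; the host and port are assumptions, as the Dockerfile only hints at 7860 through the commented-out EXPOSE:

import requests

resp = requests.post(
    "http://localhost:7860/questions/physics",  # assumed host/port
    json={
        "prompt": "Why do ice cream melt so fast?",
        "temperature": 0.5,
        "repeat_penalty": 1.0,
        "max_new_tokens": 200,
        "translate_from_thai": False,
    },
    timeout=120,
)
print(resp.json())  # expected shape: {"code": 200, "question": ..., "answer": ..., "config": ...}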