toaster61 committed on
Commit
1391fc1
1 Parent(s): 351861c

this is last quart commit, fr

Browse files
Files changed (2) hide show
  1. app.py +23 -8
  2. requirements.txt +3 -2
app.py CHANGED
@@ -2,20 +2,22 @@
2
  from transformers import M2M100Tokenizer, M2M100ForConditionalGeneration
3
  from quart import Quart, request
4
  from llama_cpp import Llama
 
5
 
6
  # Initing things
7
- app = Quart(__name__) # Quart app
8
- llm = Llama(model_path="./model.bin") # LLaMa model
9
- tokenizer = M2M100Tokenizer.from_pretrained( # tokenizer for translator
 
10
  "facebook/m2m100_1.2B", cache_dir="translator/"
11
  )
12
- model = M2M100ForConditionalGeneration.from_pretrained( # translator model
13
  "facebook/m2m100_1.2B", cache_dir="translator/"
14
  )
15
- model.eval()
16
 
17
  # Preparing things to work
18
- tokenizer.src_lang = "en"
19
 
20
  # Loading prompt
21
  with open('system.prompt', 'r', encoding='utf-8') as f:
@@ -35,7 +37,20 @@ async def echo():
35
  return {"error": "Not enough data", "output": "Oops! Error occured! If you're a developer, using this API, check 'error' key."}, 400
36
  try:
37
  output = llm(userPrompt, max_tokens=maxTokens, stop=["User:", "\n"], echo=False)
38
- return {"output": output["choices"][0]["text"]}
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  except Exception as e:
40
  print(e)
41
  return {"error": str(e), "output": "Oops! Internal server error. Check the logs. If you're a developer, using this API, check 'error' key."}, 500
@@ -53,5 +68,5 @@ Powered by <a href="https://github.com/abetlen/llama-cpp-python">llama-cpp-pytho
53
  <h1>How to test it on own machine?</h1>
54
  You can install Docker, build image and run it. I made <code>`run-docker.sh`</code> for ya. To stop container run <code>`docker ps`</code>, find name of container and run <code>`docker stop _dockerContainerName_`</code><br>
55
  Or you can once follow steps in Dockerfile and try it on your machine, not in Docker.<br>
56
- <br>
57
  <script>document.write("<b>URL of space:</b> "+window.location.href);</script>'''
 
2
  from transformers import M2M100Tokenizer, M2M100ForConditionalGeneration
3
  from quart import Quart, request
4
  from llama_cpp import Llama
5
+ import psutil
6
 
7
  # Initing things
8
+ app = Quart(__name__) # Quart app
9
+ llm = Llama(model_path="./model.bin") # LLaMa model
10
+ llama_model_name = "TheBloke/Llama-2-13B-chat-GGUF"
11
+ translator_tokenizer = M2M100Tokenizer.from_pretrained( # tokenizer for translator
12
  "facebook/m2m100_1.2B", cache_dir="translator/"
13
  )
14
+ translator_model = M2M100ForConditionalGeneration.from_pretrained( # translator model
15
  "facebook/m2m100_1.2B", cache_dir="translator/"
16
  )
17
+ translator_model.eval()
18
 
19
  # Preparing things to work
20
+ translator_tokenizer.src_lang = "en"
21
 
22
  # Loading prompt
23
  with open('system.prompt', 'r', encoding='utf-8') as f:
 
37
  return {"error": "Not enough data", "output": "Oops! Error occured! If you're a developer, using this API, check 'error' key."}, 400
38
  try:
39
  output = llm(userPrompt, max_tokens=maxTokens, stop=["User:", "\n"], echo=False)
40
+ text = output["choices"][0]["text"]
41
+ # i allowed only certain languages:
42
+ # russian (ru), ukranian (uk), chinese (zh)
43
+ if isinstance(data.get("target_lang"), str) and data.get("target_lang").lower() in ["ru", "uk", "zh"]:
44
+ encoded_input = translator_tokenizer(output, return_tensors="pt")
45
+ generated_tokens = translator_model.generate(
46
+ **encoded_input, forced_bos_token_id=translator_tokenizer.get_lang_id(data.get("target_lang"))
47
+ )
48
+ translated_text = translator_tokenizer.batch_decode(
49
+ generated_tokens, skip_special_tokens=True
50
+ )[0]
51
+ return {"output": text, "translated_output": translated_text}
52
+
53
+ return {"output": text}
54
  except Exception as e:
55
  print(e)
56
  return {"error": str(e), "output": "Oops! Internal server error. Check the logs. If you're a developer, using this API, check 'error' key."}, 500
 
68
  <h1>How to test it on own machine?</h1>
69
  You can install Docker, build image and run it. I made <code>`run-docker.sh`</code> for ya. To stop container run <code>`docker ps`</code>, find name of container and run <code>`docker stop _dockerContainerName_`</code><br>
70
  Or you can once follow steps in Dockerfile and try it on your machine, not in Docker.<br>
71
+ <br>''' + f"Memory free: {psutil.virtual_memory()[2]}" + '''
72
  <script>document.write("<b>URL of space:</b> "+window.location.href);</script>'''
requirements.txt CHANGED
@@ -1,6 +1,7 @@
1
- Werkzeug==2.3.7
2
  quart
3
- uvicorn
4
  torch
 
 
5
  transformers
 
6
  transformers[sentencepiece]
 
 
1
  quart
 
2
  torch
3
+ psutil
4
+ uvicorn
5
  transformers
6
+ Werkzeug==2.3.7
7
  transformers[sentencepiece]