toaster61 committed on
Commit
1391fc1
1 Parent(s): 351861c

this is last quart commit, fr

Browse files
Files changed (2) hide show
  1. app.py +23 -8
  2. requirements.txt +3 -2
app.py CHANGED
@@ -2,20 +2,22 @@
2
  from transformers import M2M100Tokenizer, M2M100ForConditionalGeneration
3
  from quart import Quart, request
4
  from llama_cpp import Llama
 
5
 
6
  # Initing things
7
- app = Quart(__name__) # Quart app
8
- llm = Llama(model_path="./model.bin") # LLaMa model
9
- tokenizer = M2M100Tokenizer.from_pretrained( # tokenizer for translator
 
10
  "facebook/m2m100_1.2B", cache_dir="translator/"
11
  )
12
- model = M2M100ForConditionalGeneration.from_pretrained( # translator model
13
  "facebook/m2m100_1.2B", cache_dir="translator/"
14
  )
15
- model.eval()
16
 
17
  # Preparing things to work
18
- tokenizer.src_lang = "en"
19
 
20
  # Loading prompt
21
  with open('system.prompt', 'r', encoding='utf-8') as f:
@@ -35,7 +37,20 @@ async def echo():
35
  return {"error": "Not enough data", "output": "Oops! Error occured! If you're a developer, using this API, check 'error' key."}, 400
36
  try:
37
  output = llm(userPrompt, max_tokens=maxTokens, stop=["User:", "\n"], echo=False)
38
- return {"output": output["choices"][0]["text"]}
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  except Exception as e:
40
  print(e)
41
  return {"error": str(e), "output": "Oops! Internal server error. Check the logs. If you're a developer, using this API, check 'error' key."}, 500
@@ -53,5 +68,5 @@ Powered by <a href="https://github.com/abetlen/llama-cpp-python">llama-cpp-pytho
53
  <h1>How to test it on own machine?</h1>
54
  You can install Docker, build image and run it. I made <code>`run-docker.sh`</code> for ya. To stop container run <code>`docker ps`</code>, find name of container and run <code>`docker stop _dockerContainerName_`</code><br>
55
  Or you can once follow steps in Dockerfile and try it on your machine, not in Docker.<br>
56
- <br>
57
  <script>document.write("<b>URL of space:</b> "+window.location.href);</script>'''
 
2
  from transformers import M2M100Tokenizer, M2M100ForConditionalGeneration
3
  from quart import Quart, request
4
  from llama_cpp import Llama
5
+ import psutil
6
 
7
  # Initing things
8
+ app = Quart(__name__) # Quart app
9
+ llm = Llama(model_path="./model.bin") # LLaMa model
10
+ llama_model_name = "TheBloke/Llama-2-13B-chat-GGUF"
11
+ translator_tokenizer = M2M100Tokenizer.from_pretrained( # tokenizer for translator
12
  "facebook/m2m100_1.2B", cache_dir="translator/"
13
  )
14
+ translator_model = M2M100ForConditionalGeneration.from_pretrained( # translator model
15
  "facebook/m2m100_1.2B", cache_dir="translator/"
16
  )
17
+ translator_model.eval()
18
 
19
  # Preparing things to work
20
+ translator_tokenizer.src_lang = "en"
21
 
22
  # Loading prompt
23
  with open('system.prompt', 'r', encoding='utf-8') as f:
 
37
  return {"error": "Not enough data", "output": "Oops! Error occured! If you're a developer, using this API, check 'error' key."}, 400
38
  try:
39
  output = llm(userPrompt, max_tokens=maxTokens, stop=["User:", "\n"], echo=False)
40
+ text = output["choices"][0]["text"]
41
+ # i allowed only certain languages:
42
+ # russian (ru), ukranian (uk), chinese (zh)
43
+ if isinstance(data.get("target_lang"), str) and data.get("target_lang").lower() in ["ru", "uk", "zh"]:
44
+ encoded_input = translator_tokenizer(output, return_tensors="pt")
45
+ generated_tokens = translator_model.generate(
46
+ **encoded_input, forced_bos_token_id=translator_tokenizer.get_lang_id(data.get("target_lang"))
47
+ )
48
+ translated_text = translator_tokenizer.batch_decode(
49
+ generated_tokens, skip_special_tokens=True
50
+ )[0]
51
+ return {"output": text, "translated_output": translated_text}
52
+
53
+ return {"output": text}
54
  except Exception as e:
55
  print(e)
56
  return {"error": str(e), "output": "Oops! Internal server error. Check the logs. If you're a developer, using this API, check 'error' key."}, 500
 
68
  <h1>How to test it on own machine?</h1>
69
  You can install Docker, build image and run it. I made <code>`run-docker.sh`</code> for ya. To stop container run <code>`docker ps`</code>, find name of container and run <code>`docker stop _dockerContainerName_`</code><br>
70
  Or you can once follow steps in Dockerfile and try it on your machine, not in Docker.<br>
71
+ <br>''' + f"Memory free: {psutil.virtual_memory()[2]}" + '''
72
  <script>document.write("<b>URL of space:</b> "+window.location.href);</script>'''
requirements.txt CHANGED
@@ -1,6 +1,7 @@
1
- Werkzeug==2.3.7
2
  quart
3
- uvicorn
4
  torch
 
 
5
  transformers
 
6
  transformers[sentencepiece]
 
 
1
  quart
 
2
  torch
3
+ psutil
4
+ uvicorn
5
  transformers
6
+ Werkzeug==2.3.7
7
  transformers[sentencepiece]