toaster61 committed on
Commit
5d264d1
1 Parent(s): a526b93

strange things

Browse files

It works when cloned and run locally.
Why doesn't it run in HF's Docker?
Should I try making a Gradio repo instead of a Docker one?

Files changed (4) hide show
  1. .gitignore +6 -1
  2. Dockerfile +4 -4
  3. gradio_app.py +22 -22
  4. run-docker.sh +1 -1
.gitignore CHANGED
@@ -1,4 +1,9 @@
1
- # BASIC PYTHON .GITIGNORE
 
 
 
 
 
2
 
3
  # Byte-compiled / optimized / DLL files
4
  __pycache__/
1
+ # BASIC PYTHON .GITIGNORE + some for testing
2
+
3
+ # ignoring test files for testing repo
4
+ flagged/
5
+ translator/
6
+ model.bin
7
 
8
  # Byte-compiled / optimized / DLL files
9
  __pycache__/
Dockerfile CHANGED
@@ -25,8 +25,8 @@ RUN ls
25
  # You can use other models! Or u can comment this two RUNs and include in Space/repo/Docker image own model with name "model.bin".
26
 
27
  # Updating pip and installing everything from requirements
28
- RUN python3 -m pip install -U --no-cache-dir pip setuptools wheel
29
- RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt
30
 
31
- # Now it's time to run Quart app using uvicorn! (It's faster, trust me.)
32
- CMD ["gradio", "gradio_app.py"]
25
  # You can use other models! Or u can comment this two RUNs and include in Space/repo/Docker image own model with name "model.bin".
26
 
27
  # Updating pip and installing everything from requirements
28
+ RUN python3 -m pip install -U pip setuptools wheel
29
+ RUN pip install --upgrade -r /app/requirements.txt
30
 
31
+ # Now it's time to run Gradio app!
32
+ CMD ["python", "gradio_app.py"]
gradio_app.py CHANGED
@@ -5,23 +5,23 @@ import gradio as gr
5
  import psutil
6
 
7
  # Initing things
8
- #print("! DOWNLOADING TOKENIZER AND SETTING ALL UP !")
9
- #translator_tokenizer = M2M100Tokenizer.from_pretrained( # tokenizer for translator
10
- # "facebook/m2m100_418M", cache_dir="translator/"
11
- #)
12
- #print("! DOWNLOADING MODEL AND SETTING ALL UP !")
13
- #translator_model = M2M100ForConditionalGeneration.from_pretrained( # translator model
14
- # "facebook/m2m100_418M", cache_dir="translator/"
15
- #)
16
- #print("! SETTING MODEL IN EVALUATION MODE !")
17
- #translator_model.eval()
18
  print("! INITING LLAMA MODEL !")
19
  llm = Llama(model_path="./model.bin") # LLaMa model
20
  llama_model_name = "TheBloke/Llama-2-13B-chat-GGUF"
21
  print("! INITING DONE !")
22
 
23
  # Preparing things to work
24
- #translator_tokenizer.src_lang = "en"
25
  title = "llama.cpp API"
26
  desc = '''<style>a:visited{color:black;}</style>
27
  <h1>Hello, world!</h1>
@@ -56,15 +56,15 @@ def generate_answer(request: str, max_tokens: int = 256, language: str = "en", c
56
  text = output["choices"][0]["text"]
57
  # i allowed only certain languages (its not discrimination, its just other popular language on my opinion!!!):
58
  # russian (ru), ukranian (uk), chinese (zh)
59
- #if language in ["ru", "uk", "zh"]:
60
- #encoded_input = translator_tokenizer(output, return_tensors="pt")
61
- #generated_tokens = translator_model.generate(
62
- # **encoded_input, forced_bos_token_id=translator_tokenizer.get_lang_id(language)
63
- #)
64
- #translated_text = translator_tokenizer.batch_decode(
65
- # generated_tokens, skip_special_tokens=True
66
- #)[0]
67
- #return translated_text
68
  return text
69
  except Exception as e:
70
  print(e)
@@ -83,6 +83,6 @@ demo = gr.Interface(
83
  title=title,
84
  description=desc
85
  )
86
- #demo.queue()
87
  print("! LAUNCHING GRADIO !")
88
- demo.launch()
5
  import psutil
6
 
7
  # Initing things
8
+ print("! DOWNLOADING TOKENIZER AND SETTING ALL UP !")
9
+ translator_tokenizer = M2M100Tokenizer.from_pretrained( # tokenizer for translator
10
+ "facebook/m2m100_418M", cache_dir="translator/"
11
+ )
12
+ print("! DOWNLOADING MODEL AND SETTING ALL UP !")
13
+ translator_model = M2M100ForConditionalGeneration.from_pretrained( # translator model
14
+ "facebook/m2m100_418M", cache_dir="translator/"
15
+ )
16
+ print("! SETTING MODEL IN EVALUATION MODE !")
17
+ translator_model.eval()
18
  print("! INITING LLAMA MODEL !")
19
  llm = Llama(model_path="./model.bin") # LLaMa model
20
  llama_model_name = "TheBloke/Llama-2-13B-chat-GGUF"
21
  print("! INITING DONE !")
22
 
23
  # Preparing things to work
24
+ translator_tokenizer.src_lang = "en"
25
  title = "llama.cpp API"
26
  desc = '''<style>a:visited{color:black;}</style>
27
  <h1>Hello, world!</h1>
56
  text = output["choices"][0]["text"]
57
  # i allowed only certain languages (its not discrimination, its just other popular language on my opinion!!!):
58
  # russian (ru), ukranian (uk), chinese (zh)
59
+ if language in ["ru", "uk", "zh"]:
60
+ encoded_input = translator_tokenizer(text, return_tensors="pt")
61
+ generated_tokens = translator_model.generate(
62
+ **encoded_input, forced_bos_token_id=translator_tokenizer.get_lang_id(language)
63
+ )
64
+ translated_text = translator_tokenizer.batch_decode(
65
+ generated_tokens, skip_special_tokens=True
66
+ )[0]
67
+ return translated_text
68
  return text
69
  except Exception as e:
70
  print(e)
83
  title=title,
84
  description=desc
85
  )
86
+ demo.queue()
87
  print("! LAUNCHING GRADIO !")
88
+ demo.launch(server_name="0.0.0.0")
run-docker.sh CHANGED
@@ -2,4 +2,4 @@
2
  # Use it for tests. AND INSTALL DOCKER BEFORE U RUN IT!!!
3
 
4
  docker build -t llama-server .
5
- docker run -dp 0.0.0.0:7860:7860 llama-server
2
  # Use it for tests. AND INSTALL DOCKER BEFORE U RUN IT!!!
3
 
4
  docker build -t llama-server .
5
+ docker run -p 0.0.0.0:7860:7860 llama-server