tjtanaa committed on
Commit
b0c4008
1 Parent(s): 3983955

fix bug, reduce memory footprint

Browse files
Files changed (1) hide show
  1. app.py +11 -5
app.py CHANGED
@@ -21,8 +21,9 @@ def sanitize_jinja2(jinja_lines):
21
  def get_existing_templates():
22
  return [None] + os.listdir("./templates")
23
 
24
- if len(os.listdir("./tmp")) > 20:
25
- shutil.rmtree('./tmp')
 
26
 
27
  # Initialization
28
  if 'tokenizer_json' not in st.session_state:
@@ -83,12 +84,14 @@ if gen_button:
83
  st.session_state['tokenizer'] = AutoTokenizer.from_pretrained(hf_model_repo_name)
84
 
85
  st.session_state['repo_normalized_name'] = hf_model_repo_name.replace("/", "_")
86
- st.session_state['tokenizer'].save_pretrained(f"./tmp/{st.session_state['uuid']}_{hf_model_repo_name}")
87
  st.session_state['tokenizer_json'] = f"./tmp/{st.session_state['uuid']}_{hf_model_repo_name}"
 
88
 
89
  if st.session_state['tokenizer_json'] is not None:
 
90
  with open(f"{st.session_state['tokenizer_json']}/tokenizer_config.json", "rb") as f:
91
  tokenizer_json = json.load(f)
 
92
 
93
  json_spec, col2 = st.columns(spec=[0.3, 0.7])
94
 
@@ -146,6 +149,7 @@ if st.session_state['tokenizer_json'] is not None:
146
  with open(f"./tmp/{st.session_state['uuid']}/tmp_chat_template.json", "r") as f:
147
  jinja_lines = f.readlines()
148
  st.session_state['tokenizer'].chat_template = sanitize_jinja2(jinja_lines)
 
149
  generated_prompt_wo_add_generation_prompt = st.session_state['tokenizer'].apply_chat_template(chat, tokenize=False, add_generation_prompt= False)
150
  generated_prompt_w_add_generation_prompt = st.session_state['tokenizer'].apply_chat_template(chat, tokenize=False, add_generation_prompt= True)
151
 
@@ -166,10 +170,11 @@ if st.session_state['tokenizer_json'] is not None:
166
  to_private_checkbox = st.checkbox("To Private Repo", key="to_private_checkbox")
167
  create_pr_checkbox = st.checkbox("Create PR (For Contribution 🤗)", key="create_pr_checkbox")
168
  push_to_hub_button = st.button("Push to Hub", key="push_to_hub_button", use_container_width=True)
169
- st.session_state['tokenizer'].save_pretrained(f"./tmp/{st.session_state['uuid']}_{hf_model_repo_name}")
170
- with open(f"./tmp/{st.session_state['uuid']}_{hf_model_repo_name}/tokenizer_config.json", "r") as f:
171
 
172
  tokenizer_config_content = json.loads(f.read())
 
173
 
174
  st.download_button(
175
  label="Download tokenizer_config.json",
@@ -195,3 +200,4 @@ if st.session_state['tokenizer_json'] is not None:
195
  except Exception as e:
196
  st.write(f"Repo id: {st.session_state['repo_id']}")
197
  st.write(str(e))
 
 
21
  def get_existing_templates():
22
  return [None] + os.listdir("./templates")
23
 
24
+ # if os.path.exists("./tmp"):
25
+ # if len(os.listdir("./tmp")) > 20:
26
+ # shutil.rmtree('./tmp')
27
 
28
  # Initialization
29
  if 'tokenizer_json' not in st.session_state:
 
84
  st.session_state['tokenizer'] = AutoTokenizer.from_pretrained(hf_model_repo_name)
85
 
86
  st.session_state['repo_normalized_name'] = hf_model_repo_name.replace("/", "_")
 
87
  st.session_state['tokenizer_json'] = f"./tmp/{st.session_state['uuid']}_{hf_model_repo_name}"
88
+ # st.session_state['tokenizer'].save_pretrained(st.session_state['tokenizer_json'])
89
 
90
  if st.session_state['tokenizer_json'] is not None:
91
+ st.session_state['tokenizer'].save_pretrained(st.session_state['tokenizer_json'])
92
  with open(f"{st.session_state['tokenizer_json']}/tokenizer_config.json", "rb") as f:
93
  tokenizer_json = json.load(f)
94
+ shutil.rmtree(st.session_state['tokenizer_json'])
95
 
96
  json_spec, col2 = st.columns(spec=[0.3, 0.7])
97
 
 
149
  with open(f"./tmp/{st.session_state['uuid']}/tmp_chat_template.json", "r") as f:
150
  jinja_lines = f.readlines()
151
  st.session_state['tokenizer'].chat_template = sanitize_jinja2(jinja_lines)
152
+ os.remove(f"./tmp/{st.session_state['uuid']}/tmp_chat_template.json")
153
  generated_prompt_wo_add_generation_prompt = st.session_state['tokenizer'].apply_chat_template(chat, tokenize=False, add_generation_prompt= False)
154
  generated_prompt_w_add_generation_prompt = st.session_state['tokenizer'].apply_chat_template(chat, tokenize=False, add_generation_prompt= True)
155
 
 
170
  to_private_checkbox = st.checkbox("To Private Repo", key="to_private_checkbox")
171
  create_pr_checkbox = st.checkbox("Create PR (For Contribution 🤗)", key="create_pr_checkbox")
172
  push_to_hub_button = st.button("Push to Hub", key="push_to_hub_button", use_container_width=True)
173
+ st.session_state['tokenizer'].save_pretrained(st.session_state['tokenizer_json'])
174
+ with open(f"{st.session_state['tokenizer_json']}/tokenizer_config.json", "r") as f:
175
 
176
  tokenizer_config_content = json.loads(f.read())
177
+ shutil.rmtree(st.session_state['tokenizer_json'])
178
 
179
  st.download_button(
180
  label="Download tokenizer_config.json",
 
200
  except Exception as e:
201
  st.write(f"Repo id: {st.session_state['repo_id']}")
202
  st.write(str(e))
203
+ os.remove(f"./tmp/{st.session_state['uuid']}/tmp_chat_template.json")