rodrigomasini committed on
Commit e9589b1
1 Parent(s): 136e5a5

Update app_v2.py

Files changed (1)
  1. app_v2.py +33 -46
app_v2.py CHANGED
@@ -1,51 +1,38 @@
 import streamlit as st
 from transformers import AutoTokenizer
 from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
-from huggingface_hub import snapshot_download
-
-cwd = os.getcwd()
-cachedir = cwd + '/cache'
-
-local_folder = cachedir + "/model"
-
-# Check if the directory exists before creating it
-if not os.path.exists(cachedir):
-    os.mkdir(cachedir)
+import os
 
 # Define pretrained and quantized model directories
-pretrained_quantized_model_dir = "FPHam/Jackson_The_Formalizer_V2_13b_GPTQ"
-quantized_model_dir = "opt-125m-4bit"
-
-quantized_model_dir = "FPHam/Jackson_The_Formalizer_V2_13b_GPTQ"
-
-# Check if the model has already been downloaded
-model_path = os.path.join(local_folder, 'pytorch_model.bin')
-if not os.path.isfile(model_path):
-    snapshot_download(repo_id=quantized_model_dir, local_dir=local_folder, local_dir_use_symlinks=True)
-
-model_basename = cachedir + "/model/Jackson2-4bit-128g-GPTQ"
-
-use_strict = False
-use_triton = False
-
-# Load tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained(local_folder, use_fast=True)
-
-quantize_config = BaseQuantizeConfig(
-    bits=4,
-    group_size=128,
-    desc_act=False
-)
-
-model = AutoGPTQForCausalLM.from_quantized(
-    local_folder,
-    use_safetensors=True,
-    strict=use_strict,
-    model_basename=model_basename,
-    device="cuda:0",
-    use_triton=use_triton,
-    quantize_config=quantize_config
-)
-
-st.write(model.hf_device_map)
-
+pretrained_model_dir = "FPHam/Jackson_The_Formalizer_V2_13b_GPTQ"
+quantized_model_dir = "./Jackson2-4bit-128g-GPTQ"
+
+# Create the cache directory if it doesn't exist
+os.makedirs(quantized_model_dir, exist_ok=True)
+
+# Quantization configuration
+quantize_config = BaseQuantizeConfig(bits=4, group_size=128, desc_act=False)
+
+# Load the tokenizer
+tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=True)
+
+# Load the model using Option 1
+model = AutoGPTQForCausalLM.from_pretrained(pretrained_model_dir, quantize_config)
+
+# Starting Streamlit app
+st.title("AutoGPTQ Streamlit App")
+
+user_input = st.text_input("Input a phrase")
+
+# Generate output when the "Generate" button is pressed
+if st.button("Generate"):
+    inputs = tokenizer(user_input, return_tensors="pt")
+    outputs = model.generate(
+        **inputs,
+        max_length=512 + inputs['input_ids'].size(-1),
+        temperature=0.1,
+        top_p=0.95,
+        repetition_penalty=1.15
+    )
+    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    st.text(generated_text)
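
Two details of the new loading path are worth flagging. In AutoGPTQ, from_pretrained with a BaseQuantizeConfig prepares a full-precision model for quantization, whereas from_quantized (used in the old version) is the entry point for checkpoints that are already quantized, as this repo's 4-bit GPTQ weights are; and once the model sits on a GPU, the tokenized inputs must be moved to the same device before generate. A minimal sketch of that alternative path, assuming CUDA is available, the repo ships safetensors, and the weight basename can be auto-detected (the prompt and max_new_tokens value are illustrative only):

import streamlit as st
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM

model_repo = "FPHam/Jackson_The_Formalizer_V2_13b_GPTQ"

# Load the tokenizer and the already-quantized 4-bit checkpoint directly
tokenizer = AutoTokenizer.from_pretrained(model_repo, use_fast=True)
model = AutoGPTQForCausalLM.from_quantized(
    model_repo,
    use_safetensors=True,
    device="cuda:0",
    use_triton=False,
    # model_basename="Jackson2-4bit-128g-GPTQ",  # only if auto-detection fails
)

# Move inputs to the model's device before generating
inputs = tokenizer("Input a phrase", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=512)
st.text(tokenizer.decode(outputs[0], skip_special_tokens=True))

Either version of the app is launched the same way, e.g. streamlit run app_v2.py.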