rodrigomasini committed on
Commit e9589b1
1 Parent(s): 136e5a5

Update app_v2.py

Files changed (1)
  1. app_v2.py +33 -46
app_v2.py CHANGED
@@ -1,51 +1,38 @@
 import streamlit as st
 from transformers import AutoTokenizer
 from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
-from huggingface_hub import snapshot_download
-
-cwd = os.getcwd()
-cachedir = cwd + '/cache'
-
-local_folder = cachedir + "/model"
-
-# Check if the directory exists before creating it
-if not os.path.exists(cachedir):
-    os.mkdir(cachedir)
+import os
 
 # Define pretrained and quantized model directories
-pretrained_quantized_model_dir = "FPHam/Jackson_The_Formalizer_V2_13b_GPTQ"
-quantized_model_dir = "opt-125m-4bit"
-
-quantized_model_dir = "FPHam/Jackson_The_Formalizer_V2_13b_GPTQ"
-
-# Check if the model has already been downloaded
-model_path = os.path.join(local_folder, 'pytorch_model.bin')
-if not os.path.isfile(model_path):
-    snapshot_download(repo_id=quantized_model_dir, local_dir=local_folder, local_dir_use_symlinks=True)
-
-model_basename = cachedir + "/model/Jackson2-4bit-128g-GPTQ"
-
-use_strict = False
-use_triton = False
-
-# Load tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained(local_folder, use_fast=True)
-
-quantize_config = BaseQuantizeConfig(
-    bits=4,
-    group_size=128,
-    desc_act=False
-)
-
-model = AutoGPTQForCausalLM.from_quantized(
-    local_folder,
-    use_safetensors=True,
-    strict=use_strict,
-    model_basename=model_basename,
-    device="cuda:0",
-    use_triton=use_triton,
-    quantize_config=quantize_config
-)
-
-st.write(model.hf_device_map)
-
+pretrained_model_dir = "FPHam/Jackson_The_Formalizer_V2_13b_GPTQ"
+quantized_model_dir = "./Jackson2-4bit-128g-GPTQ"
+
+# Create the cache directory if it doesn't exist
+os.makedirs(quantized_model_dir, exist_ok=True)
+
+# Quantization configuration
+quantize_config = BaseQuantizeConfig(bits=4, group_size=128, desc_act=False)
+
+# Load the tokenizer
+tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=True)
+
+# Load the model using Option 1
+model = AutoGPTQForCausalLM.from_pretrained(pretrained_model_dir, quantize_config)
+
+# Starting Streamlit app
+st.title("AutoGPTQ Streamlit App")
+
+user_input = st.text_input("Input a phrase")
+
+# Generate output when the "Generate" button is pressed
+if st.button("Generate"):
+    inputs = tokenizer(user_input, return_tensors="pt")
+    outputs = model.generate(
+        **inputs,
+        max_length=512 + inputs['input_ids'].size(-1),
+        temperature=0.1,
+        top_p=0.95,
+        repetition_penalty=1.15
+    )
+    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    st.text(generated_text)
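
Two details of the new loading path are worth flagging. In AutoGPTQ, from_pretrained with a BaseQuantizeConfig prepares a full-precision model for quantization, whereas from_quantized (used in the old version) is the entry point for checkpoints that are already quantized, as this repo's 4-bit GPTQ weights are; and once the model sits on a GPU, the tokenized inputs must be moved to the same device before generate. A minimal sketch of that alternative path, assuming CUDA is available, the repo ships safetensors, and the weight basename can be auto-detected (the prompt and max_new_tokens value are illustrative only):

import streamlit as st
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM

model_repo = "FPHam/Jackson_The_Formalizer_V2_13b_GPTQ"

# Load the tokenizer and the already-quantized 4-bit checkpoint directly
tokenizer = AutoTokenizer.from_pretrained(model_repo, use_fast=True)
model = AutoGPTQForCausalLM.from_quantized(
    model_repo,
    use_safetensors=True,
    device="cuda:0",
    use_triton=False,
    # model_basename="Jackson2-4bit-128g-GPTQ",  # only if auto-detection fails
)

# Move inputs to the model's device before generating
inputs = tokenizer("Input a phrase", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=512)
st.text(tokenizer.decode(outputs[0], skip_special_tokens=True))

Either version of the app is launched the same way, e.g. streamlit run app_v2.py.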