Chris4K committed
Commit 19546ba · verified · 1 Parent(s): 2f503a5

Update app.py

Files changed (1)
  1. app.py +24 -22
app.py CHANGED
@@ -54,35 +54,37 @@ model_pipeline = pipeline(
 )
 
 # Use the pipeline in HuggingFacePipeline
-#llm = HuggingFacePipeline(pipeline=model_pipeline)
+llm = HuggingFacePipeline(pipeline=model_pipeline)
 
 ##### Alternative
 from transformers import pipeline
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 
-READER_MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
-
-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.bfloat16,
-)
-rmodel = AutoModelForCausalLM.from_pretrained(READER_MODEL_NAME, quantization_config=bnb_config)
-tokenizer = AutoTokenizer.from_pretrained(READER_MODEL_NAME)
-
-llm = pipeline(
-    model=rmodel,
-    tokenizer=tokenizer,
-    task="text-generation",
-    do_sample=True,
-    temperature=0.2,
-    repetition_penalty=1.1,
-    return_full_text=False,
-    max_new_tokens=500,
-)
+#READER_MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
+
+#bnb_config = BitsAndBytesConfig(
+#    load_in_4bit=True,
+#    bnb_4bit_use_double_quant=True,
+#    bnb_4bit_quant_type="nf4",
+#    bnb_4bit_compute_dtype=torch.bfloat16,
+#)
+#rmodel = AutoModelForCausalLM.from_pretrained(READER_MODEL_NAME, quantization_config=bnb_config)
+#tokenizer = AutoTokenizer.from_pretrained(READER_MODEL_NAME)
+
+#llm = pipeline(
+#    model=rmodel,
+#    tokenizer=tokenizer,
+#    task="text-generation",
+#    do_sample=True,
+#    temperature=0.2,
+#    repetition_penalty=1.1,
+#    return_full_text=False,
+#    max_new_tokens=500,
+#)
 #####
+from huggingface_hub import InferenceClient
+
 #repo_id = "meta-llama/Meta-Llama-3-8B-Instruct"
 
 #llm_client = InferenceClient(model=repo_id, timeout=120)
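
Net effect of the commit: `llm` is now the HuggingFacePipeline wrapper around the existing `model_pipeline`, the quantized zephyr-7b-beta reader path is kept only as commented-out reference code, and `InferenceClient` is imported but not yet instantiated. The sketch below is a minimal smoke test of the two entry points, assuming `HuggingFacePipeline` is the LangChain wrapper already imported elsewhere in app.py; the `repo_id` value and client call simply mirror the lines that remain commented out in the diff.

# Minimal sketch (assumptions noted above), not part of the commit itself.

# 1) The re-enabled local pipeline wrapper: LangChain LLMs expose .invoke()
#    for a single prompt and return the generated text as a string.
print(llm.invoke("What does this Space do?"))

# 2) The still-commented remote client, if it were enabled as in the diff:
# from huggingface_hub import InferenceClient
# repo_id = "meta-llama/Meta-Llama-3-8B-Instruct"
# llm_client = InferenceClient(model=repo_id, timeout=120)
# print(llm_client.text_generation("What does this Space do?", max_new_tokens=100))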