arjunanand13 committed
Commit d9ff90d
1 Parent(s): 4841c33

Update app.py

Files changed (1):
  1. app.py +14 -14
app.py CHANGED
@@ -48,11 +48,11 @@ device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
 
 
 model_config = transformers.AutoConfig.from_pretrained(
-    self.model_id,
+    model_id,
     # use_auth_token=hf_auth
 )
 model = transformers.AutoModelForCausalLM.from_pretrained(
-    self.model_id,
+    model_id,
     trust_remote_code=True,
     config=model_config,
     quantization_config=bnb_config,
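
The fix in this hunk: app.py loads the model at module level, not inside a class, so self.model_id raises a NameError at import time; the commit drops the self. prefix. A minimal sketch of the corrected setup, assuming a hypothetical model_id (the real value is defined elsewhere in app.py and is not part of this diff):

import transformers

model_id = 'meta-llama/Llama-2-7b-chat-hf'  # hypothetical; app.py defines its own

# Module-level code has no `self`, so the bare name is the correct reference.
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    # use_auth_token=hf_auth  # only needed for gated or private checkpoints
)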
@@ -60,7 +60,7 @@ model = transformers.AutoModelForCausalLM.from_pretrained(
 )
 model.eval()
 tokenizer = transformers.AutoTokenizer.from_pretrained(
-    self.model_id,
+    model_id,
     # use_auth_token=hf_auth
 )
 generate_text = transformers.pipeline(
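
The load call passes quantization_config=bnb_config, which is defined earlier in app.py and not shown in this diff. A typical 4-bit bitsandbytes setup (an assumption, not the app's confirmed values) looks like:

import torch
from transformers import BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # quantize weights to 4 bits on load
    bnb_4bit_quant_type='nf4',              # NormalFloat4 quantization
    bnb_4bit_use_double_quant=True,         # also quantize the quantization constants
    bnb_4bit_compute_dtype=torch.bfloat16,  # dtype used for matmuls at runtime
)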
@@ -92,17 +92,17 @@ class StopOnTokens(StoppingCriteria):
 stopping_criteria = StoppingCriteriaList([StopOnTokens()])
 
 
-generate_text = transformers.pipeline(
-    model=model,
-    tokenizer=tokenizer,
-    return_full_text=True,  # langchain expects the full text
-    task='text-generation',
-    # we pass model parameters here too
-    stopping_criteria=stopping_criteria,  # without this model rambles during chat
-    temperature=0.1,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
-    max_new_tokens=512,  # max number of tokens to generate in the output
-    repetition_penalty=1.1  # without this output begins repeating
-)
+# generate_text = transformers.pipeline(
+#     model=model,
+#     tokenizer=tokenizer,
+#     return_full_text=True,  # langchain expects the full text
+#     task='text-generation',
+#     # we pass model parameters here too
+#     stopping_criteria=stopping_criteria,  # without this model rambles during chat
+#     temperature=0.1,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
+#     max_new_tokens=512,  # max number of tokens to generate in the output
+#     repetition_penalty=1.1  # without this output begins repeating
+# )
 
 llm = HuggingFacePipeline(pipeline=generate_text)
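
The hunk header shows this code sits below a class StopOnTokens(StoppingCriteria) definition whose body lies outside the diff context. A typical implementation (an assumption, sketched from the surrounding usage) halts generation once the last emitted token is a designated stop token:

import torch
from transformers import StoppingCriteria, StoppingCriteriaList

class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # Hypothetical stop list; `tokenizer` is the one loaded earlier in app.py.
        stop_token_ids = [tokenizer.eos_token_id]
        # Halt as soon as the most recent token matches a stop id.
        return input_ids[0][-1].item() in stop_token_ids

stopping_criteria = StoppingCriteriaList([StopOnTokens()])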
 
 
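Net effect of the commit: app.py constructed generate_text twice, and the second construction (lines 95-105) silently replaced the first; commenting it out means the pipeline built at line 66 is the one HuggingFacePipeline wraps. A sketch of the surviving wiring, with parameter values taken from the commented-out block (the first pipeline's actual arguments are not visible in this diff, and the LangChain import path is an assumption):

from langchain.llms import HuggingFacePipeline

generate_text = transformers.pipeline(
    model=model,
    tokenizer=tokenizer,
    task='text-generation',
    return_full_text=True,                # langchain expects the full text
    stopping_criteria=stopping_criteria,  # without this the model rambles during chat
    temperature=0.1,                      # 'randomness' of outputs
    max_new_tokens=512,                   # cap on generated tokens
    repetition_penalty=1.1,               # without this output begins repeating
)
llm = HuggingFacePipeline(pipeline=generate_text)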