storresbusquets committed
Commit 48d5a82
1 Parent(s): e953355

Update app.py

Files changed (1)
  1. app.py +40 -23
app.py CHANGED
@@ -1,50 +1,67 @@
  import gradio as gr
  import torch
  import transformers
- # from transformers import AutoTokenizer
- from langchain import LLMChain, HuggingFacePipeline, PromptTemplate
- import os
- from ctransformers import AutoModelForCausalLM, AutoTokenizer
+ from langchain.llms import CTransformers
+ from langchain import PromptTemplate, LLMChain
+ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 
  # model = AutoModelForCausalLM.from_pretrained("marella/gpt-2-ggml", hf=True)
  # tokenizer = AutoTokenizer.from_pretrained(model)
 
- access_token = os.getenv("Llama2")
+ # access_token = os.getenv("Llama2")
 
  def greet(text):
 
-     model = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGML", model_file = 'llama-2-7b-chat.ggmlv3.q4_K_S.bin', hf=True)
-     tokenizer = AutoTokenizer.from_pretrained(model)
+     llm = CTransformers(model="TheBloke/Llama-2-7B-Chat-GGML", model_file = 'llama-2-7b-chat.ggmlv3.q2_K.bin', callbacks=[StreamingStdOutCallbackHandler()])
+
+     template = """
+     [INST] <<SYS>>
+     You are a helpful, respectful and honest assistant that performs summaries of text. Write a concise summary of the following text.
+     <</SYS>>
+     {text}[/INST]
+     """
+
+     prompt = PromptTemplate(template=template, input_variables=["text"])
+     llm_chain = LLMChain(prompt=prompt, llm=llm)
+     summary = llm_chain.run(text)
+
+     return summary
+
+
+
+
+     # model = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGML", model_file = 'llama-2-7b-chat.ggmlv3.q4_K_S.bin', hf=True)
+     # tokenizer = AutoTokenizer.from_pretrained(model)
 
      # model = "meta-llama/Llama-2-7b-hf"
      # tokenizer = AutoTokenizer.from_pretrained(model, token=access_token)
 
-     pipeline = transformers.pipeline(
-         "text-generation",
-         model=model,
-         tokenizer=tokenizer,
+     # pipeline = transformers.pipeline(
+     #     "text-generation",
+     #     model=model,
+     #     tokenizer=tokenizer,
      # torch_dtype=torch.bfloat16,
      # trust_remote_code=True,
      # device_map="auto",
-         max_length=512,
-         max_new_tokens=256,
-         do_sample=True,
+     #     max_length=512,
+     #     max_new_tokens=256,
+     #     do_sample=True,
      # top_k=10,
-         num_return_sequences=1,
-         eos_token_id=tokenizer.eos_token_id,
+     #     num_return_sequences=1,
+     #     eos_token_id=tokenizer.eos_token_id,
      # token=access_token
-     )
+     # )
 
-     llm = HuggingFacePipeline(pipeline = pipeline, model_kwargs = {'temperature':0,'repetition_penalty':1.1})
+     # llm = HuggingFacePipeline(pipeline = pipeline, model_kwargs = {'temperature':0,'repetition_penalty':1.1})
 
-     template = """Write a concise summary of the following:
-     "{text}"
-     CONCISE SUMMARY:"""
+     # template = """Write a concise summary of the following:
+     # "{text}"
+     # CONCISE SUMMARY:"""
 
-     prompt = PromptTemplate(template=template, input_variables=["text"])
-     llm_chain = LLMChain(prompt=prompt, llm=llm)
+     # prompt = PromptTemplate(template=template, input_variables=["text"])
+     # llm_chain = LLMChain(prompt=prompt, llm=llm)
 
-     return llm_chain.run(text)
+     # return llm_chain.run(text)
 
  with gr.Blocks() as demo:
 
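Taken together, the commit swaps the transformers text-generation pipeline wrapped in HuggingFacePipeline for LangChain's CTransformers loader pointing at a 2-bit quantized GGML build of Llama-2-7B-Chat, and moves the summarization prompt into Llama-2's [INST]/<<SYS>> chat format. Below is a minimal sketch of the resulting script in one piece, with the commented-out legacy code dropped. It assumes a pre-0.1 langchain plus the ctransformers package; the Blocks UI wiring after greet() is a hypothetical completion, since the diff cuts off at `with gr.Blocks() as demo:`.

import gradio as gr
from langchain import PromptTemplate, LLMChain
from langchain.llms import CTransformers
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler


def greet(text):
    # Load the quantized GGML build of Llama-2-7B-Chat on CPU via ctransformers;
    # the callback streams tokens to stdout as they are generated.
    llm = CTransformers(
        model="TheBloke/Llama-2-7B-Chat-GGML",
        model_file="llama-2-7b-chat.ggmlv3.q2_K.bin",
        callbacks=[StreamingStdOutCallbackHandler()],
    )

    # Llama-2 chat format: system prompt inside <<SYS>>...<</SYS>>,
    # the user turn wrapped in [INST]...[/INST].
    template = """[INST] <<SYS>>
You are a helpful, respectful and honest assistant that performs summaries of text. Write a concise summary of the following text.
<</SYS>>
{text}[/INST]"""

    prompt = PromptTemplate(template=template, input_variables=["text"])
    llm_chain = LLMChain(prompt=prompt, llm=llm)
    return llm_chain.run(text)


# Hypothetical UI wiring: the diff only shows `with gr.Blocks() as demo:`.
with gr.Blocks() as demo:
    inp = gr.Textbox(label="Text to summarize", lines=10)
    out = gr.Textbox(label="Summary")
    gr.Button("Summarize").click(greet, inputs=inp, outputs=out)

demo.launch()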