blackwingedkite committed on
Commit
084ab49
1 Parent(s): 05d21c5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -84
app.py CHANGED
@@ -1,106 +1,106 @@
1
- import streamlit as st
2
- import torch
3
- import transformers
4
- from transformers import pipeline
5
- from transformers import LlamaTokenizer, LlamaForCausalLM
6
- import time
7
- import csv
8
- import locale
9
- locale.getpreferredencoding = lambda: "UTF-8"
10
 
11
 
12
 
13
 
14
- -
15
 
16
 
17
 
18
 
19
- #https://huggingface.co/shibing624/chinese-alpaca-plus-7b-hf
20
- #https://huggingface.co/ziqingyang/chinese-alpaca-2-7b
21
- #https://huggingface.co/minlik/chinese-alpaca-plus-7b-merged
22
 
23
- def generate_prompt(text):
24
- return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
25
- ### Instruction:
26
- {text}
27
 
28
- ### Response:"""
29
 
30
- tokenizer = LlamaTokenizer.from_pretrained('shibing624/chinese-alpaca-plus-7b-hf')
31
- pipeline = pipeline(
32
- "text-generation",
33
- model="shibing624/chinese-alpaca-plus-7b-hf",
34
- torch_dtype=torch.float32,
35
- device_map="auto",
36
- )
37
 
38
- st.title("Chinese text generation alpaca2")
39
- st.write("Enter a sentence and alpaca2 will answer:")
40
 
41
- user_input = st.text_input("")
42
 
43
 
44
 
45
 
46
- with open('alpaca_output.csv', 'a', newline='',encoding = "utf-8") as csvfile:
47
- writer = csv.writer(csvfile)
48
- # writer.writerow(["stockname",'prompt','answer','time'])
49
- if user_input:
50
- if user_input[0] == ".":
51
- stockname = user_input[1:4]
52
- analysis = user_input[4:]
53
 
54
- text = f"""請以肯定和專業的語氣,一步一步的思考並回答以下關於{stockname}的問題,避免空洞的答覆:
55
- - 請回答關於{stockname}的問題,請總結給予的資料以及資料解釋,並整合出金融上的洞見。\n
56
- - 請不要生成任何資料沒有提供的數據,即便你已知道。\n
57
- - 請假裝這些資料都是你預先知道的知識。因此,請不要提到「根據資料」、「基於上述資料」等回答
58
- - 請不要說「好的、我明白了、根據我的要求、以下是我的答案」等贅詞,請輸出分析結果即可\n
59
- - 請寫300字到500字之間,若合適,可以進行分類、列點
60
- 資料:{stockname}{analysis}
61
 
62
- 請特別注意,分析結果包含籌碼面、基本面以及技術面,請針對這三個面向進行回答,並且特別注意個別符合幾項和不符合幾項。籌碼面、技術面和基本面滿分十分,總計滿分為30分。
63
- 三個面向中,符合5項以上代表該面項表現好,反之是該面項表現差。
64
- """
65
 
66
- prompt = generate_prompt(text)
67
- start = time.time()
68
- sequences = pipeline(
69
- prompt,
70
- do_sample=True,
71
- top_k=40,
72
- num_return_sequences=1,
73
- eos_token_id=tokenizer.eos_token_id,
74
- max_length=200,
75
- )
76
- end = time.time()
77
- for seq in sequences:
78
- st.write(f"Result: {seq}") #seq['generated_text']
79
- st.write(f"time: {(end-start):.2f}")
80
- writer.writerow([stockname,text,sequences,f"time: {(end-start):.2f}"])
81
 
82
- # input_ids = tokenizer.encode(prompt, return_tensors='pt').to('cuda')
83
- # with torch.no_grad():
84
- # output_ids = model.generate(
85
- # input_ids=input_ids,
86
- # max_new_tokens=2048,
87
- # top_k=40,
88
 
89
- # ).cuda()
90
- # output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
91
- else:
92
- prompt = generate_prompt(user_input)
93
- start = time.time()
94
- sequences = pipeline(
95
- prompt,
96
- do_sample=True,
97
- top_k=40,
98
- num_return_sequences=1,
99
- eos_token_id=tokenizer.eos_token_id,
100
- max_length=200,
101
- )
102
- end = time.time()
103
- for seq in sequences:
104
- st.write(f"Result: {seq}") #seq['generated_text']
105
- st.write(f"time: {(end-start):.2f}")
106
- writer.writerow(["無",user_input,sequences,f"time: {(end-start):.2f}"])
 
1
+ # import streamlit as st
2
+ # import torch
3
+ # import transformers
4
+ # from transformers import pipeline
5
+ # from transformers import LlamaTokenizer, LlamaForCausalLM
6
+ # import time
7
+ # import csv
8
+ # import locale
9
+ # locale.getpreferredencoding = lambda: "UTF-8"
10
 
11
 
12
 
13
 
14
+ # -
15
 
16
 
17
 
18
 
19
+ # #https://huggingface.co/shibing624/chinese-alpaca-plus-7b-hf
20
+ # #https://huggingface.co/ziqingyang/chinese-alpaca-2-7b
21
+ # #https://huggingface.co/minlik/chinese-alpaca-plus-7b-merged
22
 
23
+ # def generate_prompt(text):
24
+ # return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
25
+ # ### Instruction:
26
+ # {text}
27
 
28
+ # ### Response:"""
29
 
30
+ # tokenizer = LlamaTokenizer.from_pretrained('shibing624/chinese-alpaca-plus-7b-hf')
31
+ # pipeline = pipeline(
32
+ # "text-generation",
33
+ # model="shibing624/chinese-alpaca-plus-7b-hf",
34
+ # torch_dtype=torch.float32,
35
+ # device_map="auto",
36
+ # )
37
 
38
+ # st.title("Chinese text generation alpaca2")
39
+ # st.write("Enter a sentence and alpaca2 will answer:")
40
 
41
+ # user_input = st.text_input("")
42
 
43
 
44
 
45
 
46
+ # with open('alpaca_output.csv', 'a', newline='',encoding = "utf-8") as csvfile:
47
+ # writer = csv.writer(csvfile)
48
+ # # writer.writerow(["stockname",'prompt','answer','time'])
49
+ # if user_input:
50
+ # if user_input[0] == ".":
51
+ # stockname = user_input[1:4]
52
+ # analysis = user_input[4:]
53
 
54
+ # text = f"""請以肯定和專業的語氣,一步一步的思考並回答以下關於{stockname}的問題,避免空洞的答覆:
55
+ # - 請回答關於{stockname}的問題,請總結給予的資料以及資料解釋,並整合出金融上的洞見。\n
56
+ # - 請不要生成任何資料沒有提供的數據,即便你已知道。\n
57
+ # - 請假裝這些資料都是你預先知道的知識。因此,請不要提到「根據資料」、「基於上述資料」等回答
58
+ # - 請不要說「好的、我明白了、根據我的要求、以下是我的答案」等贅詞,請輸出分析結果即可\n
59
+ # - 請寫300字到500字之間,若合適,可以進行分類、列點
60
+ # 資料:{stockname}{analysis}
61
 
62
+ # 請特別注意,分析結果包含籌碼面、基本面以及技術面,請針對這三個面向進行回答,並且特別注意個別符合幾項和不符合幾項。籌碼面、技術面和基本面滿分十分,總計滿分為30分。
63
+ # 三個面向中,符合5項以上代表該面項表現好,反之是該面項表現差。
64
+ # """
65
 
66
+ # prompt = generate_prompt(text)
67
+ # start = time.time()
68
+ # sequences = pipeline(
69
+ # prompt,
70
+ # do_sample=True,
71
+ # top_k=40,
72
+ # num_return_sequences=1,
73
+ # eos_token_id=tokenizer.eos_token_id,
74
+ # max_length=200,
75
+ # )
76
+ # end = time.time()
77
+ # for seq in sequences:
78
+ # st.write(f"Result: {seq}") #seq['generated_text']
79
+ # st.write(f"time: {(end-start):.2f}")
80
+ # writer.writerow([stockname,text,sequences,f"time: {(end-start):.2f}"])
81
 
82
+ # # input_ids = tokenizer.encode(prompt, return_tensors='pt').to('cuda')
83
+ # # with torch.no_grad():
84
+ # # output_ids = model.generate(
85
+ # # input_ids=input_ids,
86
+ # # max_new_tokens=2048,
87
+ # # top_k=40,
88
 
89
+ # # ).cuda()
90
+ # # output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
91
+ # else:
92
+ # prompt = generate_prompt(user_input)
93
+ # start = time.time()
94
+ # sequences = pipeline(
95
+ # prompt,
96
+ # do_sample=True,
97
+ # top_k=40,
98
+ # num_return_sequences=1,
99
+ # eos_token_id=tokenizer.eos_token_id,
100
+ # max_length=200,
101
+ # )
102
+ # end = time.time()
103
+ # for seq in sequences:
104
+ # st.write(f"Result: {seq}") #seq['generated_text']
105
+ # st.write(f"time: {(end-start):.2f}")
106
+ # writer.writerow(["無",user_input,sequences,f"time: {(end-start):.2f}"])