John6666 commited on
Commit
4ca9e2f
·
verified ·
1 Parent(s): 1d2b6e0

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +13 -12
  2. app.py +182 -0
  3. pre-requirements.txt +1 -0
  4. requirements.txt +6 -0
README.md CHANGED
@@ -1,12 +1,13 @@
1
- ---
2
- title: Testcode1
3
- emoji: 📈
4
- colorFrom: green
5
- colorTo: green
6
- sdk: gradio
7
- sdk_version: 5.6.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
1
+ ---
2
+ title: test code
3
+ emoji: 🙄
4
+ colorFrom: indigo
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 4.44.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from llama_cpp import Llama
3
+ from langchain_community.llms import LlamaCpp
4
+ from langchain.prompts import PromptTemplate
5
+ import llama_cpp
6
+ from langchain.callbacks.manager import CallbackManager
7
+ from sentence_transformers import SentenceTransformer
8
+ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
9
+ import numpy as np
10
+ import pandas as pd
11
+ import re
12
+ import os
13
+ from sklearn.metrics.pairwise import cosine_similarity
14
+
15
"""
model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-mpnet-base-v2',device='cpu')

llm = LlamaCpp(
model_path=r"C:\Users\Cora\.cache\lm-studio\models\YC-Chen\Breeze-7B-Instruct-v1_0-GGUF\breeze-7b-instruct-v1_0-q4_k_m.gguf",
n_gpu_layers=100,
n_batch=512,
n_ctx=3000,
f16_kv=True,
callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
verbose=False,
)
embedd_bk=pd.read_pickle(r"C:\Users\Cora\推薦系統實作\bk_description1_角色形容詞_677.pkl")
df_bk=pd.read_excel(r"C:\Users\Cora\推薦系統實作\bk_description1_角色形容詞.xlsx")
"""
# The real model/data setup (string above) is disabled for the hosted demo;
# the pipeline globals are stubbed out instead. Every downstream function
# that touches these will fail until real objects are assigned.
llm = model = embedd_bk = df_bk = None
35
def invoke_with_temperature(prompt, temperature=0.4):
    """Invoke the global LLM on *prompt* with the given sampling temperature."""
    # NOTE(review): `llm` is a module-level global (currently None) — calling
    # this before a real model is assigned raises AttributeError.
    reply = llm.invoke(prompt, temperature=temperature)
    return reply
37
+
38
def process_user_input(message):
    """Extract three descriptive adjectives from the user's message via the LLM
    and record them, together with the raw input, in a one-row DataFrame.

    Side effect: writes the frame to "user_gradio系統.xlsx".
    """
    # Placeholder prompts — the real [INST]...[/INST] instructions were
    # stripped before upload.
    user_mental_state4 = PromptTemplate(
        input_variables=["input"],
        template="""[INST][/INST]"""
    )
    # Kept for parity with the original pipeline; not used in this function.
    user_character = PromptTemplate(
        input_variables=["input"],
        template="""[INST][/INST]"""
    )

    df_user = pd.DataFrame(
        columns=["輸入內容", "形容詞1", "形容詞2", "形容詞3", "角色1", "角色2", "角色3"]
    )

    llm_reply = invoke_with_temperature(user_mental_state4.invoke({"input": message}))

    # Split the reply on any common list separator (ASCII comma, 、, fullwidth comma).
    adj_list = [part.strip() for part in re.split('[,、,]', llm_reply)]

    row = len(df_user)
    df_user.loc[row, '輸入內容'] = message

    # Only fill the adjective columns when the model returned exactly three;
    # otherwise they are left as NaN (original behavior).
    if len(adj_list) == 3:
        for col_name, adjective in zip(('形容詞1', '形容詞2', '形容詞3'), adj_list):
            df_user.loc[row, col_name] = adjective

    df_user.to_excel("user_gradio系統.xlsx")
    return df_user
67
def embedd_df_user(df_user):
    """Sentence-encode the three adjective cells of the newest user row.

    Returns a DataFrame mirroring df_user's 輸入內容 column plus one embedding
    per adjective column; pickles the result as a side effect.
    """
    adjective_cols = df_user.loc[:, ["形容詞1", "形容詞2", "形容詞3"]]

    # Start from the raw-input column and add empty slots for the embeddings.
    embedd_user = df_user[["輸入內容"]]
    embedd_user = embedd_user.assign(形容詞1=None, 形容詞2=None, 形容詞3=None)

    # Only the most recent row is encoded.
    newest = len(df_user) - 1
    for col in adjective_cols:  # iterating a DataFrame yields its column names
        embedd_user.at[newest, col] = model.encode(df_user.at[newest, col])

    # NOTE(review): hardcoded Windows path — will fail on a hosted Linux Space.
    embedd_user.to_pickle(r"C:\Users\Cora\推薦系統實作\user_gradio系統.pkl")

    return embedd_user
86
+
87
def top_n_books_by_average(df, n=3):
    """Return the titles of the *n* highest-scoring books plus the full ranking.

    Parameters:
        df: DataFrame with an 'average' score column and a '書名' (title) column.
        n: number of top titles to return (default 3).

    Returns:
        (list of top-n titles, DataFrame sorted by 'average' descending)
    """
    ranked = df.sort_values(by='average', ascending=False)
    best_titles = list(ranked['書名'].head(n))
    return best_titles, ranked
99
+
100
def similarity(embedd_user, embedd_bk, df_bk):
    """Score every book against the most recent user embedding row.

    For the newest row of *embedd_user*, compares each of its three adjective
    embeddings against each book's three adjective embeddings with cosine
    similarity and stores the mean of the nine scores in an 'average' column.

    Returns the per-book DataFrame sorted by 'average' descending.

    Fixes vs. original: the local accumulator no longer shadows the builtin
    `list`, and the per-pair score no longer shadows this function's own name
    (`similarity = cosine_similarity(...)` would have broken any later call).
    """
    df_similarity = pd.DataFrame(
        df_bk[['書名', "內容簡介", "URL", "形容詞1", "形容詞2", "形容詞3", '角色1', '角色2', '角色3']]
    )
    df_similarity['average'] = np.nan

    user_row = len(embedd_user) - 1  # only the latest user input is scored
    for book_idx in range(len(embedd_bk)):
        scores = []
        # Per the original comments: user adjective embeddings sit in columns
        # 1-3 of embedd_user, book adjective embeddings in columns 3-5 of
        # embedd_bk — TODO confirm against the pickled frames.
        for user_col in range(1, 4):
            for book_col in range(3, 6):
                user_vec = embedd_user.iloc[user_row, user_col]
                book_vec = embedd_bk.iloc[book_idx, book_col]
                sim = cosine_similarity([user_vec], [book_vec])
                scores.append(sim[0][0])
        # Mean of the nine pairwise similarities.
        df_similarity.loc[book_idx, 'average'] = sum(scores) / len(scores)

    top_books, sorted_df = top_n_books_by_average(df_similarity)
    return sorted_df
123
+
124
def filter(sorted_df):
    """For each of the top-20 candidate books, ask the LLM whether it suits the
    current user, and keep only the rows the model answered "是" (yes).

    NOTE(review): the name shadows the builtin `filter`; kept unchanged
    because main_pipeline calls it by this name.
    """
    # Placeholder prompt — the real [INST]...[/INST] instructions were
    # stripped. (The stray leading space in " book" is preserved as-is.)
    filter_prompt4 = PromptTemplate(
        input_variables=["mental_issue", "user_identity", " book", "book_reader", "book_description"],
        template="""[INST][/INST]"""
    )

    # Only score the 20 best-ranked books.
    df_filter = sorted_df.iloc[:20, :]
    df_filter = df_filter.reset_index(drop=True)
    df_filter = df_filter.assign(推薦=None)

    # BUGFIX: the first column name was mojibake ("輸入內��") in the uploaded
    # file; it must be "輸入內容" to match the lookup below.
    df_user = pd.DataFrame(columns=["輸入內容", "形容詞1", "形容詞2", "形容詞3", "角色1", "角色2", "角色3"])

    # NOTE(review): df_user is created empty here, so p == -1 and every
    # .iloc[p] below raises IndexError at runtime. Presumably this was meant
    # to reload the frame saved by process_user_input
    # ("user_gradio系統.xlsx") — TODO confirm with the author.
    p = len(df_user) - 1
    for k in range(len(df_filter)):
        word = df_user["輸入內容"].iloc[p]
        #book_reader = df_filter["角色1"].iloc[p] + "or" + df_filter["角色2"].iloc[p] + "or" + df_filter["角色3"].iloc[p]
        book = df_filter["書名"].iloc[k]
        book_reader = df_filter["角色1"].iloc[k]
        user_identity = df_user["角色1"].iloc[p]
        mental_issue = df_user["形容詞1"].iloc[p] + "、" + df_user["形容詞2"].iloc[p] + "、" + df_user["形容詞3"].iloc[p]
        book_description = df_filter["形容詞1"].iloc[k] + "、" + df_filter["形容詞2"].iloc[k] + "、" + df_filter["形容詞3"].iloc[k]
        print(book_reader)
        print(user_identity)
        output = filter_prompt4.invoke({"mental_issue": mental_issue, "user_identity": user_identity, "book": book, "book_description": book_description, "book_reader": book_reader})
        string2 = invoke_with_temperature(output)
        df_filter.loc[k, '推薦'] = string2

    # Keep only books the model explicitly approved with "是".
    df_recommend = df_filter[df_filter["推薦"].str.strip() == "是"]

    return df_recommend
153
def output_content(df_recommend):
    """Generate the final user-facing recommendation text for the top book.

    Fixes vs. original:
    - removed the early `recommend_prompt.invoke({"title":..., "URL":URL,
      "summary":summary})` call, which referenced `recommend_prompt`, `URL`
      and `summary` before any of them existed (guaranteed NameError);
    - `recommend_prompt` was built with its `template` argument commented
      out, which PromptTemplate rejects — restored with the same [INST]
      placeholder used by every other prompt in this file;
    - the title is read positionally (`.iloc[0]`) instead of `.loc[0, ...]`,
      since after filtering the label 0 may no longer exist;
    - dropped the unused `content_prompt` local.
    """
    # Placeholder prompt — the real template was stripped before upload.
    recommend_prompt = PromptTemplate(
        input_variables=["title"],
        template="""[INST][/INST]"""
    )

    # First (best-ranked) recommended book.
    title = df_recommend["書名"].iloc[0]

    prompt_value1 = recommend_prompt.invoke({"title": title})
    output = invoke_with_temperature(prompt_value1, temperature=0.4)
    return output
170
+
171
def main_pipeline(message, history):
    """Gradio chat entry point.

    Pipeline: message -> adjective extraction -> embeddings -> book ranking
    -> LLM yes/no filtering -> final recommendation text.
    *history* is required by gr.ChatInterface but unused here.
    """
    user_frame = process_user_input(message)
    user_embeddings = embedd_df_user(user_frame)
    ranked = similarity(user_embeddings, embedd_bk, df_bk)
    approved = filter(ranked)
    return output_content(approved)
178
+
179
# Wire the recommendation pipeline into a Gradio chat UI.
demo = gr.ChatInterface(main_pipeline)

# Launch only when run as a script (Spaces also imports this module).
if __name__ == "__main__":
    demo.launch()
pre-requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ pip>=24.1
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ huggingface_hub
2
+ scikit-build-core
3
+ llama-cpp-python
4
+ scikit-learn
5
+ numpy<2
6
+ langchain