import os
import re

import gradio as gr
import llama_cpp
import numpy as np
import pandas as pd
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.prompts import PromptTemplate
from langchain_community.llms import LlamaCpp
from llama_cpp import Llama
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Globals shared by the pipeline: the llama.cpp LLM, the sentence-embedding
# model, the pre-computed book embeddings, and the book metadata table.
# They must be initialised before the Gradio app is launched.
llm = None
model = None
embedd_bk = None
df_bk = None
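
# A minimal initialisation sketch, shown because the globals above are never
# loaded in this file. The model path, embedding-model name and data files are
# placeholders (assumptions), not artefacts from the original project; call
# load_resources() with the real paths before launching the app.
def load_resources():
    global llm, model, embedd_bk, df_bk
    llm = LlamaCpp(
        model_path="models/llama-2-7b-chat.Q4_K_M.gguf",  # hypothetical GGUF file
        n_ctx=4096,
        callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
        verbose=True,
    )
    model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")  # assumed embedding model
    embedd_bk = pd.read_pickle("book_embeddings.pkl")  # assumed pre-computed book embeddings
    df_bk = pd.read_excel("books.xlsx")                # assumed book metadata table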

def invoke_with_temperature(prompt, temperature=0.4):
    """Run the shared LLM on a prompt with the given sampling temperature."""
    return llm.invoke(prompt, temperature=temperature)

def process_user_input(message):
    # Prompt that asks the LLM for three adjectives describing the user's
    # mental state (template elided in the original source).
    user_mental_state4 = PromptTemplate(
        input_variables=["input"],
        template="""[INST][/INST]"""
    )
    # Prompt that asks the LLM for three roles/identities describing the user
    # (template elided in the original source).
    user_character = PromptTemplate(
        input_variables=["input"],
        template="""[INST][/INST]"""
    )

    df_user = pd.DataFrame(columns=["輸入內容", "形容詞1", "形容詞2", "形容詞3", "角色1", "角色2", "角色3"])
    index = len(df_user)
    df_user.loc[index, "輸入內容"] = message

    # Extract three adjectives describing the user's mental state.
    prompt_value1 = user_mental_state4.invoke({"input": message})
    string = invoke_with_temperature(prompt_value1)
    adjectives = [adj.strip() for adj in re.split("[,、,]", string)]
    if len(adjectives) == 3:
        df_user.loc[index, "形容詞1"] = adjectives[0]
        df_user.loc[index, "形容詞2"] = adjectives[1]
        df_user.loc[index, "形容詞3"] = adjectives[2]

    # Extract three roles with the user_character prompt (mirroring the
    # adjective path) so that the 角色1-角色3 columns read downstream in
    # filter_books() are populated.
    prompt_value2 = user_character.invoke({"input": message})
    string_roles = invoke_with_temperature(prompt_value2)
    roles = [role.strip() for role in re.split("[,、,]", string_roles)]
    if len(roles) == 3:
        df_user.loc[index, "角色1"] = roles[0]
        df_user.loc[index, "角色2"] = roles[1]
        df_user.loc[index, "角色3"] = roles[2]

    df_user.to_excel("user_gradio系統.xlsx")
    return df_user

def embedd_df_user(df_user):
    # Encode the three mental-state adjectives of the latest user entry into
    # sentence embeddings, keeping the raw input text alongside them.
    columns_to_encode = df_user.loc[:, ["形容詞1", "形容詞2", "形容詞3"]]

    embedd_user = df_user[["輸入內容"]]
    embedd_user = embedd_user.assign(形容詞1=None, 形容詞2=None, 形容詞3=None)

    i = len(df_user) - 1
    for col in columns_to_encode:
        embedd_user.at[i, col] = model.encode(df_user.at[i, col])

    embedd_user.to_pickle(r"C:\Users\Cora\推薦系統實作\user_gradio系統.pkl")
    return embedd_user
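
# similarity() below reads embedd_bk.iloc[:, 3:6] as the embeddings of each
# book's 形容詞1-形容詞3. The preprocessing that builds embedd_bk is not part of
# this file; a hypothetical offline sketch with that column layout could be:
def build_book_embeddings(df_bk):
    # Columns 0-2 keep identifying fields, columns 3-5 hold adjective embeddings.
    embedd_bk = df_bk[["書名", "內容簡介", "URL"]].copy()
    for col in ["形容詞1", "形容詞2", "形容詞3"]:
        embedd_bk[col] = df_bk[col].apply(model.encode)
    return embedd_bk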

def top_n_books_by_average(df, n=3):
    # Rank books by their average similarity score and return the top-n titles
    # together with the fully sorted table.
    sorted_df = df.sort_values(by="average", ascending=False)
    top_n_df = sorted_df.head(n)
    top_books = top_n_df["書名"].tolist()
    return top_books, sorted_df

def similarity(embedd_user, embedd_bk, df_bk):
    # For every book, compare each of the user's three adjective embeddings
    # with each of the book's three adjective embeddings (9 pairs) and store
    # the mean cosine similarity in the 'average' column.
    df_similarity = pd.DataFrame(df_bk[["書名", "內容簡介", "URL", "形容詞1", "形容詞2", "形容詞3", "角色1", "角色2", "角色3"]])
    df_similarity["average"] = np.nan

    index = len(embedd_user) - 1  # latest user entry
    for k in range(len(embedd_bk)):
        scores = []
        for i in range(1, 4):      # user adjective embeddings (columns 1-3)
            for j in range(3, 6):  # book adjective embeddings (columns 3-5)
                vec1 = embedd_user.iloc[index, i]
                vec2 = embedd_bk.iloc[k, j]
                sim = cosine_similarity([vec1], [vec2])
                scores.append(sim[0][0])
        df_similarity.loc[k, "average"] = sum(scores) / len(scores)

    top_books, sorted_df = top_n_books_by_average(df_similarity)
    return sorted_df

def filter_books(sorted_df, df_user):
    # Ask the LLM, for each of the 20 most similar books, whether the book
    # actually suits the user; keep only the books answered with "是".
    # (Renamed from `filter` to avoid shadowing the built-in; the user profile
    # is passed in rather than re-created empty, which would fail at .iloc[-1].)
    filter_prompt4 = PromptTemplate(
        input_variables=["mental_issue", "user_identity", "book", "book_reader", "book_description"],
        template="""[INST][/INST]"""  # template elided in the original source
    )

    df_filter = sorted_df.iloc[:20, :].reset_index(drop=True)
    df_filter = df_filter.assign(推薦=None)

    p = len(df_user) - 1  # latest user entry
    for k in range(len(df_filter)):
        book = df_filter["書名"].iloc[k]
        book_reader = df_filter["角色1"].iloc[k]
        user_identity = df_user["角色1"].iloc[p]
        mental_issue = df_user["形容詞1"].iloc[p] + "、" + df_user["形容詞2"].iloc[p] + "、" + df_user["形容詞3"].iloc[p]
        book_description = df_filter["形容詞1"].iloc[k] + "、" + df_filter["形容詞2"].iloc[k] + "、" + df_filter["形容詞3"].iloc[k]
        # Debug output.
        print(book_reader)
        print(user_identity)

        output = filter_prompt4.invoke({
            "mental_issue": mental_issue,
            "user_identity": user_identity,
            "book": book,
            "book_description": book_description,
            "book_reader": book_reader,
        })
        string2 = invoke_with_temperature(output)
        df_filter.loc[k, "推薦"] = string2

    df_recommend = df_filter[df_filter["推薦"].str.strip() == "是"]
    return df_recommend

def output_content(df_recommend):
    # Defined in the original source but not used below.
    content_prompt = PromptTemplate(
        input_variables=["content"],
        template="""[INST][/INST]"""
    )

    # Prompt that turns the top-ranked recommended book into the final reply
    # (template elided in the original source).
    recommend_prompt = PromptTemplate(
        input_variables=["title", "URL", "summary"],
        template="""[INST][/INST]"""
    )

    # Use the highest-ranked book that survived the filtering step
    # (assumes at least one book was answered with "是").
    top = df_recommend.iloc[0]
    title = top["書名"]
    url = top["URL"]
    summary = top["內容簡介"]

    prompt_value1 = recommend_prompt.invoke({"title": title, "URL": url, "summary": summary})
    output = invoke_with_temperature(prompt_value1, temperature=0.4)
    return output

def main_pipeline(message, history):
    # End-to-end chat handler: profile the user, embed the profile, rank the
    # books by similarity, filter them with the LLM, and write the reply.
    df_user = process_user_input(message)
    embedd_user = embedd_df_user(df_user)
    sorted_df = similarity(embedd_user, embedd_bk, df_bk)
    df_recommend = filter_books(sorted_df, df_user)
    final = output_content(df_recommend)
    return final

demo = gr.ChatInterface(main_pipeline)

if __name__ == "__main__":
    demo.launch()