File size: 8,097 Bytes
6ee980f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from flask import Flask, request, jsonify, render_template_string, Response
import time
from flask_sse import sse
import redis

# Flask application setup
app = Flask(__name__)
# Redis connection URL; presumably consumed by the flask_sse blueprint
# registered on the next line -- confirm against the flask_sse documentation.
app.config["REDIS_URL"] = "redis://localhost:6379/0"
# Mount the server-sent-events blueprint under /stream.
app.register_blueprint(sse, url_prefix='/stream')

# Device selection: use CUDA when torch reports it as available, else the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the tokenizer and the model (the model is moved to the chosen device)
tokenizer = AutoTokenizer.from_pretrained("inu-ai/alpaca-guanaco-japanese-gpt-1b", use_fast=False)
model = AutoModelForCausalLM.from_pretrained("inu-ai/alpaca-guanaco-japanese-gpt-1b").to(device)

# Constants
# MAX_ASSISTANT_LENGTH: number of tokens added to the prompt length when
# computing max_length for generation, and the headroom passed to
# trim_conversation_history.
MAX_ASSISTANT_LENGTH = 100
# MAX_INPUT_LENGTH: upper bound, in tokens, used both when trimming the
# history and as the cap on max_length for generation.
MAX_INPUT_LENGTH = 1024
# Prompt templates. They are raw strings, so every "\n" below is the two
# characters backslash + "n", not a real line break. {instruction} and
# {input} are filled in with str.format.
INPUT_PROMPT = r'<s>\n以下は、タスクを説明する指示と、文脈のある入力の組み合わせです。要求を適切に満たす応答を書きなさい。\n[SEP]\n指示:\n{instruction}\n[SEP]\n入力:\n{input}\n[SEP]\n応答:\n'
# Variant without an input section; not referenced by the code visible in
# this file.
NO_INPUT_PROMPT = r'<s>\n以下は、タスクを説明する指示です。要求を適切に満たす応答を書きなさい。\n[SEP]\n指示:\n{instruction}\n[SEP]\n応答:\n'

# HTML template
# Single-page chat UI rendered by home(). The embedded script POSTs the role
# instruction, the accumulated history and the new message to /generate, and
# also opens an EventSource on /stream.
# NOTE(review): the EventSource handler appends event.data verbatim as an
# assistant message, while the /generate handler also appends the full reply
# from the JSON response -- replies may therefore show up more than once;
# confirm whether that is intended.
HTML_TEMPLATE = """
<!DOCTYPE html>
<html lang="ja">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Chat Interface</title>
    <style>
        body { font-family: Arial, sans-serif; }
        .container { max-width: 600px; margin: auto; padding: 20px; }
        .chat-box { border: 1px solid #ccc; padding: 10px; height: 300px; overflow-y: scroll; }
        .chat-entry { margin-bottom: 10px; }
        .chat-entry.user { text-align: right; }
        .input-group { display: flex; }
        .input-group input { flex: 1; padding: 10px; border: 1px solid #ccc; border-radius: 4px; }
        .input-group button { padding: 10px; border: none; background-color: #28a745; color: white; cursor: pointer; }
        .input-group button:hover { background-color: #218838; }
    </style>
</head>
<body>
    <div class="container">
        <h1>Chat Interface</h1>
        <div class="chat-box" id="chat-box"></div>
        <div class="input-group">
            <input type="text" id="user-input" placeholder="質問を入力してください...">
            <button onclick="sendMessage()">送信</button>
        </div>
    </div>
    <script>
        const chatBox = document.getElementById('chat-box');
        const userInput = document.getElementById('user-input');
        let conversationHistory = [];

        function addMessageToChat(role, message) {
            const entry = document.createElement('div');
            entry.className = 'chat-entry ' + role;
            entry.textContent = role === 'user' ? 'User: ' + message : 'Assistant: ' + message;
            chatBox.appendChild(entry);
            chatBox.scrollTop = chatBox.scrollHeight;
        }

        function sendMessage() {
            const message = userInput.value.trim();
            if (message === '') return;

            addMessageToChat('user', message);
            userInput.value = '';

            fetch('/generate', {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({
                    role_instruction: [
                        "User:あなたは「ずんだもん」なのだ。東北ずん子の武器である「ずんだアロー」に変身する妖精またはマスコットなのだ。一人称は「ボク」で語尾に「なのだ」を付けてしゃべるのだ。",
                        "Assistant:了解したのだ!"
                    ],
                    conversation_history: conversationHistory,
                    new_conversation: message
                })
            })
            .then(response => response.json())
            .then(data => {
                const assistantMessage = data.response.split('Assistant:')[1].trim();
                addMessageToChat('assistant', assistantMessage);
                conversationHistory.push('User:' + message);
                conversationHistory.push('Assistant:' + assistantMessage);
            })
            .catch(error => {
                console.error('Error:', error);
                alert('エラーが発生しました。コンソールを確認してください。');
            });
        }

        // SSEの設定
        const eventSource = new EventSource("/stream");

        eventSource.onmessage = function(event) {
            const message = event.data;
            addMessageToChat('assistant', message);
        };
    </script>
</body>
</html>
"""

def prepare_input(role_instruction, conversation_history, new_conversation):
    """Build the model prompt for one new user turn.

    Args:
        role_instruction: Iterable of strings that set up the assistant's
            persona; each one is followed by a line separator.
        conversation_history: List of earlier turns as strings, joined with
            the line separator and appended after the role instruction.
        new_conversation: The new user message; it is prefixed with
            ``User:`` and placed in the template's input slot.

    Returns:
        ``INPUT_PROMPT`` with its ``{instruction}`` and ``{input}`` fields
        filled in.
    """
    # The prompt templates are raw strings, so they spell a line break as the
    # two characters backslash + "n". generate_response stores history turns
    # with real newlines escaped the same way, and format_output converts the
    # escaped form back. Use that same escaped separator here instead of a
    # real newline so the whole prompt uses one convention.
    escaped_newline = "\\n"
    instruction = "".join(f"{text}{escaped_newline}" for text in role_instruction)
    instruction += escaped_newline.join(conversation_history)
    input_text = f"User:{new_conversation}"
    return INPUT_PROMPT.format(instruction=instruction, input=input_text)

def format_output(output):
    """Clean up decoded model output for display.

    Removes leading ``<s>`` and trailing ``</s>`` markers, deletes every
    ``[SEP]`` marker, and turns the escaped two-character sequence
    backslash + "n" into a real newline.

    Args:
        output: The decoded text.

    Returns:
        The cleaned-up text.
    """
    bos_marker = "<s>"
    eos_marker = "</s>"
    text = output
    # str.lstrip/rstrip treat their argument as a *set of characters*, which
    # would also eat genuine text such as a leading "s" or a trailing ">".
    # Strip whole markers instead (repeated markers are still removed).
    while text.startswith(bos_marker):
        text = text[len(bos_marker):]
    while text.endswith(eos_marker):
        text = text[:-len(eos_marker)]
    return text.replace("[SEP]", "").replace("\\n", "\n")

def trim_conversation_history(conversation_history, max_length):
    """Drop the oldest turns until the history fits the token budget.

    Entries are removed two at a time from the front while more than two
    entries remain and the history's token count plus ``max_length`` exceeds
    ``MAX_INPUT_LENGTH``.

    Args:
        conversation_history: List of turn strings. It is modified in place.
        max_length: Number of tokens to keep free in addition to the history.

    Returns:
        The same list object that was passed in, after trimming.
    """
    # With two or fewer entries nothing can be dropped, so skip tokenizing.
    if len(conversation_history) <= 2:
        return conversation_history

    # Tokenize every turn once up front instead of re-encoding the whole
    # history on each loop iteration.
    token_counts = [
        len(tokenizer.encode(text, add_special_tokens=False))
        for text in conversation_history
    ]
    remaining_tokens = sum(token_counts)

    dropped = 0
    while (len(conversation_history) - dropped > 2
           and remaining_tokens + max_length > MAX_INPUT_LENGTH):
        remaining_tokens -= token_counts[dropped] + token_counts[dropped + 1]
        dropped += 2

    # One slice deletion keeps the in-place contract callers rely on while
    # avoiding repeated pop(0) shifts.
    del conversation_history[:dropped]
    return conversation_history

def generate_response(role_instruction, conversation_history, new_conversation):
    """Generate the assistant's reply to a new user message.

    The history is trimmed, a prompt is built and tokenized, the model
    samples a continuation, and the text after the last ``応答:`` marker is
    taken as the reply. The new user turn and the reply are then appended to
    the list returned by ``trim_conversation_history``, with real newlines
    escaped as backslash + "n".

    Args:
        role_instruction: Strings describing the assistant's persona.
        conversation_history: List of earlier turns.
        new_conversation: The new user message.

    Returns:
        A tuple ``(formatted_output_all, response)``: the whole formatted
        model output, and the reply prefixed with ``Assistant:``.
    """
    history = trim_conversation_history(conversation_history, MAX_ASSISTANT_LENGTH)
    prompt = prepare_input(role_instruction, history, new_conversation)
    prompt_ids = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt")
    prompt_length = len(prompt_ids[0])

    generation_options = dict(
        min_length=prompt_length,
        # Allow up to MAX_ASSISTANT_LENGTH new tokens, capped at MAX_INPUT_LENGTH.
        max_length=min(MAX_INPUT_LENGTH, prompt_length + MAX_ASSISTANT_LENGTH),
        temperature=0.7,
        do_sample=True,
        pad_token_id=tokenizer.pad_token_id,
        bos_token_id=tokenizer.bos_token_id,
        eos_token_id=tokenizer.eos_token_id,
        # Forbid the unknown token in the generated text.
        bad_words_ids=[[tokenizer.unk_token_id]],
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        generated_ids = model.generate(prompt_ids.to(model.device), **generation_options)

    decoded = tokenizer.decode(generated_ids.tolist()[0])
    formatted_output_all = format_output(decoded)

    # Everything after the last "応答:" marker is treated as the reply.
    reply_text = formatted_output_all.split('応答:')[-1].strip()
    response = f"Assistant:{reply_text}"

    # Record both turns, storing line breaks in their escaped form.
    for turn in (f"User:{new_conversation}", response):
        history.append(turn.replace("\n", "\\n"))

    return formatted_output_all, response

@app.route('/')
def home():
    """Serve the chat page rendered from the inline HTML template."""
    page = render_template_string(HTML_TEMPLATE)
    return page

@app.route('/generate', methods=['POST'])
def generate():
    """Flask endpoint ``/generate``: produce a reply for a chat message.

    Expects a JSON object with ``role_instruction`` (list of strings),
    ``conversation_history`` (list of strings, optional) and
    ``new_conversation`` (string).

    Returns:
        A JSON response with ``response`` and ``conversation_history`` on
        success, or a JSON ``error`` message with status 400 when the body is
        not a JSON object or a required field is missing or empty.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising. Together with the dict check this turns bodies such as "null"
    # or "[]" into a 400 rather than an AttributeError on data.get below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "request body must be a JSON object"}), 400

    role_instruction = data.get('role_instruction', [])
    # An explicit JSON null would otherwise reach len() in the trimming step.
    conversation_history = data.get('conversation_history') or []
    new_conversation = data.get('new_conversation', "")

    if not role_instruction or not new_conversation:
        return jsonify({"error": "role_instruction and new_conversation are required fields"}), 400

    _, response = generate_response(role_instruction, conversation_history, new_conversation)

    # Stream the reply over SSE, one whitespace-separated chunk at a time.
    for word in response.split():
        sse.publish({"message": word}, type='message')
        time.sleep(0.5)  # simulate a delay between chunks

    # generate_response appended the new turns to conversation_history.
    return jsonify({"response": response, "conversation_history": conversation_history})

if __name__ == '__main__':
    # Start Flask's built-in server on all interfaces, port 7860.
    # NOTE(review): debug=True together with host 0.0.0.0 makes the
    # interactive debugger reachable from other machines -- confirm this is
    # only ever used for local development.
    app.run(
        host="0.0.0.0",
        port=7860,
        debug=True,
    )