import sys
import os
import json

import torch
from transformers import GenerationConfig
from flask import Flask, request, jsonify, render_template

app = Flask(__name__)
# Define paths
drive_folder = '/app'  # Path where files are downloaded inside the Docker container
tokenizer_config_file = os.path.join(drive_folder, 'tokenizer_config.json')
model_config_file = os.path.join(drive_folder, 'config.json')

# Add the custom tokenizer and model code to sys.path so the imports below resolve
sys.path.append(drive_folder)

# Debugging print statements
print(f"Drive folder: {drive_folder}")
print(f"Tokenizer config file: {tokenizer_config_file}")
print(f"Model config file: {model_config_file}")
# Import the custom configuration, tokenizer, and model classes
try:
    from configuration_qwen import QWenConfig
    from tokenization_qwen import QWenTokenizer
    from modeling_qwen import QWenLMHeadModel
    print("Imported custom classes successfully!")
except ImportError as e:
    print(f"Import error: {e}")
    raise
# Ensure the tokenizer configuration file exists
if not os.path.exists(tokenizer_config_file):
    raise FileNotFoundError(f"Tokenizer configuration file not found at {tokenizer_config_file}")

# Load the tokenizer configuration
with open(tokenizer_config_file, 'r') as f:
    tokenizer_config = json.load(f)

# Load the model configuration from the provided config file
with open(model_config_file, 'r') as f:
    model_config = json.load(f)

# Disable FlashAttention for non-supported GPUs
model_config["use_flash_attn"] = False
model_config["use_dynamic_ntk"] = False  # Disable other advanced features if necessary
# Use the provided configuration for model initialization
try:
    tokenizer = QWenTokenizer.from_pretrained(drive_folder)
    config = QWenConfig.from_pretrained(drive_folder, **model_config)
    model = QWenLMHeadModel.from_pretrained(drive_folder, config=config)
    model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
    print("Model and tokenizer loaded successfully!")
except Exception as e:
    print("Error loading model or tokenizer:", e)
    raise
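
# Optional device check (a sketch, not part of the original flow; prints which
# device the weights actually ended up on):
#
#   print(f"Model device: {next(model.parameters()).device}")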
def generate_text(model, tokenizer, prompt, max_length=200, temperature=0.7, top_k=50, top_p=0.9):
    try:
        # Tokenize the input
        input_ids = tokenizer.encode(prompt, return_tensors='pt').to(model.device)

        # Set up generation configuration; max_length counts the prompt tokens,
        # so extend it by the prompt length to allow max_length new tokens
        generation_config = GenerationConfig(
            max_length=max_length + len(input_ids[0]),
            do_sample=True,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id
        )

        # Generate text using sampling (temperature, top-k, and nucleus/top-p)
        outputs = model.generate(
            input_ids,
            generation_config=generation_config
        )

        # Decode the generated sequence
        decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Strip the echoed prompt from the output; fall back to the full text
        # if decoding altered the prompt and it can no longer be found
        start_index = decoded_output.find(prompt)
        if start_index == -1:
            return decoded_output.strip()
        return decoded_output[start_index + len(prompt):].strip()
    except Exception as e:
        print("Error during text generation:", e)
        raise
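
# Quick local sanity check (a minimal sketch; assumes the model and tokenizer
# above loaded without errors, and is not part of the app's request flow):
#
#   sample = generate_text(model, tokenizer, "Once upon a time", max_length=50)
#   print(sample)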
@app.route('/')
def home():
    return render_template('index.html')
@app.route('/generate', methods=['POST'])
def generate():
    user_input = request.form['user_input']
    try:
        # Canned identity response for "what is your name"-style questions;
        # anything else goes to the model
        identity_triggers = [
            ("urname", "what"),
            ("your name", "what"),
            ("tell ", "your name"),
            ("what", "you go by"),
            ("what", "call yourself"),
            ("what", "they call you"),
        ]
        if any(a in user_input and b in user_input for a, b in identity_triggers):
            response_text = "I am Shanks, a large language model developed by Motaung.inc"
        else:
            response_text = generate_text(model, tokenizer, user_input)
        return jsonify({"response": response_text})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)
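
# Example request once the server is running (a hedged sketch; the form field
# name 'user_input' and port 8080 come from the handler and app.run() above):
#
#   curl -X POST http://localhost:8080/generate -d "user_input=Hello, who are you?"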