Spaces: Build error
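A frequent cause of Spaces Docker build failures is a package missing from the image. Before digging into the script itself, it may be worth checking that the image installs everything the script imports; a minimal `requirements.txt` consistent with those imports (versions unpinned here, and `tiktoken` is an assumption based on Qwen's custom tokenizer needing it at runtime):

```
# requirements.txt — packages implied by the script's imports; pin versions as needed
flask
torch
transformers
safetensors
tiktoken  # assumption: required by Qwen's custom tokenizer
```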
```python
import sys
import os
import json

import torch
from safetensors.torch import load_file
from transformers import AutoConfig, GenerationConfig
from flask import Flask, request, jsonify, render_template

app = Flask(__name__)

# Define paths
drive_folder = '/app'  # path where the files are downloaded in the Docker container
tokenizer_config_file = os.path.join(drive_folder, 'tokenizer_config.json')
model_config_file = os.path.join(drive_folder, 'config.json')

# Make the custom tokenizer and model modules importable
sys.path.append(drive_folder)

# Debugging print statements
print(f"Drive folder: {drive_folder}")
print(f"Tokenizer config file: {tokenizer_config_file}")
print(f"Model config file: {model_config_file}")

# Import the custom configuration, tokenizer, and model classes
try:
    from configuration_qwen import QWenConfig
    from tokenization_qwen import QWenTokenizer
    from modeling_qwen import QWenLMHeadModel
    print("Imported custom classes successfully!")
except ImportError as e:
    print(f"Import error: {e}")
    raise

# Ensure the tokenizer configuration file exists
if not os.path.exists(tokenizer_config_file):
    raise FileNotFoundError(f"Tokenizer configuration file not found at {tokenizer_config_file}")

# Load the tokenizer configuration
with open(tokenizer_config_file, 'r') as f:
    tokenizer_config = json.load(f)

# Load the model configuration from the provided config file
with open(model_config_file, 'r') as f:
    model_config = json.load(f)

# Disable FlashAttention and dynamic NTK for GPUs that don't support them
model_config["use_flash_attn"] = False
model_config["use_dynamic_ntk"] = False

# Initialize the tokenizer and model from the downloaded files
try:
    tokenizer = QWenTokenizer.from_pretrained(drive_folder)
    config = QWenConfig.from_pretrained(drive_folder, **model_config)
    model = QWenLMHeadModel.from_pretrained(drive_folder, config=config)
    model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
    print("Model and tokenizer loaded successfully!")
except Exception as e:
    print("Error loading model or tokenizer:", e)
    raise


def generate_text(model, tokenizer, prompt, max_length=200, temperature=0.7, top_k=50, top_p=0.9):
    try:
        # Tokenize the input and move it to the model's device
        input_ids = tokenizer.encode(prompt, return_tensors='pt').to(model.device)

        # Set up the sampling configuration
        generation_config = GenerationConfig(
            max_length=max_length + len(input_ids[0]),
            do_sample=True,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

        # Generate and decode the output sequence
        outputs = model.generate(input_ids, generation_config=generation_config)
        decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Strip the echoed prompt from the front of the decoded text
        if decoded_output.startswith(prompt):
            decoded_output = decoded_output[len(prompt):]
        return decoded_output.strip()
    except Exception as e:
        print("Error during text generation:", e)
        raise


@app.route('/')
def home():
    return render_template('index.html')


@app.route('/generate', methods=['POST'])  # route path must match the form action in index.html
def generate():
    user_input = request.form['user_input']
    try:
        # Canned answer for "what is your name?"-style questions (case-insensitive)
        text = user_input.lower()
        name_triggers = [
            ("urname", "what"),
            ("your name", "what"),
            ("tell ", "your name"),
            ("what", "you go by"),
            ("what", "call yourself"),
            ("what", "they call you"),
        ]
        if any(a in text and b in text for a, b in name_triggers):
            response_text = "I am Shanks, a large language model developed by Motaung.inc"
        else:
            response_text = generate_text(model, tokenizer, user_input)
        return jsonify({"response": response_text})
    except Exception as e:
        return jsonify({"error": str(e)}), 500


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)
```
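Note that `render_template('index.html')` requires a `templates/index.html` inside the container; if it is missing, the `/` route will fail at runtime even after a clean build. Once the Space is up, the generation endpoint can be exercised without the HTML front end. A minimal smoke test, assuming the server is reachable at `http://localhost:8080` and the route is registered as `/generate` as in the decorator above:

```python
# Minimal smoke test for the /generate endpoint.
# Assumes: server reachable at http://localhost:8080 and the route path
# matches the @app.route('/generate') registration in the script above.
import requests

resp = requests.post(
    "http://localhost:8080/generate",
    data={"user_input": "What do they call you?"},  # form-encoded, matching request.form
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["response"])
```

With this prompt the request should return the canned "I am Shanks..." response, which makes it a deterministic check that the route and form parsing work before testing model generation itself.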