Spaces:

GabrielSalem
/

RealTimeAnswer

Sleeping

App Files Files Community

GabrielSalem committed on Dec 17, 2024

Commit

d061bf7

verified ·

1 Parent(s): 7373476

Update app.py

Browse files

Files changed (1) hide show

app.py +115 -122

app.py CHANGED Viewed

@@ -1,152 +1,145 @@
-from flask import Flask, render_template, request, redirect, url_for, send_file, jsonify
-from transformers import GPT2LMHeadModel, GPT2Tokenizer
 import os
-import torch
-import zipfile
 import pandas as pd
-from utils import preprocess_data, train_model
 app = Flask(__name__)
 app.config["UPLOAD_FOLDER"] = "uploads"
-app.config["MODEL_FOLDER"] = "models"
-# Initialize device
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# Load tokenizer and set padding if needed
-model_name = "gpt2"
-tokenizer = GPT2Tokenizer.from_pretrained(model_name)
-tokenizer.add_special_tokens({'pad_token': '[PAD]'})
-# Cache for loaded models to avoid redundant loading
-loaded_models = {}
 @app.route("/")
 def home():
-    # List available models
-    models = [model for model in os.listdir(app.config["MODEL_FOLDER"]) if
-              os.path.isdir(os.path.join(app.config["MODEL_FOLDER"], model))]
-    return render_template("home.html", models=models)
 @app.route("/upload", methods=["POST"])
 def upload_file():
-    if "file" not in request.files or "model_name" not in request.form:
         return redirect(request.url)
     file = request.files["file"]
-    model_name = request.form["model_name"]
-    if not file.filename or not model_name:
-        return redirect(request.url)
-    # Prepare directories and paths
-    model_path = os.path.join(app.config["MODEL_FOLDER"], model_name)
-    os.makedirs(model_path, exist_ok=True)
-    filepath = os.path.join(app.config["UPLOAD_FOLDER"], file.filename)
-    file.save(filepath)
-    # Load and preprocess data
-    try:
-        df = pd.read_csv(filepath)
-        dataset = preprocess_data(df, tokenizer)
-    except Exception as e:
-        return f"Data processing error: {e}", 500
-    # Train and save model
-    try:
-        # Clear any previous GPU memory allocation
-        torch.cuda.empty_cache()
-        model = GPT2LMHeadModel.from_pretrained("gpt2")
-        model.resize_token_embeddings(len(tokenizer))
-        model.to(device)
-        # Train the model
-        train_model(model, tokenizer, dataset, model_path)
-        # Clear GPU memory right after training
-        del model
-        torch.cuda.empty_cache()
-    except torch.cuda.OutOfMemoryError:
-        # Clear memory in case of OOM error and return an appropriate message
-        torch.cuda.empty_cache()
-        return "CUDA out of memory error. Try a smaller model or reduce batch size.", 500
-    except Exception as e:
-        return f"Model training error: {e}", 500
-    # Zip the model files for download
-    model_zip_path = os.path.join(model_path, f"{model_name}.zip")
-    with zipfile.ZipFile(model_zip_path, 'w') as model_zip:
-        for folder, _, files in os.walk(model_path):
-            for file_name in files:
-                file_path = os.path.join(folder, file_name)
-                model_zip.write(file_path, os.path.relpath(file_path, app.config["MODEL_FOLDER"]))
-    return redirect(url_for("home"))
-@app.route("/download/<model_name>")
-def download_model(model_name):
-    model_path = os.path.join(app.config["MODEL_FOLDER"], model_name, f"{model_name}.zip")
-    if os.path.exists(model_path):
-        return send_file(model_path, as_attachment=True)
     else:
-        return "Model not found", 404
-@app.route("/chat/<model_name>")
-def chat(model_name):
-    return render_template("chat.html", model_name=model_name)
-@app.route("/generate/<model_name>", methods=["POST"])
-def generate_response(model_name):
-    prompt = request.json.get("prompt")
-    if not prompt:
-        return jsonify({"error": "No prompt provided"}), 400
-    # Load the model if not already in cache
-    if model_name not in loaded_models:
-        model_path = os.path.join(app.config["MODEL_FOLDER"], model_name)
-        if not os.path.exists(model_path):
-            return jsonify({"error": f"Model '{model_name}' not found"}), 404
-        try:
-            # Clear GPU memory and load the model
-            torch.cuda.empty_cache()
-            model = GPT2LMHeadModel.from_pretrained(model_path)
-            model.to(device)
-            loaded_models[model_name] = model
-        except Exception as e:
-            return jsonify({"error": f"Failed to load model '{model_name}': {str(e)}"}), 500
-    # Generate response
-    model = loaded_models[model_name]
-    try:
-        inputs = tokenizer.encode(prompt, return_tensors="pt").to(device)
-        outputs = model.generate(
-            inputs,
-            max_length=50,
-            num_return_sequences=1,
-            no_repeat_ngram_size=2,
-            pad_token_id=tokenizer.eos_token_id
-        )
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    except torch.cuda.OutOfMemoryError:
-        torch.cuda.empty_cache()
-        return jsonify({"error": "Out of memory. Try a smaller model or shorter prompt."}), 500
-    except Exception as e:
-        return jsonify({"error": str(e)}), 500
-    finally:
-        # Clear GPU memory after generation to avoid leaks
-        torch.cuda.empty_cache()
-    return jsonify({"response": response})
 if __name__ == "__main__":
     os.makedirs(app.config["UPLOAD_FOLDER"], exist_ok=True)
-    os.makedirs(app.config["MODEL_FOLDER"], exist_ok=True)
     app.run(debug=True)

+from flask import Flask, render_template, request, jsonify, redirect, url_for
+from huggingface_hub import InferenceClient
 import os
+import json
 import pandas as pd
+import PyPDF2
+import docx
+from werkzeug.utils import secure_filename
 app = Flask(__name__)
 app.config["UPLOAD_FOLDER"] = "uploads"
+app.config["HISTORY_FILE"] = "history.json"
+# Initialize Hugging Face API client
+API_KEY = "APIHUGGING"  # Replace with your key
+client = InferenceClient(api_key=API_KEY)
+# Allowed file extensions
ALLOWED_EXTENSIONS = {"txt", "csv", "json", "pdf", "docx"}


def allowed_file(filename):
    """Tell whether *filename* ends in one of the ALLOWED_EXTENSIONS.

    The extension is the text after the last dot and is compared
    case-insensitively. A name that contains no dot is rejected.
    """
    if "." not in filename:
        return False
    _, extension = filename.rsplit(".", 1)
    return extension.lower() in ALLOWED_EXTENSIONS
+# Utility: Load conversation history
def load_history():
    """Load the conversation history from the configured JSON file.

    Returns:
        Whatever JSON value is stored in ``app.config["HISTORY_FILE"]``, or an
        empty list when the file does not exist or does not hold valid JSON.
    """
    try:
        with open(app.config["HISTORY_FILE"], "r", encoding="utf-8") as file:
            return json.load(file)
    except FileNotFoundError:
        return []
    except json.JSONDecodeError:
        # An empty or truncated file would otherwise make every request that
        # loads the history fail; start from a clean history instead.
        return []
+# Utility: Save conversation history
def save_history(history):
    """Write *history* to the configured history file as 4-space-indented JSON.

    The file named by ``app.config["HISTORY_FILE"]`` is opened in write mode,
    so its previous contents are replaced.
    """
    history_path = app.config["HISTORY_FILE"]
    with open(history_path, "w") as out:
        json.dump(history, out, indent=4)
+# Utility: Extract text from files
def extract_text(file_path, file_type):
    """Return the textual content of the file at *file_path*.

    *file_type* selects the reader:

    * ``"txt"``  - the file is read as text and returned as-is.
    * ``"csv"``  - parsed with pandas and rendered via ``DataFrame.to_string()``.
    * ``"json"`` - parsed and re-serialised with a 4-space indent.
    * ``"pdf"``  - the extracted text of every page, concatenated in order.
    * ``"docx"`` - the paragraph texts joined with newlines.

    Any other *file_type* yields an empty string.
    """
    if file_type == "txt":
        with open(file_path, "r") as handle:
            return handle.read()

    if file_type == "csv":
        frame = pd.read_csv(file_path)
        return frame.to_string()

    if file_type == "json":
        with open(file_path, "r") as handle:
            parsed = json.load(handle)
        return json.dumps(parsed, indent=4)

    if file_type == "pdf":
        # Pages are read while the file is still open.
        with open(file_path, "rb") as handle:
            pdf_pages = PyPDF2.PdfReader(handle).pages
            return "".join(page.extract_text() for page in pdf_pages)

    if file_type == "docx":
        document = docx.Document(file_path)
        return "\n".join(paragraph.text for paragraph in document.paragraphs)

    return ""
+# Hugging Face Chat Response
def get_bot_response(messages):
    """Request a streamed chat completion and return the assembled reply.

    Args:
        messages: Chat messages passed unchanged as the ``messages`` argument
            of ``client.chat.completions.create``.

    Returns:
        The concatenation of the text carried by each streamed chunk.
    """
    stream = client.chat.completions.create(
        model="Qwen/Qwen2.5-Coder-32B-Instruct",
        messages=messages,
        max_tokens=500,
        stream=True,
    )
    pieces = []
    for chunk in stream:
        if not chunk.choices:
            continue
        new_content = chunk.choices[0].delta.content
        # A streamed delta may carry no text (content is an optional field);
        # appending None to a string would raise TypeError, so skip it.
        if new_content:
            pieces.append(new_content)
    return "".join(pieces)
 @app.route("/")
 def home():
+    history = load_history()
+    return render_template("home.html", history=history)
 @app.route("/upload", methods=["POST"])
 def upload_file():
+    if "file" not in request.files:
         return redirect(request.url)
     file = request.files["file"]
+    if file and allowed_file(file.filename):
+        filename = secure_filename(file.filename)
+        file_path = os.path.join(app.config["UPLOAD_FOLDER"], filename)
+        os.makedirs(app.config["UPLOAD_FOLDER"], exist_ok=True)
+        file.save(file_path)
+        # Extract text from file
+        file_type = filename.rsplit(".", 1)[1].lower()
+        extracted_text = extract_text(file_path, file_type)
+        # Update conversation history
+        history = load_history()
+        history.append({"role": "user", "content": f"File content:\n{extracted_text}"})
+        # Get response from Hugging Face API
+        bot_response = get_bot_response(history)
+        history.append({"role": "assistant", "content": bot_response})
+        save_history(history)
+        return jsonify({"response": bot_response})
     else:
+        return jsonify({"error": "Invalid file type"}), 400
@app.route("/generate", methods=["POST"])
def generate_response():
    """Answer a chat message sent as JSON and record the exchange.

    Expects a JSON object with a non-empty ``message`` field. The message is
    appended to the conversation history, sent to the model together with the
    earlier messages, and the reply is appended and saved.

    Returns:
        ``{"response": ...}`` on success, or ``{"error": "Message is
        required"}`` with status 400 when the body is not a JSON object or has
        no ``message``.
    """
    # silent=True yields None instead of raising on a missing or malformed JSON
    # body; the isinstance check also covers valid JSON that is not an object.
    data = request.get_json(silent=True)
    user_message = data.get("message") if isinstance(data, dict) else None
    if not user_message:
        return jsonify({"error": "Message is required"}), 400

    # Update conversation history
    history = load_history()
    history.append({"role": "user", "content": user_message})

    # Get response from Hugging Face API
    bot_response = get_bot_response(history)
    history.append({"role": "assistant", "content": bot_response})
    save_history(history)
    return jsonify({"response": bot_response})
 if __name__ == "__main__":
     os.makedirs(app.config["UPLOAD_FOLDER"], exist_ok=True)
     app.run(debug=True)