GabrielSalem committed
Commit d061bf7 · verified · 1 parent: 7373476

Update app.py

Files changed (1): app.py (+115, -122)
app.py CHANGED
@@ -1,152 +1,145 @@
- from flask import Flask, render_template, request, redirect, url_for, send_file, jsonify
- from transformers import GPT2LMHeadModel, GPT2Tokenizer
  import os
- import torch
- import zipfile
  import pandas as pd
- from utils import preprocess_data, train_model

  app = Flask(__name__)
  app.config["UPLOAD_FOLDER"] = "uploads"
- app.config["MODEL_FOLDER"] = "models"
-
- # Initialize device
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
- # Load tokenizer and set padding if needed
- model_name = "gpt2"
- tokenizer = GPT2Tokenizer.from_pretrained(model_name)
- tokenizer.add_special_tokens({'pad_token': '[PAD]'})
-
- # Cache for loaded models to avoid redundant loading
- loaded_models = {}


  @app.route("/")
  def home():
-     # List available models
-     models = [model for model in os.listdir(app.config["MODEL_FOLDER"]) if
-               os.path.isdir(os.path.join(app.config["MODEL_FOLDER"], model))]
-     return render_template("home.html", models=models)


  @app.route("/upload", methods=["POST"])
  def upload_file():
-     if "file" not in request.files or "model_name" not in request.form:
          return redirect(request.url)

      file = request.files["file"]
-     model_name = request.form["model_name"]
-
-     if not file.filename or not model_name:
-         return redirect(request.url)
-
-     # Prepare directories and paths
-     model_path = os.path.join(app.config["MODEL_FOLDER"], model_name)
-     os.makedirs(model_path, exist_ok=True)
-     filepath = os.path.join(app.config["UPLOAD_FOLDER"], file.filename)
-     file.save(filepath)
-
-     # Load and preprocess data
-     try:
-         df = pd.read_csv(filepath)
-         dataset = preprocess_data(df, tokenizer)
-     except Exception as e:
-         return f"Data processing error: {e}", 500
-
-     # Train and save model
-     try:
-         # Clear any previous GPU memory allocation
-         torch.cuda.empty_cache()
-
-         model = GPT2LMHeadModel.from_pretrained("gpt2")
-         model.resize_token_embeddings(len(tokenizer))
-         model.to(device)
-
-         # Train the model
-         train_model(model, tokenizer, dataset, model_path)
-
-         # Clear GPU memory right after training
-         del model
-         torch.cuda.empty_cache()
-     except torch.cuda.OutOfMemoryError:
-         # Clear memory in case of OOM error and return an appropriate message
-         torch.cuda.empty_cache()
-         return "CUDA out of memory error. Try a smaller model or reduce batch size.", 500
-     except Exception as e:
-         return f"Model training error: {e}", 500
-
-     # Zip the model files for download
-     model_zip_path = os.path.join(model_path, f"{model_name}.zip")
-     with zipfile.ZipFile(model_zip_path, 'w') as model_zip:
-         for folder, _, files in os.walk(model_path):
-             for file_name in files:
-                 file_path = os.path.join(folder, file_name)
-                 model_zip.write(file_path, os.path.relpath(file_path, app.config["MODEL_FOLDER"]))
-
-     return redirect(url_for("home"))
-
-
- @app.route("/download/<model_name>")
- def download_model(model_name):
-     model_path = os.path.join(app.config["MODEL_FOLDER"], model_name, f"{model_name}.zip")
-     if os.path.exists(model_path):
-         return send_file(model_path, as_attachment=True)
      else:
-         return "Model not found", 404
-
-
- @app.route("/chat/<model_name>")
- def chat(model_name):
-     return render_template("chat.html", model_name=model_name)
-
-
- @app.route("/generate/<model_name>", methods=["POST"])
- def generate_response(model_name):
-     prompt = request.json.get("prompt")
-     if not prompt:
-         return jsonify({"error": "No prompt provided"}), 400
-
-     # Load the model if not already in cache
-     if model_name not in loaded_models:
-         model_path = os.path.join(app.config["MODEL_FOLDER"], model_name)
-         if not os.path.exists(model_path):
-             return jsonify({"error": f"Model '{model_name}' not found"}), 404
-         try:
-             # Clear GPU memory and load the model
-             torch.cuda.empty_cache()
-             model = GPT2LMHeadModel.from_pretrained(model_path)
-             model.to(device)
-             loaded_models[model_name] = model
-         except Exception as e:
-             return jsonify({"error": f"Failed to load model '{model_name}': {str(e)}"}), 500
-
-     # Generate response
-     model = loaded_models[model_name]
-     try:
-         inputs = tokenizer.encode(prompt, return_tensors="pt").to(device)
-         outputs = model.generate(
-             inputs,
-             max_length=50,
-             num_return_sequences=1,
-             no_repeat_ngram_size=2,
-             pad_token_id=tokenizer.eos_token_id
-         )
-         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-     except torch.cuda.OutOfMemoryError:
-         torch.cuda.empty_cache()
-         return jsonify({"error": "Out of memory. Try a smaller model or shorter prompt."}), 500
-     except Exception as e:
-         return jsonify({"error": str(e)}), 500
-     finally:
-         # Clear GPU memory after generation to avoid leaks
-         torch.cuda.empty_cache()
-
-     return jsonify({"response": response})


  if __name__ == "__main__":
      os.makedirs(app.config["UPLOAD_FOLDER"], exist_ok=True)
-     os.makedirs(app.config["MODEL_FOLDER"], exist_ok=True)
      app.run(debug=True)
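Note: the version removed above imports preprocess_data and train_model from a utils module that is not part of this commit. Purely for orientation, here is a hypothetical sketch of what such helpers could look like; the signatures are inferred from the call sites, while the "text" column, the dataset wrapper, and the hyperparameters are assumptions, not the repository's actual utils.py.

# Hypothetical sketch of the helpers the removed app.py imported from utils.py.
import torch
from torch.utils.data import Dataset, DataLoader


class TextDataset(Dataset):
    """Wraps tokenized rows of the uploaded CSV (assumes a 'text' column)."""

    def __init__(self, encodings):
        self.encodings = encodings

    def __len__(self):
        return len(self.encodings["input_ids"])

    def __getitem__(self, idx):
        return {key: val[idx] for key, val in self.encodings.items()}


def preprocess_data(df, tokenizer, max_length=128):
    # Tokenize every row of the assumed 'text' column with padding and truncation.
    encodings = tokenizer(
        df["text"].astype(str).tolist(),
        truncation=True,
        padding="max_length",
        max_length=max_length,
        return_tensors="pt",
    )
    return TextDataset(encodings)


def train_model(model, tokenizer, dataset, model_path, epochs=1, batch_size=2, lr=5e-5):
    # Minimal causal-LM fine-tuning loop; labels are the input ids themselves.
    device = next(model.parameters()).device
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
    model.train()
    for _ in range(epochs):
        for batch in loader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            outputs = model(input_ids, attention_mask=attention_mask, labels=input_ids)
            outputs.loss.backward()
            optimizer.step()
            optimizer.zero_grad()
    # Save weights and tokenizer so /download and /generate can reload them.
    model.save_pretrained(model_path)
    tokenizer.save_pretrained(model_path)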
 
 
+ from flask import Flask, render_template, request, jsonify, redirect, url_for
+ from huggingface_hub import InferenceClient
  import os
+ import json
  import pandas as pd
+ import PyPDF2
+ import docx
+ from werkzeug.utils import secure_filename

  app = Flask(__name__)
  app.config["UPLOAD_FOLDER"] = "uploads"
+ app.config["HISTORY_FILE"] = "history.json"
+
+ # Initialize Hugging Face API client
+ API_KEY = "APIHUGGING" # Replace with your key
+ client = InferenceClient(api_key=API_KEY)
+
+ # Allowed file extensions
+ ALLOWED_EXTENSIONS = {"txt", "csv", "json", "pdf", "docx"}
+
+
+ # Utility: Check allowed file types
+ def allowed_file(filename):
+     return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS
+
+
+ # Utility: Load conversation history
+ def load_history():
+     try:
+         with open(app.config["HISTORY_FILE"], "r") as file:
+             return json.load(file)
+     except FileNotFoundError:
+         return []
+
+
+ # Utility: Save conversation history
+ def save_history(history):
+     with open(app.config["HISTORY_FILE"], "w") as file:
+         json.dump(history, file, indent=4)
+
+
+ # Utility: Extract text from files
+ def extract_text(file_path, file_type):
+     if file_type == "txt":
+         with open(file_path, "r") as f:
+             return f.read()
+     elif file_type == "csv":
+         df = pd.read_csv(file_path)
+         return df.to_string()
+     elif file_type == "json":
+         with open(file_path, "r") as f:
+             data = json.load(f)
+         return json.dumps(data, indent=4)
+     elif file_type == "pdf":
+         text = ""
+         with open(file_path, "rb") as f:
+             reader = PyPDF2.PdfReader(f)
+             for page in reader.pages:
+                 text += page.extract_text()
+         return text
+     elif file_type == "docx":
+         doc = docx.Document(file_path)
+         return "\n".join([p.text for p in doc.paragraphs])
+     else:
+         return ""
+
+
+ # Hugging Face Chat Response
+ def get_bot_response(messages):
+     stream = client.chat.completions.create(
+         model="Qwen/Qwen2.5-Coder-32B-Instruct",
+         messages=messages,
+         max_tokens=500,
+         stream=True
+     )
+     bot_response = ""
+     for chunk in stream:
+         if chunk.choices and len(chunk.choices) > 0:
+             new_content = chunk.choices[0].delta.content
+             bot_response += new_content
+     return bot_response


  @app.route("/")
  def home():
+     history = load_history()
+     return render_template("home.html", history=history)


  @app.route("/upload", methods=["POST"])
  def upload_file():
+     if "file" not in request.files:
          return redirect(request.url)

      file = request.files["file"]
+     if file and allowed_file(file.filename):
+         filename = secure_filename(file.filename)
+         file_path = os.path.join(app.config["UPLOAD_FOLDER"], filename)
+         os.makedirs(app.config["UPLOAD_FOLDER"], exist_ok=True)
+         file.save(file_path)
+
+         # Extract text from file
+         file_type = filename.rsplit(".", 1)[1].lower()
+         extracted_text = extract_text(file_path, file_type)
+
+         # Update conversation history
+         history = load_history()
+         history.append({"role": "user", "content": f"File content:\n{extracted_text}"})
+
+         # Get response from Hugging Face API
+         bot_response = get_bot_response(history)
+         history.append({"role": "assistant", "content": bot_response})
+
+         save_history(history)
+
+         return jsonify({"response": bot_response})
      else:
+         return jsonify({"error": "Invalid file type"}), 400
+
+
+ @app.route("/generate", methods=["POST"])
+ def generate_response():
+     data = request.json
+     user_message = data.get("message")
+     if not user_message:
+         return jsonify({"error": "Message is required"}), 400
+
+     # Update conversation history
+     history = load_history()
+     history.append({"role": "user", "content": user_message})
+
+     # Get response from Hugging Face API
+     bot_response = get_bot_response(history)
+     history.append({"role": "assistant", "content": bot_response})
+
+     save_history(history)
+
+     return jsonify({"response": bot_response})


  if __name__ == "__main__":
      os.makedirs(app.config["UPLOAD_FOLDER"], exist_ok=True)
      app.run(debug=True)
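For reference, a minimal sketch of how a client could exercise the two new endpoints. Only the routes and payload keys come from the diff; the script itself, the default Flask dev-server port, and the sample file name are assumptions.

# Hypothetical client script (not part of this commit).
import requests

BASE_URL = "http://127.0.0.1:5000"  # Flask dev server default; adjust if deployed elsewhere

# Chat turn: /generate appends the message to history.json, queries the hosted
# Qwen2.5-Coder-32B-Instruct model, and returns the assistant reply.
reply = requests.post(f"{BASE_URL}/generate", json={"message": "Summarize the last upload."})
print(reply.json())

# File turn: /upload saves the document, extracts its text, injects it into the
# conversation history as a user message, and returns the model's response.
with open("notes.pdf", "rb") as f:  # placeholder file name
    reply = requests.post(f"{BASE_URL}/upload", files={"file": f})
print(reply.json())

Two small notes on the added code: if a streamed chunk ever arrives with delta.content set to None, the += in get_bot_response would raise a TypeError, so guarding with `new_content or ""` would be a cheap safeguard; and the hardcoded API_KEY placeholder is presumably meant to be replaced with a real token supplied via an environment variable rather than committed.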