Files changed (10)
  1. app.py +704 -44
  2. deploy-docs.yml +59 -0
  3. dummy-agent-test.yml +33 -0
  4. ghcr.yml +82 -0
  5. lint.yml +57 -0
  6. review-pr.yml +69 -0
  7. run-integration-tests.yml +104 -0
  8. run-unit-tests.yml +129 -0
  9. solve-issue.yml +109 -0
  10. stale.yml +29 -0
app.py CHANGED
@@ -1,46 +1,706 @@
1
- """
2
- Graded Challenge 5
3
- Name: Devin Yaung Lee
4
- Batch: HCK-009
5
- This program is for deploying the model
6
- """
7
-
8
- import streamlit as st
9
- import eda
10
- import model
11
-
12
- page = st.sidebar.selectbox(label="Select Page:", options=["Home Page", "Exploratory Data Analysis", "Predict Payment"])
13
-
14
- if page == "Home Page":
15
- st.title("Home Page")
16
- st.write('')
17
- st.write("Graded Challenge 5")
18
- st.write("Name : Devin Yaung Lee")
19
- st.write("Batch : HCK-009")
20
- st.write("Aplikasi ini memiliki tujuan utama dari aplikasi ini adalah untuk menampilkan hasil analisis data eksploratori dan untuk memprediksi apakah klien akan gagal membayar tagihan kartu kredit mereka pada bulan berikutnya berdasarkan data yang diberikan oleh pengguna.")
21
- st.write('')
22
- st.write('')
23
- st.write('')
24
-
25
- with st.expander("Background Information"):
26
- st.caption("Data ini membahas tentang pembayaran kartu kredit pelanggan, dimana tujuan utama dari dashboard ini adalah untuk mengetahui apakah pelanggan melunasi atau terlambat untuk melunasi, dimana ketentuan ini akan dilakukan berdasarkan dari pengisian parameter-parameter yang sudah dilakukan.")
27
- with st.expander("Conclusion"):
28
- st.caption("""
29
- - The data is imbalanced; this can be seen
30
- from the train and test results, before and after hyperparameter
31
- tuning. The SVM model improved, but not significantly.
32
- - Based on the visualizations, customers who have already paid dominate.
33
- University is the most common education level. Gender is dominated by women,
34
- and for marital status most customers are single.
35
- - SVM was chosen as the model after testing
36
- Logistic Regression, KNN, and SVM.
37
- - The credit-behaviour analysis shows 'Use Revolving Credit' is the most common.
38
- Most customers choose to pay part of their bill
39
- each month.
40
- """)
41
-
42
- elif page == "Exploratory Data Analysis":
43
- eda.run() # Calls the run function from eda
44
45
  else:
46
- model.run() # Calls the run function from model
1
+ # SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: MIT
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining a
5
+ # copy of this software and associated documentation files (the "Software"),
6
+ # to deal in the Software without restriction, including without limitation
7
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
+ # and/or sell copies of the Software, and to permit persons to whom the
9
+ # Software is furnished to do so, subject to the following conditions:
10
+ #
11
+ # The above copyright notice and this permission notice shall be included in
12
+ # all copies or substantial portions of the Software.
13
+ #
14
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
+ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20
+ # DEALINGS IN THE SOFTWARE.
21
+ import os
22
+ import sys
23
+ import time
24
+ import calendar
25
+ import json
26
+ from model_setup_manager import download_model_by_name, build_engine_by_name
27
+ import logging
28
+ import gc
29
+ import torch
30
+ from pathlib import Path
31
+ from trt_llama_api import TrtLlmAPI
32
+ from whisper.trt_whisper import WhisperTRTLLM, decode_audio_file
33
+ #from langchain.embeddings.huggingface import HuggingFaceEmbeddings
34
+ #from langchain_community.embeddings import HuggingFaceBgeEmbeddings
35
+ from langchain.embeddings.huggingface import HuggingFaceEmbeddings
36
+ from collections import defaultdict
37
+ from llama_index import ServiceContext
38
+ from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt
39
+ from llama_index import set_global_service_context
40
+ from faiss_vector_storage import FaissEmbeddingStorage
41
+ from ui.user_interface import MainInterface
42
+ from scipy.io import wavfile
43
+ import scipy.signal as sps
44
+ import numpy as np
45
+ from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo
46
+ from CLIP import run_model_on_images, CLIPEmbeddingStorageEngine
47
+ from PIL import Image
48
+ from transformers import CLIPProcessor, CLIPModel
49
+ import shutil
50
+ from llm_prompt_templates import LLMPromptTemplate
51
+ from utils import (read_model_name)
52
+ import win32api
53
+ import win32security
54
 
55
+ selected_CLIP = False
56
+ clip_engine = None
57
+ selected_ChatGLM = False
58
+ app_config_file = 'config\\app_config.json'
59
+ model_config_file = 'config\\config.json'
60
+ preference_config_file = 'config\\preferences.json'
61
+ data_source = 'directory'
62
+
63
+ # Use GetCurrentProcess to get a handle to the current process
64
+ hproc = win32api.GetCurrentProcess()
65
+ # Use OpenProcessToken to get the access token of the current process
66
+ htok = win32security.OpenProcessToken(hproc, win32security.TOKEN_QUERY)
67
+
68
+ # Retrieve the list of privileges enabled
69
+ privileges = win32security.GetTokenInformation(htok, win32security.TokenPrivileges)
70
+
71
+ # Iterate over privileges and output the ones that are enabled
72
+ priv_list = []
73
+ for priv_id, priv_flags in privileges:
74
+ # Check whether the privilege is enabled (the attributes value is a bit mask)
75
+ if priv_flags & (win32security.SE_PRIVILEGE_ENABLED | win32security.SE_PRIVILEGE_ENABLED_BY_DEFAULT):
76
+ # Lookup the name of the privilege
77
+ priv_name = win32security.LookupPrivilegeName(None, priv_id)
78
+ priv_list.append(priv_name)
79
+
80
+ print(f"Privileges of app process: {priv_list}")
81
+
82
+ def read_config(file_name):
83
+ try:
84
+ with open(file_name, 'r', encoding='utf8') as file:
85
+ return json.load(file)
86
+ except FileNotFoundError:
87
+ print(f"The file {file_name} was not found.")
88
+ except json.JSONDecodeError:
89
+ print(f"There was an error decoding the JSON from the file {file_name}.")
90
+ except Exception as e:
91
+ print(f"An unexpected error occurred: {e}")
92
+ return None
93
+
94
+
95
+ def get_model_config(config, model_name=None):
96
+ selected_model = next((model for model in config["models"]["supported"] if model["name"] == model_name),
97
+ config["models"]["supported"][0])
98
+ metadata = selected_model["metadata"]
99
+
100
+ cwd = os.getcwd() # Current working directory, to avoid calling os.getcwd() multiple times
101
+
102
+ if "ngc_model_name" in selected_model:
103
+ return {
104
+ "model_path": os.path.join(cwd, "model", selected_model["id"], "engine") if "id" in selected_model else None,
105
+ "engine": metadata.get("engine", None),
106
+ "tokenizer_path": os.path.join(cwd, "model", selected_model["id"] ,selected_model["prerequisite"]["tokenizer_local_dir"] ) if "tokenizer_local_dir" in selected_model["prerequisite"] else None,
107
+ "vocab": os.path.join(cwd, "model", selected_model["id"] ,selected_model["prerequisite"]["vocab_local_dir"], selected_model["prerequisite"]["tokenizer_files"]["vocab_file"]) if "vocab_local_dir" in selected_model["prerequisite"] else None,
108
+ "max_new_tokens": metadata.get("max_new_tokens", None),
109
+ "max_input_token": metadata.get("max_input_token", None),
110
+ "temperature": metadata.get("temperature", None),
111
+ "prompt_template": metadata.get("prompt_template", None)
112
+ }
113
+ elif "hf_model_name" in selected_model:
114
+ return {
115
+ "model_path": os.path.join(cwd, "model", selected_model["id"]) if "id" in selected_model else None,
116
+ "tokenizer_path": os.path.join(cwd, "model", selected_model["id"]) if "id" in selected_model else None,
117
+ "prompt_template": metadata.get("prompt_template", None)
118
+ }
119
+
120
+
121
+ def get_asr_model_config(config, model_name=None):
122
+ models = config["models"]["supported_asr"]
123
+ selected_model = next((model for model in models if model["name"] == model_name), models[0])
124
+ return {
125
+ "model_path": os.path.join(os.getcwd(), selected_model["metadata"]["model_path"]),
126
+ "assets_path": os.path.join(os.getcwd(), selected_model["metadata"]["assets_path"])
127
+ }
128
+
129
+ def get_data_path(config):
130
+ return os.path.join(os.getcwd(), config["dataset"]["path"])
131
+
132
+ # read the app specific config
133
+ app_config = read_config(app_config_file)
134
+ streaming = app_config["streaming"]
135
+ similarity_top_k = app_config["similarity_top_k"]
136
+ is_chat_engine = app_config["is_chat_engine"]
137
+ embedded_model_name = app_config["embedded_model"]
138
+ embedded_model = os.path.join(os.getcwd(), "model", embedded_model_name)
139
+ embedded_dimension = app_config["embedded_dimension"]
140
+ use_py_session = app_config["use_py_session"]
141
+ trtLlm_debug_mode = app_config["trtLlm_debug_mode"]
142
+ add_special_tokens = app_config["add_special_tokens"]
143
+ verbose = app_config["verbose"]
144
+
145
+ # read model specific config
146
+ selected_model_name = None
147
+ selected_data_directory = None
148
+ config = read_config(model_config_file)
149
+ if os.path.exists(preference_config_file):
150
+ perf_config = read_config(preference_config_file)
151
+ selected_model_name = perf_config.get('models', {}).get('selected')
152
+ selected_data_directory = perf_config.get('dataset', {}).get('path')
153
+
154
+ if selected_model_name is None:
155
+ selected_model_name = config["models"].get("selected")
156
+
157
+ if selected_model_name == "CLIP":
158
+ selected_CLIP = True
159
+ if selected_model_name == "ChatGLM 3 6B int4 (Supports Chinese)":
160
+ selected_ChatGLM = True
161
+
162
+ model_config = get_model_config(config, selected_model_name)
163
+ data_dir = config["dataset"]["path"] if selected_data_directory is None else selected_data_directory
164
+
165
+ asr_model_name = "Whisper Medium Int8"
166
+ asr_model_config = get_asr_model_config(config, asr_model_name)
167
+ asr_engine_path = asr_model_config["model_path"]
168
+ asr_assets_path = asr_model_config["assets_path"]
169
+
170
+ whisper_model = None
171
+ whisper_model_loaded = False
172
+ enable_asr = config["models"]["enable_asr"]
173
+ nvmlInit()
174
+
175
+ def generate_inferance_engine(data, force_rewrite=False):
176
+ """
177
+ Initialize and return a FAISS-based inference engine.
178
+
179
+ Args:
180
+ data: The directory where the data for the inference engine is located.
181
+ force_rewrite (bool): If True, force rewriting the index.
182
+
183
+ Returns:
184
+ The initialized inference engine.
185
+
186
+ Raises:
187
+ RuntimeError: If unable to generate the inference engine.
188
+ """
189
+ try:
190
+ global engine
191
+ faiss_storage = FaissEmbeddingStorage(data_dir=data,
192
+ dimension=embedded_dimension)
193
+ faiss_storage.initialize_index(force_rewrite=force_rewrite)
194
+ engine = faiss_storage.get_engine(is_chat_engine=is_chat_engine, streaming=streaming,
195
+ similarity_top_k=similarity_top_k)
196
+ except Exception as e:
197
+ raise RuntimeError(f"Unable to generate the inference engine: {e}")
198
+
199
+ def generate_clip_engine(data_dir, model_path, clip_model, clip_processor, force_rewrite=False):
200
+ global clip_engine
201
+ clip_engine = CLIPEmbeddingStorageEngine(data_dir, model_path, clip_model, clip_processor)
202
+ clip_engine.create_nodes(force_rewrite)
203
+ clip_engine.initialize_index(force_rewrite)
204
+
205
+ llm = None
206
+ embed_model = None
207
+ service_context = None
208
+ clip_model = None
209
+ clip_processor = None
210
+
211
+ if selected_CLIP:
212
+ # Initialize model and processor
213
+ clip_model = CLIPModel.from_pretrained(model_config["model_path"]).to('cuda')
214
+ clip_processor = CLIPProcessor.from_pretrained(model_config["model_path"])
215
+ generate_clip_engine(data_dir, model_config["model_path"], clip_model, clip_processor)
216
  else:
217
+ # create trt_llm engine object
218
+ model_name, _ = read_model_name(model_config["model_path"])
219
+ prompt_template_obj = LLMPromptTemplate()
220
+ text_qa_template_str = prompt_template_obj.model_context_template(model_name)
221
+ selected_completion_to_prompt = text_qa_template_str
222
+ llm = TrtLlmAPI(
223
+ model_path=model_config["model_path"],
224
+ engine_name=model_config["engine"],
225
+ tokenizer_dir=model_config["tokenizer_path"],
226
+ temperature=model_config["temperature"],
227
+ max_new_tokens=model_config["max_new_tokens"],
228
+ context_window=model_config["max_input_token"],
229
+ vocab_file=model_config["vocab"],
230
+ messages_to_prompt=messages_to_prompt,
231
+ completion_to_prompt=selected_completion_to_prompt,
232
+ use_py_session=use_py_session,
233
+ add_special_tokens=add_special_tokens,
234
+ trtLlm_debug_mode=trtLlm_debug_mode,
235
+ verbose=verbose
236
+ )
237
+
238
+ # create embeddings model object
239
+ embed_model = HuggingFaceEmbeddings(model_name=embedded_model)
240
+ service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model,
241
+ context_window=model_config["max_input_token"], chunk_size=512,
242
+ chunk_overlap=200)
243
+ set_global_service_context(service_context)
244
+
245
+ # load the vectorstore index
246
+ generate_inferance_engine(data_dir)
247
+
248
+ def call_llm_streamed(query):
249
+ partial_response = ""
250
+ response = llm.stream_complete(query, formatted=False)
251
+ for token in response:
252
+ partial_response += token.delta
253
+ yield partial_response
254
+
255
+ def chatbot(query, chat_history, session_id):
256
+ if selected_CLIP:
257
+ ts = calendar.timegm(time.gmtime())
258
+ temp_image_folder_name = "Temp/Temp_Images"
259
+ if os.path.isdir(temp_image_folder_name):
260
+ try:
261
+ shutil.rmtree(os.path.join(os.getcwd(), temp_image_folder_name))
262
+ except Exception as e:
263
+ print("Exception during folder delete", e)
264
+ image_results_path = os.path.join(os.getcwd(), temp_image_folder_name, str(ts))
265
+ res_im_paths = clip_engine.query(query, image_results_path)
266
+ if len(res_im_paths) == 0:
267
+ yield "No supported images found in the selected folder"
268
+ torch.cuda.empty_cache()
269
+ gc.collect()
270
+ return
271
+
272
+ div_start = '<div class="chat-output-images">'
273
+ div_end = '</div>'
274
+ im_elements = ''
275
+ for i, im in enumerate(res_im_paths):
276
+ if i > 2: break  # display at most 3 images
277
+ cur_data_link_src = temp_image_folder_name +"/" + str(ts) + "/" + os.path.basename(im)
278
+ cur_src = "file/" + temp_image_folder_name +"/" + str(ts) + "/" + os.path.basename(im)
279
+ im_elements += '<img data-link="{data_link_src}" src="{src}"/>'.format(src=cur_src, data_link_src=cur_data_link_src)
280
+ full_div = (div_start + im_elements + div_end)
281
+ folder_link = f'<a data-link="{image_results_path}">{"See all matches"}</a>'
282
+ prefix = ""
283
+ if(len(res_im_paths)>1):
284
+ prefix = "Here are the top matching pictures from your dataset"
285
+ else:
286
+ prefix = "Here is the top matching picture from your dataset"
287
+ response = prefix + "<br>"+ full_div + "<br>"+ folder_link
288
+
289
+ gc.collect()
290
+ torch.cuda.empty_cache()
291
+ yield response
292
+ torch.cuda.empty_cache()
293
+ gc.collect()
294
+ return
295
+
296
+ if data_source == "nodataset":
297
+ yield llm.complete(query, formatted=False).text
298
+ return
299
+
300
+ if is_chat_engine:
301
+ response = engine.chat(query)
302
+ else:
303
+ response = engine.query(query)
304
+
305
+ lowest_score_file = None
306
+ lowest_score = sys.float_info.max
307
+ for node in response.source_nodes:
308
+ metadata = node.metadata
309
+ if 'filename' in metadata:
310
+ if node.score < lowest_score:
311
+ lowest_score = node.score
312
+ lowest_score_file = metadata['filename']
313
+
314
+ file_links = []
315
+ seen_files = set() # Set to track unique file names
316
+ ts = calendar.timegm(time.gmtime())
317
+ temp_docs_folder_name = "Temp/Temp_Docs"
318
+ docs_path = os.path.join(os.getcwd(), temp_docs_folder_name, str(ts))
319
+ os.makedirs(docs_path, exist_ok=True)
320
+
321
+ # Generate a link for the source file whose node has the lowest score
322
+ if lowest_score_file:
323
+ abs_path = Path(os.path.join(os.getcwd(), lowest_score_file.replace('\\', '/')))
324
+ file_name = os.path.basename(abs_path)
325
+ doc_path = os.path.join(docs_path, file_name)
326
+ shutil.copy(abs_path, doc_path)
327
+
328
+ if file_name not in seen_files: # Ensure the file hasn't already been processed
329
+ if data_source == 'directory':
330
+ file_link = f'<a data-link="{doc_path}">{file_name}</a>'
331
+ else:
332
+ exit("Wrong data_source type")
333
+ file_links.append(file_link)
334
+ seen_files.add(file_name) # Mark file as processed
335
+
336
+ response_txt = str(response)
337
+ if file_links:
338
+ response_txt += "<br>Reference files:<br>" + "<br>".join(file_links)
339
+ if not lowest_score_file: # If no source file was found, fall back to the plain LLM answer
340
+ response_txt = llm.complete(query).text
341
+ yield response_txt
342
+
343
+ def stream_chatbot(query, chat_history, session_id):
344
+
345
+ if selected_CLIP:
346
+ ts = calendar.timegm(time.gmtime())
347
+ temp_image_folder_name = "Temp/Temp_Images"
348
+ if os.path.isdir(temp_image_folder_name):
349
+ try:
350
+ shutil.rmtree(os.path.join(os.getcwd(), temp_image_folder_name))
351
+ except Exception as e:
352
+ print("Exception during folder delete", e)
353
+ image_results_path = os.path.join(os.getcwd(), temp_image_folder_name, str(ts))
354
+ res_im_paths = clip_engine.query(query, image_results_path)
355
+ if len(res_im_paths) == 0:
356
+ yield "No supported images found in the selected folder"
357
+ torch.cuda.empty_cache()
358
+ gc.collect()
359
+ return
360
+ div_start = '<div class="chat-output-images">'
361
+ div_end = '</div>'
362
+ im_elements = ''
363
+ for i, im in enumerate(res_im_paths):
364
+ if i > 2: break  # display at most 3 images
365
+ cur_data_link_src = temp_image_folder_name +"/" + str(ts) + "/" + os.path.basename(im)
366
+ cur_src = "file/" + temp_image_folder_name +"/" + str(ts) + "/" + os.path.basename(im)
367
+ im_elements += '<img data-link="{data_link_src}" src="{src}"/>'.format(src=cur_src, data_link_src=cur_data_link_src)
368
+ full_div = (div_start + im_elements + div_end)
369
+ folder_link = f'<a data-link="{image_results_path}">{"See all matches"}</a>'
370
+ prefix = ""
371
+ if(len(res_im_paths)>1):
372
+ prefix = "Here are the top matching pictures from your dataset"
373
+ else:
374
+ prefix = "Here is the top matching picture from your dataset"
375
+ response = prefix + "<br>"+ full_div + "<br>"+ folder_link
376
+ yield response
377
+ torch.cuda.empty_cache()
378
+ gc.collect()
379
+ return
380
+
381
+ if data_source == "nodataset":
382
+ for response in call_llm_streamed(query):
383
+ yield response
384
+ return
385
+
386
+ if is_chat_engine:
387
+ response = engine.stream_chat(query)
388
+ else:
389
+ response = engine.query(query)
390
+
391
+ partial_response = ""
392
+ if len(response.source_nodes) == 0:
393
+ response = llm.stream_complete(query, formatted=False)
394
+ for token in response:
395
+ partial_response += token.delta
396
+ yield partial_response
397
+ else:
398
+ # Track the source file whose node has the lowest score
399
+ lowest_score_file = None
400
+ lowest_score = sys.float_info.max
401
+
402
+ for node in response.source_nodes:
403
+ if 'filename' in node.metadata:
404
+ if node.score < lowest_score:
405
+ lowest_score = node.score
406
+ lowest_score_file = node.metadata['filename']
407
+
408
+ file_links = []
409
+ seen_files = set()
410
+ for token in response.response_gen:
411
+ partial_response += token
412
+ yield partial_response
413
+ time.sleep(0.05)
414
+
415
+ time.sleep(0.2)
416
+ ts = calendar.timegm(time.gmtime())
417
+ temp_docs_folder_name = "Temp/Temp_Docs"
418
+ docs_path = os.path.join(os.getcwd(), temp_docs_folder_name, str(ts))
419
+ os.makedirs(docs_path, exist_ok=True)
420
+
421
+ if lowest_score_file:
422
+ abs_path = Path(os.path.join(os.getcwd(), lowest_score_file.replace('\\', '/')))
423
+ file_name = os.path.basename(abs_path)
424
+ doc_path = os.path.join(docs_path, file_name)
425
+ shutil.copy(abs_path, doc_path)
426
+ if file_name not in seen_files: # Check if file_name is already seen
427
+ if data_source == 'directory':
428
+ file_link = f'<a data-link="{doc_path}">{file_name}</a>'
429
+ else:
430
+ exit("Wrong data_source type")
431
+ file_links.append(file_link)
432
+ seen_files.add(file_name) # Add file_name to the set
433
+
434
+ if file_links:
435
+ partial_response += "<br>Reference files:<br>" + "<br>".join(file_links)
436
+ yield partial_response
437
+
438
+ # call garbage collector after inference
439
+ torch.cuda.empty_cache()
440
+ gc.collect()
441
+
442
+
443
+ interface = MainInterface(chatbot=stream_chatbot if streaming else chatbot, streaming=streaming)
444
+
445
+
446
+ def on_shutdown_handler(session_id):
447
+ global llm, whisper_model, clip_model, clip_processor, clip_engine
448
+ import gc
449
+ if whisper_model is not None:
450
+ whisper_model.unload_model()
451
+ del whisper_model
452
+ whisper_model = None
453
+ if llm is not None:
454
+ llm.unload_model()
455
+ del llm
456
+ llm = None
457
+ if clip_model is not None:
458
+ del clip_model
459
+ del clip_processor
460
+ del clip_engine
461
+ clip_model = None
462
+ clip_processor = None
463
+ clip_engine = None
464
+ temp_data_folder_name = "Temp"
465
+ if os.path.isdir(temp_data_folder_name):
466
+ try:
467
+ shutil.rmtree(os.path.join(os.getcwd(), temp_data_folder_name))
468
+ except Exception as e:
469
+ print("Exception during temp folder delete", e)
470
+ # Force a garbage collection cycle
471
+ gc.collect()
472
+
473
+
474
+ interface.on_shutdown(on_shutdown_handler)
475
+
476
+
477
+ def reset_chat_handler(session_id):
478
+ global faiss_storage
479
+ global engine
480
+ print('reset chat called', session_id)
481
+ if selected_CLIP:
482
+ return
483
+ if is_chat_engine:
484
+ faiss_storage.reset_engine(engine)
485
+
486
+
487
+ interface.on_reset_chat(reset_chat_handler)
488
+
489
+
490
+ def on_dataset_path_updated_handler(source, new_directory, video_count, session_id):
491
+ print('data set path updated to ', source, new_directory, video_count, session_id)
492
+ global engine
493
+ global data_dir
494
+ if selected_CLIP:
495
+ data_dir = new_directory
496
+ generate_clip_engine(data_dir, model_config["model_path"], clip_model, clip_processor)
497
+ return
498
+ if source == 'directory':
499
+ if data_dir != new_directory:
500
+ data_dir = new_directory
501
+ generate_inferance_engine(data_dir)
502
+
503
+
504
+ interface.on_dataset_path_updated(on_dataset_path_updated_handler)
505
+
506
+
507
+ def on_model_change_handler(model, model_info, session_id):
508
+ global llm, embedded_model, engine, data_dir, service_context, clip_model, clip_processor, selected_CLIP, selected_model_name, embed_model, model_config, selected_ChatGLM, clip_engine
509
+ selected_model_name = model
510
+ selected_ChatGLM = False
511
+
512
+ if llm is not None:
513
+ llm.unload_model()
514
+ del llm
515
+ llm = None
516
+
517
+ if clip_model is not None:
518
+ del clip_model
519
+ clip_model = None
520
+ del clip_processor
521
+ clip_processor = None
522
+ del clip_engine
523
+ clip_engine = None
524
+
525
+ torch.cuda.empty_cache()
526
+ gc.collect()
527
+
528
+ cwd = os.getcwd()
529
+ model_config = get_model_config(config, selected_model_name)
530
+
531
+ selected_CLIP = False
532
+ if selected_model_name == "CLIP":
533
+ selected_CLIP = True
534
+ if clip_model is None:
535
+ clip_model = CLIPModel.from_pretrained(model_config["model_path"]).to('cuda')
536
+ clip_processor = CLIPProcessor.from_pretrained(model_config["model_path"])
537
+ generate_clip_engine(data_dir, model_config["model_path"], clip_model, clip_processor)
538
+ return
539
+
540
+ model_path = os.path.join(cwd, "model", model_info["id"], "engine") if "id" in model_info else None
541
+ engine_name = model_info["metadata"].get('engine', None)
542
+
543
+ if not model_path or not engine_name:
544
+ print("Model path or engine not provided in metadata")
545
+ return
546
+
547
+ if selected_model_name == "ChatGLM 3 6B int4 (Supports Chinese)":
548
+ selected_ChatGLM = True
549
+
550
+ model_name, _ = read_model_name(model_path)
551
+ prompt_template = LLMPromptTemplate()
552
+ text_qa_template_str = prompt_template.model_context_template(model_name)
553
+ selected_completion_to_prompt = text_qa_template_str
554
+
555
+ #selected_completion_to_prompt = chatglm_completion_to_prompt if selected_ChatGLM else completion_to_prompt
556
+ llm = TrtLlmAPI(
557
+ model_path=model_path,
558
+ engine_name=engine_name,
559
+ tokenizer_dir=os.path.join(cwd, "model", model_info["id"] ,model_info["prerequisite"]["tokenizer_local_dir"] ) if "tokenizer_local_dir" in model_info["prerequisite"] else None,
560
+ temperature=model_info["metadata"].get("temperature"),
561
+ max_new_tokens=model_info["metadata"].get("max_new_tokens"),
562
+ context_window=model_info["metadata"].get("max_input_token"),
563
+ vocab_file=os.path.join(cwd, "model", model_info["id"] ,model_info["prerequisite"]["vocab_local_dir"], model_info["prerequisite"]["tokenizer_files"]["vocab_file"]) if "vocab_local_dir" in model_info["prerequisite"] else None,
564
+ messages_to_prompt=messages_to_prompt,
565
+ completion_to_prompt=selected_completion_to_prompt,
566
+ use_py_session=use_py_session,
567
+ add_special_tokens=add_special_tokens,
568
+ trtLlm_debug_mode=trtLlm_debug_mode,
569
+ verbose=verbose
570
+ )
571
+ if embed_model is None : embed_model = HuggingFaceEmbeddings(model_name=embedded_model)
572
+ if service_context is None:
573
+ service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model,
574
+ context_window=model_config["max_input_token"], chunk_size=512,
575
+ chunk_overlap=200)
576
+ else:
577
+ service_context = ServiceContext.from_service_context(service_context=service_context, llm=llm)
578
+ set_global_service_context(service_context)
579
+ generate_inferance_engine(data_dir)
580
+
581
+
582
+ interface.on_model_change(on_model_change_handler)
583
+
584
+
585
+ def on_dataset_source_change_handler(source, path, session_id):
586
+
587
+ global data_source, data_dir, engine
588
+ data_source = source
589
+
590
+ if data_source == "nodataset":
591
+ print(' No dataset source selected', session_id)
592
+ return
593
+
594
+ print('dataset source updated ', source, path, session_id)
595
+
596
+ if data_source == "directory":
597
+ data_dir = path
598
+ else:
599
+ print("Wrong data type selected")
600
+ generate_inferance_engine(data_dir)
601
+
602
+
603
+ interface.on_dataset_source_updated(on_dataset_source_change_handler)
604
+
605
+ def handle_regenerate_index(source, path, session_id):
606
+ if selected_CLIP:
607
+ generate_clip_engine(data_dir, model_config["model_path"], clip_model, clip_processor, force_rewrite=True)
608
+ else:
609
+ generate_inferance_engine(path, force_rewrite=True)
610
+ print("on regenerate index", source, path, session_id)
611
+
612
+
613
+ def mic_init_handler():
614
+ global whisper_model, whisper_model_loaded, enable_asr
615
+ enable_asr = config["models"]["enable_asr"]
616
+ if not enable_asr:
617
+ return False
618
+ vid_mem_info = nvmlDeviceGetMemoryInfo(nvmlDeviceGetHandleByIndex(0))
619
+ free_vid_mem = vid_mem_info.free / (1024*1024)
620
+ print("free video memory in MB = ", free_vid_mem)
621
+ if whisper_model is not None:
622
+ whisper_model.unload_model()
623
+ del whisper_model
624
+ whisper_model = None
625
+ whisper_model = WhisperTRTLLM(asr_engine_path, assets_dir=asr_assets_path)
626
+ whisper_model_loaded = True
627
+ return True
628
+
629
+ interface.on_mic_button_click(mic_init_handler)
630
+
631
+ def mic_recording_done_handler(audio_path):
632
+ transcription = ""
633
+ global whisper_model, enable_asr, whisper_model_loaded
634
+ if not enable_asr:
635
+ return ""
636
+
637
+ # Check and wait until model is loaded before running it.
638
+ checks_left_for_model_loading = 40
639
+ sleep_time = 0.2
640
+ while checks_left_for_model_loading>0 and not whisper_model_loaded:
641
+ time.sleep(sleep_time)
642
+ checks_left_for_model_loading -= 1
643
+ assert whisper_model_loaded, f"Whisper model loading not finished even after {(40 - checks_left_for_model_loading) * sleep_time} seconds"
644
+ if checks_left_for_model_loading == 0:
645
+ return ""
646
+
647
+ # Convert the audio file to the required sampling rate
648
+ current_sampling_rate, data = wavfile.read(audio_path)
649
+ new_sampling_rate = 16000
650
+ number_of_samples = round(len(data) * float(new_sampling_rate) / current_sampling_rate)
651
+ data = sps.resample(data, number_of_samples)
652
+ new_file_path = os.path.join( os.path.dirname(audio_path), "whisper_audio_input.wav" )
653
+ wavfile.write(new_file_path, new_sampling_rate, data.astype(np.int16))
654
+ language = "english"
655
+ if selected_ChatGLM: language = "chinese"
656
+ transcription = decode_audio_file( new_file_path, whisper_model, language=language, mel_filters_dir=asr_assets_path)
657
+
658
+ if whisper_model is not None:
659
+ whisper_model.unload_model()
660
+ del whisper_model
661
+ whisper_model = None
662
+ whisper_model_loaded = False
663
+ return transcription
664
+
665
+ interface.on_mic_recording_done(mic_recording_done_handler)
666
+
667
+ def model_download_handler(model_info):
668
+ download_path = os.path.join(os.getcwd(), "model")
669
+ status = download_model_by_name(model_info=model_info, download_path=download_path)
670
+ print(f"Model download status: {status}")
671
+ return status
672
+
673
+ interface.on_model_downloaded(model_download_handler)
674
+
675
+ def model_install_handler(model_info):
676
+ download_path = os.path.join(os.getcwd(), "model")
677
+ global llm, service_context
678
+ #unload the current model
679
+ if llm is not None:
680
+ llm.unload_model()
681
+ del llm
682
+ llm = None
683
+ # build the engine
684
+ status = build_engine_by_name(model_info=model_info , download_path= download_path)
685
+ print(f"Engine build status: {status}")
686
+ return status
687
+
688
+ interface.on_model_installed(model_install_handler)
689
+
690
+ def model_delete_handler(model_info):
691
+ print("Model deleting ", model_info)
692
+ model_dir = os.path.join(os.getcwd(), "model", model_info['id'])
693
+ isSuccess = True
694
+ if os.path.isdir(model_dir):
695
+ try:
696
+ shutil.rmtree(model_dir)
697
+ except Exception as e:
698
+ print("Exception during temp folder delete", e)
699
+ isSuccess = False
700
+ return isSuccess
701
+
702
+ interface.on_model_delete(model_delete_handler)
703
+
704
+ interface.on_regenerate_index(handle_regenerate_index)
705
+ # render the interface
706
+ interface.render()
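For orientation, the rewritten app.py reads its runtime settings from config\app_config.json: streaming, similarity_top_k, is_chat_engine, embedded_model, embedded_dimension, use_py_session, trtLlm_debug_mode, add_special_tokens, and verbose. The short Python sketch below only illustrates that shape; the key names are taken from the code above, while every value is an illustrative assumption rather than a documented default.

# Minimal sketch (values are assumptions, not defaults): write a config/app_config.json
# containing the keys that app.py reads at startup.
import json
import os

app_config = {
    "streaming": True,                 # serve answers through stream_chatbot rather than chatbot
    "similarity_top_k": 4,             # number of retrieved chunks handed to the engine (assumed)
    "is_chat_engine": False,           # False -> engine.query(), True -> engine.chat()
    "embedded_model": "UAE-Large-V1",  # assumed folder name under ./model/
    "embedded_dimension": 1024,        # must match the embedding model's output size (assumed)
    "use_py_session": False,
    "trtLlm_debug_mode": False,
    "add_special_tokens": True,
    "verbose": False,
}

os.makedirs("config", exist_ok=True)
with open(os.path.join("config", "app_config.json"), "w", encoding="utf8") as f:
    json.dump(app_config, f, indent=2)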
deploy-docs.yml ADDED
@@ -0,0 +1,59 @@
1
+ name: Deploy Docs to GitHub Pages
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ pull_request:
8
+ branches:
9
+ - main
10
+
11
+ jobs:
12
+ build:
13
+ name: Build Docusaurus
14
+ runs-on: ubuntu-latest
15
+ if: github.repository == 'OpenDevin/OpenDevin'
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+ with:
19
+ fetch-depth: 0
20
+ - uses: actions/setup-node@v4
21
+ with:
22
+ node-version: 18
23
+ cache: npm
24
+ cache-dependency-path: docs/package-lock.json
25
+ - name: Set up Python
26
+ uses: actions/setup-python@v5
27
+ with:
28
+ python-version: "3.11"
29
+
30
+ - name: Generate Python Docs
31
+ run: rm -rf docs/modules/python && pip install pydoc-markdown && pydoc-markdown
32
+ - name: Install dependencies
33
+ run: cd docs && npm ci
34
+ - name: Build website
35
+ run: cd docs && npm run build
36
+
37
+ - name: Upload Build Artifact
38
+ if: github.ref == 'refs/heads/main'
39
+ uses: actions/upload-pages-artifact@v3
40
+ with:
41
+ path: docs/build
42
+
43
+ deploy:
44
+ name: Deploy to GitHub Pages
45
+ needs: build
46
+ if: github.ref == 'refs/heads/main' && github.repository == 'OpenDevin/OpenDevin'
47
+ # Grant GITHUB_TOKEN the permissions required to make a Pages deployment
48
+ permissions:
49
+ pages: write # to deploy to Pages
50
+ id-token: write # to verify the deployment originates from an appropriate source
51
+ # Deploy to the github-pages environment
52
+ environment:
53
+ name: github-pages
54
+ url: ${{ steps.deployment.outputs.page_url }}
55
+ runs-on: ubuntu-latest
56
+ steps:
57
+ - name: Deploy to GitHub Pages
58
+ id: deployment
59
+ uses: actions/deploy-pages@v4
dummy-agent-test.yml ADDED
@@ -0,0 +1,33 @@
1
+ name: Run e2e test with dummy agent
2
+
3
+ concurrency:
4
+ group: ${{ github.workflow }}-${{ github.ref }}
5
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
6
+
7
+ on:
8
+ push:
9
+ branches:
10
+ - main
11
+ pull_request:
12
+
13
+ env:
14
+ PERSIST_SANDBOX: "false"
15
+
16
+ jobs:
17
+ test:
18
+ runs-on: ubuntu-latest
19
+ steps:
20
+ - uses: actions/checkout@v4
21
+ - name: Set up Python
22
+ uses: actions/setup-python@v5
23
+ with:
24
+ python-version: '3.11'
25
+ - name: Set up environment
26
+ run: |
27
+ curl -sSL https://install.python-poetry.org | python3 -
28
+ poetry install --without evaluation
29
+ poetry run playwright install --with-deps chromium
30
+ wget https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/1_Pooling/config.json -P /tmp/llama_index/models--BAAI--bge-small-en-v1.5/snapshots/5c38ec7c405ec4b44b94cc5a9bb96e735b38267a/1_Pooling/
31
+ - name: Run tests
32
+ run: |
33
+ poetry run python opendevin/core/main.py -t "do a flip" -m ollama/not-a-model -d ./workspace/ -c DummyAgent
ghcr.yml ADDED
@@ -0,0 +1,82 @@
1
+ name: Publish Docker Image
2
+
3
+ concurrency:
4
+ group: ${{ github.workflow }}-${{ github.ref }}
5
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
6
+
7
+ on:
8
+ push:
9
+ branches:
10
+ - main
11
+ tags:
12
+ - '*'
13
+ pull_request:
14
+ workflow_dispatch:
15
+ inputs:
16
+ reason:
17
+ description: 'Reason for manual trigger'
18
+ required: true
19
+ default: ''
20
+
21
+ jobs:
22
+ ghcr_build_and_push:
23
+ runs-on: ubuntu-latest
24
+
25
+ permissions:
26
+ contents: read
27
+ packages: write
28
+
29
+ strategy:
30
+ matrix:
31
+ image: ["app", "sandbox"]
32
+
33
+ steps:
34
+ - name: checkout
35
+ uses: actions/checkout@v4
36
+
37
+ - name: Free Disk Space (Ubuntu)
38
+ uses: jlumbroso/free-disk-space@main
39
+ with:
40
+ # Setting this to "true" frees about 6 GB,
41
+ # but it might remove tools that are actually needed.
42
+ tool-cache: true
43
+
44
+ # all of these default to true, but feel free to set to
45
+ # "false" if necessary for your workflow
46
+ android: true
47
+ dotnet: true
48
+ haskell: true
49
+ large-packages: true
50
+ docker-images: false
51
+ swap-storage: true
52
+
53
+ - name: Set up QEMU
54
+ uses: docker/setup-qemu-action@v3
55
+
56
+ - name: Set up Docker Buildx
57
+ id: buildx
58
+ uses: docker/setup-buildx-action@v3
59
+
60
+ - name: Login to ghcr
61
+ uses: docker/login-action@v1
62
+ with:
63
+ registry: ghcr.io
64
+ username: ${{ github.repository_owner }}
65
+ password: ${{ secrets.GITHUB_TOKEN }}
66
+
67
+ - name: Build and push ${{ matrix.image }}
68
+ if: "!github.event.pull_request.head.repo.fork"
69
+ run: |
70
+ ./containers/build.sh ${{ matrix.image }} ${{ github.repository_owner }} --push
71
+
72
+ - name: Build ${{ matrix.image }}
73
+ if: "github.event.pull_request.head.repo.fork"
74
+ run: |
75
+ ./containers/build.sh ${{ matrix.image }} ${{ github.repository_owner }}
76
+
77
+ docker_build_success:
78
+ name: Docker Build Success
79
+ runs-on: ubuntu-latest
80
+ needs: ghcr_build_and_push
81
+ steps:
82
+ - run: echo Done!
lint.yml ADDED
@@ -0,0 +1,57 @@
1
+ name: Lint
2
+
3
+ concurrency:
4
+ group: ${{ github.workflow }}-${{ github.ref }}
5
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
6
+
7
+ on:
8
+ push:
9
+ branches:
10
+ - main
11
+ pull_request:
12
+
13
+ jobs:
14
+ lint-frontend:
15
+ name: Lint frontend
16
+ runs-on: ubuntu-latest
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+
20
+ - name: Install Node.js 20
21
+ uses: actions/setup-node@v4
22
+ with:
23
+ node-version: 20
24
+
25
+ - name: Install dependencies
26
+ run: |
27
+ cd frontend
28
npm ci
29
+
30
+ - name: Lint
31
+ run: |
32
+ cd frontend
33
+ npm run lint
34
+
35
+ lint-python:
36
+ name: Lint python
37
+ runs-on: ubuntu-latest
38
+ steps:
39
+ - uses: actions/checkout@v4
40
+ with:
41
+ fetch-depth: 0
42
+ - name: Set up python
43
+ uses: actions/setup-python@v5
44
+ with:
45
+ python-version: 3.11
46
+ cache: 'pip'
47
+ - name: Install pre-commit
48
+ run: pip install pre-commit==3.7.0
49
+ - name: Run pre-commit hooks
50
+ if: github.ref != 'refs/heads/main'
51
+ run: |
52
+ git fetch https://github.com/OpenDevin/OpenDevin.git main:main && \
53
+ pre-commit run \
54
+ --files \
55
+ $(git diff --name-only $(git merge-base main $(git branch --show-current)) $(git branch --show-current) | tr '\n' ' ') \
56
+ --show-diff-on-failure \
57
+ --config ./dev_config/python/.pre-commit-config.yaml
review-pr.yml ADDED
@@ -0,0 +1,69 @@
1
+ name: Use OpenDevin to Review Pull Request
2
+
3
+ on:
4
+ pull_request:
5
+ types: [synchronize, labeled]
6
+
7
+ permissions:
8
+ contents: write
9
+ pull-requests: write
10
+
11
+ jobs:
12
+ dogfood:
13
+ if: contains(github.event.pull_request.labels.*.name, 'review-this')
14
+ runs-on: ubuntu-latest
15
+ container:
16
+ image: ghcr.io/opendevin/opendevin
17
+ volumes:
18
+ - /var/run/docker.sock:/var/run/docker.sock
19
+
20
+ steps:
21
+ - name: install git, github cli
22
+ run: |
23
+ apt-get install -y git gh
24
+ git config --global --add safe.directory $PWD
25
+
26
+ - name: Checkout Repository
27
+ uses: actions/checkout@v4
28
+ with:
29
+ ref: ${{ github.event.pull_request.base.ref }} # check out the target branch
30
+
31
+ - name: Download Diff
32
+ run: |
33
+ curl -O "${{ github.event.pull_request.diff_url }}" -L
34
+
35
+ - name: Write Task File
36
+ run: |
37
+ echo "Your coworker wants to apply a pull request to this project. Read and review ${{ github.event.pull_request.number }}.diff file. Create a review-${{ github.event.pull_request.number }}.txt and write your concise comments and suggestions there." > task.txt
38
+ echo "" >> task.txt
39
+ echo "Title" >> task.txt
40
+ echo "${{ github.event.pull_request.title }}" >> task.txt
41
+ echo "" >> task.txt
42
+ echo "Description" >> task.txt
43
+ echo "${{ github.event.pull_request.body }}" >> task.txt
44
+ echo "" >> task.txt
45
+ echo "Diff file is: ${{ github.event.pull_request.number }}.diff" >> task.txt
46
+
47
+ - name: Run OpenDevin
48
+ env:
49
+ LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
50
+ SANDBOX_TYPE: exec
51
+ run: |
52
+ WORKSPACE_MOUNT_PATH=$GITHUB_WORKSPACE python ./opendevin/core/main.py -i 50 -f task.txt -d $GITHUB_WORKSPACE
53
+ rm task.txt
54
+
55
+ - name: Check if review file is non-empty
56
+ id: check_file
57
+ run: |
58
+ ls -la
59
+ if [[ -s review-${{ github.event.pull_request.number }}.txt ]]; then
60
+ echo "non_empty=true" >> $GITHUB_OUTPUT
61
+ fi
62
+ shell: bash
63
+
64
+ - name: Create PR review if file is non-empty
65
+ env:
66
+ GH_TOKEN: ${{ github.token }}
67
+ if: steps.check_file.outputs.non_empty == 'true'
68
+ run: |
69
+ gh pr review ${{ github.event.pull_request.number }} --comment --body-file "review-${{ github.event.pull_request.number }}.txt"
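For context, the Write Task File step above assembles the prompt that OpenDevin is given. With a hypothetical pull request number 123 (title and body shown only as placeholders), the generated task.txt would read roughly as follows; the fixed wording comes from the echo lines above, the bracketed values do not:

Your coworker wants to apply a pull request to this project. Read and review 123.diff file. Create a review-123.txt and write your concise comments and suggestions there.

Title
[pull request title]

Description
[pull request description]

Diff file is: 123.diff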
run-integration-tests.yml ADDED
@@ -0,0 +1,104 @@
1
+ name: Run Integration Tests
2
+
3
+ concurrency:
4
+ group: ${{ github.workflow }}-${{ github.ref }}
5
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
6
+
7
+ on:
8
+ push:
9
+ branches:
10
+ - main
11
+ paths-ignore:
12
+ - '**/*.md'
13
+ - 'frontend/**'
14
+ - 'docs/**'
15
+ - 'evaluation/**'
16
+ pull_request:
17
+
18
+ env:
19
+ PERSIST_SANDBOX: "false"
20
+
21
+ jobs:
22
+ integration-tests-on-linux:
23
+ name: Integration Tests on Linux
24
+ runs-on: ubuntu-latest
25
+ strategy:
26
+ fail-fast: false
27
+ matrix:
28
+ python-version: ["3.11"]
29
+ sandbox: ["ssh", "exec", "local"]
30
+ steps:
31
+ - uses: actions/checkout@v4
32
+
33
+ - name: Install poetry via pipx
34
+ run: pipx install poetry
35
+
36
+ - name: Set up Python
37
+ uses: actions/setup-python@v5
38
+ with:
39
+ python-version: ${{ matrix.python-version }}
40
+ cache: 'poetry'
41
+
42
+ - name: Install Python dependencies using Poetry
43
+ run: poetry install
44
+
45
+ - name: Build Environment
46
+ run: make build
47
+
48
+ - name: Run Integration Tests
49
+ env:
50
+ SANDBOX_TYPE: ${{ matrix.sandbox }}
51
+ run: |
52
+ TEST_IN_CI=true TEST_ONLY=true ./tests/integration/regenerate.sh
53
+
54
+ - name: Upload coverage to Codecov
55
+ uses: codecov/codecov-action@v4
56
+ env:
57
+ CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
58
+
59
+ integration-tests-on-mac:
60
+ name: Integration Tests on MacOS
61
+ runs-on: macos-13
62
+ if: contains(github.event.pull_request.title, 'mac') || contains(github.event.pull_request.title, 'Mac')
63
+ strategy:
64
+ fail-fast: false
65
+ matrix:
66
+ python-version: ["3.11"]
67
+ sandbox: ["ssh"]
68
+ steps:
69
+ - uses: actions/checkout@v4
70
+
71
+ - name: Install poetry via pipx
72
+ run: pipx install poetry
73
+
74
+ - name: Set up Python
75
+ uses: actions/setup-python@v5
76
+ with:
77
+ python-version: ${{ matrix.python-version }}
78
+ cache: 'poetry'
79
+
80
+ - name: Install Python dependencies using Poetry
81
+ run: poetry install
82
+
83
+ - name: Install & Start Docker
84
+ run: |
85
+ brew install colima docker
86
+ colima start
87
+
88
+ # For testcontainers to find the Colima socket
89
+ # https://github.com/abiosoft/colima/blob/main/docs/FAQ.md#cannot-connect-to-the-docker-daemon-at-unixvarrundockersock-is-the-docker-daemon-running
90
+ sudo ln -sf $HOME/.colima/default/docker.sock /var/run/docker.sock
91
+
92
+ - name: Build Environment
93
+ run: make build
94
+
95
+ - name: Run Integration Tests
96
+ env:
97
+ SANDBOX_TYPE: ${{ matrix.sandbox }}
98
+ run: |
99
+ TEST_IN_CI=true TEST_ONLY=true ./tests/integration/regenerate.sh
100
+
101
+ - name: Upload coverage to Codecov
102
+ uses: codecov/codecov-action@v4
103
+ env:
104
+ CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
run-unit-tests.yml ADDED
@@ -0,0 +1,129 @@
1
+ name: Run Unit Tests
2
+
3
+ concurrency:
4
+ group: ${{ github.workflow }}-${{ github.ref }}
5
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
6
+
7
+ on:
8
+ push:
9
+ branches:
10
+ - main
11
+ paths-ignore:
12
+ - '**/*.md'
13
+ - 'frontend/**'
14
+ - 'docs/**'
15
+ - 'evaluation/**'
16
+ pull_request:
17
+
18
+ env:
19
+ PERSIST_SANDBOX: "false"
20
+
21
+ jobs:
22
+ test-on-macos:
23
+ name: Test on macOS
24
+ runs-on: macos-13
25
+ env:
26
+ INSTALL_DOCKER: "0" # Set to '0' to skip Docker installation
27
+ strategy:
28
+ matrix:
29
+ python-version: ["3.11"]
30
+
31
+ steps:
32
+ - uses: actions/checkout@v4
33
+
34
+ - name: Install poetry via pipx
35
+ run: pipx install poetry
36
+
37
+ - name: Set up Python ${{ matrix.python-version }}
38
+ uses: actions/setup-python@v5
39
+ with:
40
+ python-version: ${{ matrix.python-version }}
41
+ cache: "poetry"
42
+
43
+ - name: Install Python dependencies using Poetry
44
+ run: poetry install
45
+
46
+ - name: Install & Start Docker
47
+ if: env.INSTALL_DOCKER == '1'
48
+ run: |
49
+ brew install colima docker
50
+ colima start
51
+
52
+ # For testcontainers to find the Colima socket
53
+ # https://github.com/abiosoft/colima/blob/main/docs/FAQ.md#cannot-connect-to-the-docker-daemon-at-unixvarrundockersock-is-the-docker-daemon-running
54
+ sudo ln -sf $HOME/.colima/default/docker.sock /var/run/docker.sock
55
+
56
+ - name: Build Environment
57
+ run: make build
58
+
59
+ - name: Run Tests
60
+ run: poetry run pytest --forked --cov=agenthub --cov=opendevin --cov-report=xml ./tests/unit -k "not test_sandbox"
61
+
62
+ - name: Upload coverage to Codecov
63
+ uses: codecov/codecov-action@v4
64
+ env:
65
+ CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
66
+ test-on-linux:
67
+ name: Test on Linux
68
+ runs-on: ubuntu-latest
69
+ env:
70
+ INSTALL_DOCKER: "0" # Set to '0' to skip Docker installation
71
+ strategy:
72
+ matrix:
73
+ python-version: ["3.11"]
74
+
75
+ steps:
76
+ - uses: actions/checkout@v4
77
+
78
+ - name: Install poetry via pipx
79
+ run: pipx install poetry
80
+
81
+ - name: Set up Python
82
+ uses: actions/setup-python@v5
83
+ with:
84
+ python-version: ${{ matrix.python-version }}
85
+ cache: "poetry"
86
+
87
+ - name: Install Python dependencies using Poetry
88
+ run: poetry install --without evaluation
89
+
90
+ - name: Build Environment
91
+ run: make build
92
+
93
+ - name: Run Tests
94
+ run: poetry run pytest --forked --cov=agenthub --cov=opendevin --cov-report=xml ./tests/unit -k "not test_sandbox"
95
+
96
+ - name: Upload coverage to Codecov
97
+ uses: codecov/codecov-action@v4
98
+ env:
99
+ CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
100
+
101
+ test-for-sandbox:
102
+ name: Test for Sandbox
103
+ runs-on: ubuntu-latest
104
+ steps:
105
+ - uses: actions/checkout@v4
106
+
107
+ - name: Install poetry via pipx
108
+ run: pipx install poetry
109
+
110
+ - name: Set up Python
111
+ uses: actions/setup-python@v5
112
+ with:
113
+ python-version: "3.11"
114
+ cache: "poetry"
115
+
116
+ - name: Install Python dependencies using Poetry
117
+ run: poetry install
118
+
119
+ - name: Build Environment
120
+ run: make build
121
+
122
+ - name: Run Integration Test for Sandbox
123
+ run: |
124
+ poetry run pytest --cov=agenthub --cov=opendevin --cov-report=xml -s ./tests/unit/test_sandbox.py
125
+
126
+ - name: Upload coverage to Codecov
127
+ uses: codecov/codecov-action@v4
128
+ env:
129
+ CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
solve-issue.yml ADDED
@@ -0,0 +1,109 @@
1
+ name: Use OpenDevin to Resolve GitHub Issue
2
+
3
+ on:
4
+ issues:
5
+ types: [labeled]
6
+
7
+ permissions:
8
+ contents: write
9
+ pull-requests: write
10
+ issues: write
11
+
12
+ jobs:
13
+ dogfood:
14
+ if: github.event.label.name == 'solve-this'
15
+ runs-on: ubuntu-latest
16
+ container:
17
+ image: ghcr.io/opendevin/opendevin
18
+ volumes:
19
+ - /var/run/docker.sock:/var/run/docker.sock
20
+
21
+ steps:
22
+ - name: install git, github cli
23
+ run: apt-get install -y git gh
24
+
25
+ - name: Checkout Repository
26
+ uses: actions/checkout@v4
27
+
28
+ - name: Write Task File
29
+ env:
30
+ ISSUE_TITLE: ${{ github.event.issue.title }}
31
+ ISSUE_BODY: ${{ github.event.issue.body }}
32
+ run: |
33
+ echo "TITLE:" > task.txt
34
+ echo "${ISSUE_TITLE}" >> task.txt
35
+ echo "" >> task.txt
36
+ echo "BODY:" >> task.txt
37
+ echo "${ISSUE_BODY}" >> task.txt
38
+
39
+ - name: Run OpenDevin
40
+ env:
41
+ ISSUE_TITLE: ${{ github.event.issue.title }}
42
+ ISSUE_BODY: ${{ github.event.issue.body }}
43
+ LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
44
+ SANDBOX_TYPE: exec
45
+ run: |
46
+ WORKSPACE_MOUNT_PATH=$GITHUB_WORKSPACE python ./opendevin/core/main.py -i 50 -f task.txt -d $GITHUB_WORKSPACE
47
+ rm task.txt
48
+
49
+ - name: Setup Git, Create Branch, and Commit Changes
50
+ run: |
51
+ # Setup Git configuration
52
+ git config --global --add safe.directory $PWD
53
+ git config --global user.name 'OpenDevin'
54
+ git config --global user.email 'OpenDevin@users.noreply.github.com'
55
+
56
+ # Create a unique branch name with a timestamp
57
+ BRANCH_NAME="fix/${{ github.event.issue.number }}-$(date +%Y%m%d%H%M%S)"
58
+
59
+ # Checkout new branch
60
+ git checkout -b $BRANCH_NAME
61
+
62
+ # Add all changes to staging, except task.txt
63
+ git add --all -- ':!task.txt'
64
+
65
+ # Commit the changes, if any
66
+ git commit -m "OpenDevin: Resolve Issue #${{ github.event.issue.number }}"
67
+ if [ $? -ne 0 ]; then
68
+ echo "No changes to commit."
69
+ exit 0
70
+ fi
71
+
72
+ # Push changes
73
+ git push --set-upstream origin $BRANCH_NAME
74
+
75
+ - name: Fetch Default Branch
76
+ env:
77
+ GH_TOKEN: ${{ github.token }}
78
+ run: |
79
+ # Fetch the default branch using gh cli
80
+ DEFAULT_BRANCH=$(gh repo view --json defaultBranchRef --jq .defaultBranchRef.name)
81
+ echo "Default branch is $DEFAULT_BRANCH"
82
+ echo "DEFAULT_BRANCH=$DEFAULT_BRANCH" >> $GITHUB_ENV
83
+
84
+ - name: Generate PR
85
+ env:
86
+ GH_TOKEN: ${{ github.token }}
87
+ run: |
88
+ # Create PR and capture URL
89
+ PR_URL=$(gh pr create \
90
+ --title "OpenDevin: Resolve Issue #2" \
91
+ --body "This PR was generated by OpenDevin to resolve issue #2" \
92
+ --repo "foragerr/OpenDevin" \
93
+ --head "${{ github.head_ref }}" \
94
+ --base "${{ env.DEFAULT_BRANCH }}" \
95
+ | grep -o 'https://github.com/[^ ]*')
96
+
97
+ # Extract PR number from URL
98
+ PR_NUMBER=$(echo "$PR_URL" | grep -o '[0-9]\+$')
99
+
100
+ # Set environment vars
101
+ echo "PR_URL=$PR_URL" >> $GITHUB_ENV
102
+ echo "PR_NUMBER=$PR_NUMBER" >> $GITHUB_ENV
103
+
104
+ - name: Post Comment
105
+ env:
106
+ GH_TOKEN: ${{ github.token }}
107
+ run: |
108
+ gh issue comment ${{ github.event.issue.number }} \
109
+ -b "OpenDevin raised [PR #${{ env.PR_NUMBER }}](${{ env.PR_URL }}) to resolve this issue."
stale.yml ADDED
@@ -0,0 +1,29 @@
1
+ name: 'Close stale issues'
2
+ on:
3
+ schedule:
4
+ - cron: '30 1 * * *'
5
+
6
+ jobs:
7
+ stale:
8
+ runs-on: ubuntu-latest
9
+ steps:
10
+ - uses: actions/stale@v9
11
+ with:
12
+ # Aggressively close issues that have been explicitly labeled `age-out`
13
+ any-of-labels: age-out
14
+ stale-issue-message: 'This issue is stale because it has been open for 7 days with no activity. Remove stale label or comment or this will be closed in 1 day.'
15
+ close-issue-message: 'This issue was closed because it has been stalled for over 7 days with no activity.'
16
+ stale-pr-message: 'This PR is stale because it has been open for 7 days with no activity. Remove stale label or comment or this will be closed in 1 day.'
17
+ close-pr-message: 'This PR was closed because it has been stalled for over 7 days with no activity.'
18
+ days-before-stale: 7
19
+ days-before-close: 1
20
+
21
+ - uses: actions/stale@v9
22
+ with:
23
+ # Be more lenient with other issues
24
+ stale-issue-message: 'This issue is stale because it has been open for 30 days with no activity. Remove stale label or comment or this will be closed in 7 days.'
25
+ close-issue-message: 'This issue was closed because it has been stalled for over 30 days with no activity.'
26
+ stale-pr-message: 'This PR is stale because it has been open for 30 days with no activity. Remove stale label or comment or this will be closed in 7 days.'
27
+ close-pr-message: 'This PR was closed because it has been stalled for over 30 days with no activity.'
28
+ days-before-stale: 30
29
+ days-before-close: 7