Alexander Watson
committed on
Commit
•
fb73a92
1
Parent(s):
097faaf
fix multi-line bug, add SDK code to download zip
Browse files
app.py
CHANGED
@@ -11,8 +11,7 @@ import requests
|
|
11 |
import streamlit as st
|
12 |
from datasets import load_dataset
|
13 |
from gretel_client import Gretel
|
14 |
-
from navigator_helpers import
|
15 |
-
StreamlitLogHandler)
|
16 |
|
17 |
# Create a StringIO buffer to capture the logging output
|
18 |
log_buffer = StringIO()
|
@@ -363,7 +362,9 @@ def main():
|
|
363 |
|
364 |
with st.expander("Download SDK Code", expanded=False):
|
365 |
st.markdown("### Ready to generate data at scale?")
|
366 |
-
st.write(
|
|
|
|
|
367 |
|
368 |
config_text = f"""
|
369 |
#!pip install -Uqq git+https://github.com/gretelai/navigator-helpers.git
|
@@ -397,8 +398,8 @@ def main():
|
|
397 |
navigator_tabular="{navigator_tabular}",
|
398 |
navigator_llm="{navigator_llm}",
|
399 |
co_teach_llms={co_teach_llms},
|
400 |
-
instruction_format_prompt=
|
401 |
-
response_format_prompt=
|
402 |
)
|
403 |
|
404 |
# Create the data augmenter and perform augmentation
|
@@ -563,17 +564,23 @@ def main():
|
|
563 |
with open(augmented_data_file_path, "w") as augmented_data_file:
|
564 |
augmented_data_file.write(augmented_data_jsonl)
|
565 |
|
566 |
-
#
|
|
|
|
|
|
|
|
|
|
|
567 |
zip_file_path = os.path.join(temp_dir, "augmentation_results.zip")
|
568 |
with zipfile.ZipFile(zip_file_path, "w") as zip_file:
|
569 |
zip_file.write(log_file_path, "complete_logs.jsonl")
|
570 |
if augmented_data_jsonl:
|
571 |
zip_file.write(augmented_data_file_path, "augmented_data.jsonl")
|
|
|
572 |
|
573 |
# Download the ZIP file
|
574 |
with open(zip_file_path, "rb") as zip_file:
|
575 |
st.download_button(
|
576 |
-
label="💾 Download Synthetic Data and
|
577 |
data=zip_file.read(),
|
578 |
file_name="gretel_synthetic_data.zip",
|
579 |
mime="application/zip",
|
|
|
11 |
import streamlit as st
|
12 |
from datasets import load_dataset
|
13 |
from gretel_client import Gretel
|
14 |
+
from navigator_helpers import DataAugmentationConfig, DataAugmenter, StreamlitLogHandler
|
|
|
15 |
|
16 |
# Create a StringIO buffer to capture the logging output
|
17 |
log_buffer = StringIO()
|
|
|
362 |
|
363 |
with st.expander("Download SDK Code", expanded=False):
|
364 |
st.markdown("### Ready to generate data at scale?")
|
365 |
+
st.write(
|
366 |
+
"Get started with your current configuration using the SDK code below:"
|
367 |
+
)
|
368 |
|
369 |
config_text = f"""
|
370 |
#!pip install -Uqq git+https://github.com/gretelai/navigator-helpers.git
|
|
|
398 |
navigator_tabular="{navigator_tabular}",
|
399 |
navigator_llm="{navigator_llm}",
|
400 |
co_teach_llms={co_teach_llms},
|
401 |
+
instruction_format_prompt='''{instruction_format_prompt}''',
|
402 |
+
response_format_prompt='''{response_format_prompt}'''
|
403 |
)
|
404 |
|
405 |
# Create the data augmenter and perform augmentation
|
|
|
564 |
with open(augmented_data_file_path, "w") as augmented_data_file:
|
565 |
augmented_data_file.write(augmented_data_jsonl)
|
566 |
|
567 |
+
# Write the SDK code to a file
|
568 |
+
sdk_file_path = os.path.join(temp_dir, "data_augmentation_code.py")
|
569 |
+
with open(sdk_file_path, "w") as sdk_file:
|
570 |
+
sdk_file.write(config_text)
|
571 |
+
|
572 |
+
# Create a ZIP file containing the logs, augmented data, and SDK code
|
573 |
zip_file_path = os.path.join(temp_dir, "augmentation_results.zip")
|
574 |
with zipfile.ZipFile(zip_file_path, "w") as zip_file:
|
575 |
zip_file.write(log_file_path, "complete_logs.jsonl")
|
576 |
if augmented_data_jsonl:
|
577 |
zip_file.write(augmented_data_file_path, "augmented_data.jsonl")
|
578 |
+
zip_file.write(sdk_file_path, "data_augmentation_code.py")
|
579 |
|
580 |
# Download the ZIP file
|
581 |
with open(zip_file_path, "rb") as zip_file:
|
582 |
st.download_button(
|
583 |
+
label="💾 Download Synthetic Data, Logs, and SDK Code",
|
584 |
data=zip_file.read(),
|
585 |
file_name="gretel_synthetic_data.zip",
|
586 |
mime="application/zip",
|