import os
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import spacy
import gradio as gr
import subprocess

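# Disabled helper: only referenced by the commented-out spaCy-based find_closest variant further down.
# def download_spacy_model(model_name):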
#     command = f"python -m spacy download {model_name}"
#     process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
#     stdout, stderr = process.communicate()

#     # Check if the command executed successfully
#     if process.returncode != 0:
#         print(f"An error occurred while downloading the model: {stderr.decode('utf-8')}")
#     else:
#         print(f"Successfully downloaded the model: {stdout.decode('utf-8')}")

# (No download call is made here: the helper above is commented out.)

# Disabled alternative implementation: TF-IDF vectors + cosine similarity.
# def find_closest(query):
#     files_contents = []
#     files_names = []

#     for file in os.listdir():
#         if file.endswith(".txt"):
#             with open(file, 'r') as f:
#                 content = f.read()
#                 files_contents.append(content)
#                 files_names.append(file)

#     # Append query to the end
#     files_contents.append(query)

#     # Initialize the TfidfVectorizer
#     tfidf_vectorizer = TfidfVectorizer()

#     # Fit and transform the texts
#     tfidf_matrix = tfidf_vectorizer.fit_transform(files_contents)

#     # Compute the cosine similarity between the query and all files
#     similarity_scores = cosine_similarity(tfidf_matrix[-1:], tfidf_matrix[:-1])

#     # Get the index of the file with the highest similarity score
#     max_similarity_idx = similarity_scores.argmax()

#     # Return the name of the file with the highest similarity score
#     return files_names[max_similarity_idx]

# Disabled alternative implementation: spaCy document vectors + cosine similarity.
# def find_closest(query):
#     try:
#         nlp = spacy.load('en_core_web_md')
#     except:
#         download_spacy_model('en_core_web_md')
#         nlp = spacy.load('en_core_web_md')
#     files_names = []
#     files_vectors = []

#     for file in os.listdir():
#         if file.endswith(".txt"):
#             with open(file, 'r') as f:
#                 content = f.read()
#                 files_names.append(file)
#                 # Get the vector representation of the content
#                 files_vectors.append(nlp(content).vector)

#     # Get the vector representation of the query
#     query_vector = nlp(query).vector

#     # Compute the cosine similarity between the query and all files
#     similarity_scores = cosine_similarity([query_vector], files_vectors)

#     # Get the index of the file with the highest similarity score
#     max_similarity_idx = similarity_scores.argmax()

#     # Return the name of the file with the highest similarity score
#     return files_names[max_similarity_idx]
# Cache of loaded embedding models, keyed by model name, so the model is
# constructed once per process instead of once per query.
_SENTENCE_MODEL_CACHE = {}


def _get_sentence_model(model_name):
    """Return the SentenceTransformer for *model_name*, loading it on first use."""
    if model_name not in _SENTENCE_MODEL_CACHE:
        _SENTENCE_MODEL_CACHE[model_name] = SentenceTransformer(model_name)
    return _SENTENCE_MODEL_CACHE[model_name]


def find_closest(query):
    """Return the name of the .txt file in the working directory closest to *query*.

    Every ``.txt`` file in the current working directory (except the
    dependency manifests listed in ``files_to_exclude``) is read, embedded
    together with the query using the ``all-MiniLM-L6-v2`` sentence
    embedding model, and compared to the query by cosine similarity.

    Args:
        query: Free-form text to match against the file contents.

    Returns:
        The file name (not a full path) of the highest-scoring ``.txt`` file.

    Raises:
        ValueError: If the working directory contains no candidate ``.txt``
            files. Raised before the model is loaded.
    """
    files_to_exclude = {"packages.txt", "requirements.txt", "pre-requirements.txt"}

    files_contents = []
    files_names = []

    # Sorted so that equal scores resolve the same way on every run;
    # os.listdir() returns entries in arbitrary order.
    for file in sorted(os.listdir()):
        if file.endswith(".txt") and file not in files_to_exclude:
            print(f"Found .txt file: {file}")
            # Explicit encoding so the result does not depend on the platform locale.
            with open(file, 'r', encoding="utf-8") as f:
                files_contents.append(f.read())
            files_names.append(file)

    if not files_names:
        # Fail early with a clear message instead of an opaque empty-array
        # error from the similarity computation.
        raise ValueError(
            "No candidate .txt files found in the current working directory."
        )

    model = _get_sentence_model('all-MiniLM-L6-v2')  # You can choose other models

    # The query goes last so that one encode() call embeds everything.
    files_contents.append(query)
    embeddings = model.encode(files_contents)

    # Compare the query (last row) against every file (all other rows).
    similarity_scores = cosine_similarity([embeddings[-1]], embeddings[:-1])

    # Index of the best-scoring file; it lines up with files_names.
    max_similarity_idx = similarity_scores.argmax()
    return files_names[max_similarity_idx]
def find_closest_mp3(query):
    """Return the .mp3 file name that shares its stem with the best-matching .txt file.

    The best match is chosen by ``find_closest``; only the extension is
    swapped, and the resulting file is not checked for existence.
    """
    stem = os.path.splitext(find_closest(query))[0]
    return f"{stem}.mp3"
# --- Gradio UI -------------------------------------------------------------
# Theme is loaded from the hub under the id "ysharma/llamas".
my_theme = gr.Theme.from_hub("ysharma/llamas")

with gr.Blocks(theme=my_theme) as demo:
    # Page title and tagline, rendered as raw HTML inside Markdown components.
    gr.Markdown('<h1 style="text-align: center;">BeatLlama Dreambooth</h1>')
    # NOTE: a video component for "final_video.mp4" was sketched here but is disabled.
    gr.Markdown(
        '<h2 style="text-align: center;"><span style="color: white;">'
        ' Get a song for your dream, but sung by AI!</span></h2>'
    )

    # Free-text input and the audio component that receives the matched song.
    inp = gr.Textbox(placeholder="Describe your dream!", label="Your dream")
    out = gr.Audio(label="Llamas singing your dream")

    # Each change to the textbox runs the lookup and sends the resulting
    # .mp3 file name to the audio component.
    inp.change(
        fn=find_closest_mp3,
        inputs=inp,
        outputs=out,
        scroll_to_output=True,
    )
    # Play listener registered without a callback.
    out.play(None)

# Enable the request queue and keep the API closed/hidden. The first
# argument stays positional because its name differs between Gradio versions.
demo.queue(1, api_open=False)
demo.launch(show_api=False)