Samarth991 committed
Commit: 99b2550
Parent: 34a0eeb

adding duration feature

Files changed (1): app.py (+4, -1)
app.py CHANGED
@@ -14,6 +14,7 @@ FILE_EXT = ['wav','mp3']
 MAX_NEW_TOKENS = 4096
 DEFAULT_MAX_NEW_TOKENS = 1024
 DEFAULT_TEMPERATURE = 0.1
+DEFAULT_DURATION = 5
 
 def create_logger():
     formatter = logging.Formatter('%(asctime)s:%(levelname)s:- %(message)s')
@@ -55,6 +56,7 @@ def process_documents(documents,data_chunk=1500,chunk_overlap=100):
 def audio_processor(wav_file,API_key,wav_model='small',llm='HuggingFace',temperature=0.1,max_tokens=4096,duration=5):
     device='cpu'
     logger.info("Audio File Name :",wav_file.name)
+
     whisper = whisper_app.WHISPERModel(model_name=wav_model,device=device)
     logger.info("Whisper Model Loaded || Model size:{}".format(wav_model))
     text_info = whisper.speech_to_text(audio_path=wav_file.name)
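Note: the commit threads `duration` through to audio_processor, but none of the hunks show how the value is consumed. One plausible use, shown here purely as an illustrative sketch and not part of this commit, is to clip the uploaded audio to the selected number of minutes before transcription; the snippet assumes pydub is available and the helper name clip_audio is hypothetical.

from pydub import AudioSegment

def clip_audio(audio_path, duration_min, out_path='clipped.wav'):
    # Keep only the first duration_min minutes; pydub slices AudioSegment objects in milliseconds.
    audio = AudioSegment.from_file(audio_path)
    audio[:duration_min * 60 * 1000].export(out_path, format='wav')
    return out_path

audio_processor could then transcribe the clipped file instead, e.g. text_info = whisper.speech_to_text(audio_path=clip_audio(wav_file.name, duration)).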
@@ -67,6 +69,7 @@ def audio_processor(wav_file,API_key,wav_model='small',llm='HuggingFace',temperature=0.1,max_tokens=4096,duration=5):
 
     embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-large',model_kwargs={"device": device})
     texts = process_documents(documents=document)
+
     global vector_db
     vector_db = FAISS.from_documents(documents=texts, embedding= embedding_model)
     global qa
@@ -82,7 +85,6 @@ def audio_processor(wav_file,API_key,wav_model='small',llm='HuggingFace',temperature=0.1,max_tokens=4096,duration=5):
         chat = llm_ops.get_openai_chat_model(API_key=API_key)
 
     chain_type_kwargs = {"prompt": create_prompt()}
-
     qa = RetrievalQA.from_chain_type(llm=chat,
                                      chain_type='stuff',
                                      retriever=vector_db.as_retriever(),
@@ -165,6 +167,7 @@ with gr.Blocks(css=css) as demo:
             step=1,
             value=DEFAULT_MAX_NEW_TOKENS,
         )
+        duration = gr.Slider(label='duration in min',minimum=5,maximum=10,step=1,value=DEFAULT_DURATION)
         temperature = gr.Slider(
             label='Temperature',
             minimum=0.1,
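The diff adds the slider but does not show the callback wiring; for the value to reach audio_processor, the new duration component would also have to be appended to the submit handler's inputs. A minimal sketch of that wiring, assuming a submit button and input components whose names (submit_btn, audio_file, api_key, wav_model_choice, llm_choice, max_new_tokens, output_box) are hypothetical:

submit_btn.click(
    fn=audio_processor,
    inputs=[audio_file, api_key, wav_model_choice, llm_choice, temperature, max_new_tokens, duration],
    outputs=[output_box],
)

The order of inputs has to match audio_processor's positional parameters (wav_file, API_key, wav_model, llm, temperature, max_tokens, duration).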
 