AlbertoFH98 committed
Commit 2bae7ed
Parent: 24bbdf7

Update app.py

Files changed (1): app.py (+25, -27)
app.py CHANGED
@@ -17,7 +17,7 @@ import os
 import re
 st.set_page_config(layout="wide")
 
-@st.cache
+@st.cache_data
 def get_args():
     # -- 1. Setup arguments
     parser = argparse.ArgumentParser()
@@ -34,17 +34,15 @@ def get_args():
 
     # -- 3. Setup constants
     args = parser.parse_args()
-    return args
+    return args, logger
 
-@st.cache
-def get_podcast_data(transcription_path):
-    together.api_key = os.environ["TOGETHER_API_KEY"]
-    together.Models.start(MODEL)
-    podcast_url_video_df = pd.read_csv(PODCAST_URL_VIDEO_PATH, sep=';')
+@st.cache_data
+def get_podcast_data(path):
+    podcast_url_video_df = pd.read_csv(path, sep=';')
     return podcast_url_video_df
 
-@st.cache
-def setup_basics_comp(emb_model, model, default_system_prompt_link, logger, podcast_url_video_df, img_size=100):
+@st.cache_resource(experimental_allow_widgets=True)
+def get_basics_comp(emb_model, model, default_system_prompt_link, _logger, podcast_url_video_df, img_size=100):
     r = requests.get("https://raw.githubusercontent.com/AlbertoUAH/Castena/main/media/castena-animated-icon.gif", stream=True)
     icon = Image.open(r.raw)
     icon = icon.resize((img_size, img_size))
@@ -56,17 +54,22 @@ def setup_basics_comp(emb_model, model, default_system_prompt_link, logger, podc
     video_option_joined = '_'.join(video_option.replace(': Entrevista a ', ' ').lower().split(' ')).replace("\'", "")
     video_option_joined_path = "{}_transcription.txt".format(video_option_joined)
     youtube_video_url = list(podcast_url_video_df[podcast_url_video_df['podcast_name'].str.contains(video_option_joined)]['youtube_video_url'])[0].replace("\'", "")
+    st.title("[Podcast: {}]({})".format(video_option.replace("'", "").title(), youtube_video_url))
 
     # -- 4. Setup request for system prompt
     f = urllib.request.urlopen(default_system_prompt_link)
-    DEFAULT_SYSTEM_PROMPT = str(f.read(), 'UTF-8')
+    default_system_prompt = str(f.read(), 'UTF-8')
 
     # -- 5. Setup app
-    translator, nlp, retriever = utils.setup_app(video_option_joined_path, emb_model, model, logger)
-    return translator, nlp, retriever, video_option, video_option_joined_path
+    translator, nlp, retriever = utils.setup_app(video_option_joined_path, emb_model, model, _logger)
+
+    # -- 6. Setup model
+    together.api_key = os.environ["TOGETHER_API_KEY"]
+    together.Models.start(model)
+    return together, translator, nlp, retriever, video_option, video_option_joined_path, default_system_prompt, youtube_video_url
 
 def main():
-    args = get_args()
+    args, logger = get_args()
     B_INST, E_INST = "[INST]", "[/INST]"
     B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
 
@@ -76,12 +79,14 @@ def main():
     TRANSCRIPTION_PATH = '{}_transcription.txt'.format(TRANSCRIPTION)
     MODEL = args.MODEL
     EMB_MODEL = args.EMB_MODEL
+    WIDTH = 50
+    SIDE = (100 - WIDTH) / 2
 
-    podcast_url_video_df = get_podcast_data(TRANSCRIPTION_PATH)
+    podcast_url_video_df = get_podcast_data(PODCAST_URL_VIDEO_PATH)
 
-    translator, nlp, retriever, video_option, video_option_joined_path = setup_basics_comp(EMB_MODEL, MODEL,
-                                                                                           DEFAULT_SYSTEM_PROMPT_LINK, logger,
-                                                                                           podcast_url_video_df, img_size=100)
+    together, translator, nlp, retriever, video_option, video_option_joined_path, default_system_prompt, youtube_video_url = get_basics_comp(EMB_MODEL, MODEL,
+                                                                                           DEFAULT_SYSTEM_PROMPT_LINK, logger,
+                                                                                           podcast_url_video_df, img_size=100)
 
 
     # -- 6. Setup prompt template + llm chain
@@ -90,7 +95,7 @@
 PREGUNTA: {question}
 
 RESPUESTA: """
-    prompt_template = utils.get_prompt(instruction, DEFAULT_SYSTEM_PROMPT, B_SYS, E_SYS, B_INST, E_INST, logger)
+    prompt_template = utils.get_prompt(instruction, default_system_prompt, B_SYS, E_SYS, B_INST, E_INST, logger)
 
     llama_prompt = PromptTemplate(
         template=prompt_template, input_variables=["context", "question"]
@@ -100,12 +105,7 @@ RESPUESTA: """
     qa_chain = utils.create_llm_chain(MODEL, retriever, chain_type_kwargs, logger, video_option_joined_path)
 
     # ---------------------------------------------------------------------
-    # -- 7. Setup Streamlit app
-    st.title("[Podcast: {}]({})".format(video_option.replace("'", "").title(), youtube_video_url))
-
-    width = 50
-    side = (100 - width) / 2
-    _, container, _ = st.columns([side, width, side])
+    _, container, _ = st.columns([SIDE, WIDTH, SIDE])
     with container:
         st_player(utils.typewrite(youtube_video_url))
 
@@ -131,11 +131,9 @@ RESPUESTA: """
         end_time_seconds_list.append(end_time_seconds)
 
     if start_time_str_list:
-        width = 40
-        side = (100 - width) / 2
         for start_time_seconds, start_time_str, end_time_seconds in zip(start_time_seconds_list, start_time_str_list, end_time_seconds_list):
             st.markdown("__Fragmento: " + start_time_str + "__")
-            _, container, _ = st.columns([side, width, side])
+            _, container, _ = st.columns([SIDE, WIDTH, SIDE])
             with container:
                 st_player(youtube_video_url.replace("?enablejsapi=1", "") + f'?start={start_time_seconds}&end={end_time_seconds}')
 
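
Note: the substance of this commit is the move from the deprecated @st.cache decorator to Streamlit's split caching API: st.cache_data for serializable values (the parsed args, the podcast CSV) and st.cache_resource for long-lived objects (the translator/retriever and the Together-hosted model). A minimal sketch of the pattern, with illustrative function names that are not taken from app.py:

import logging
import pandas as pd
import streamlit as st

@st.cache_data
def load_table(path: str) -> pd.DataFrame:
    # Re-runs only when `path` changes; the result is serialized and a fresh
    # copy is handed back on every call, so callers cannot mutate the cache.
    return pd.read_csv(path, sep=';')

@st.cache_resource
def build_pipeline(model_name: str, _logger: logging.Logger) -> dict:
    # One shared instance per process, returned without copying. The leading
    # underscore tells Streamlit not to hash `_logger` -- the same convention
    # the commit applies to the logger argument of get_basics_comp().
    _logger.info("loading %s", model_name)
    return {"model": model_name}  # stand-in for the real retriever/LLM objects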
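
get_basics_comp() now also renders Streamlit elements from inside a cached function (st.title, and presumably the selectbox that produces video_option), which is why the decorator carries experimental_allow_widgets=True. My understanding of that flag is that the widget's value is folded into the cache key instead of being rejected; roughly, under that assumption:

import streamlit as st

@st.cache_resource(experimental_allow_widgets=True)
def pick_podcast(options):
    # Without the flag, Streamlit disallows input widgets inside cached
    # functions; with it, the selection below acts as an extra cache
    # parameter, so each chosen option gets its own cached result.
    return st.selectbox('Podcast: ', options)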
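
Finally, the hard-coded column maths (width = 50 for the main player, width = 40 for the fragment players) is replaced by the WIDTH/SIDE constants defined once in main() and reused for both st.columns calls. The pattern is a three-column layout whose outer columns act as gutters:

import streamlit as st

WIDTH = 50                  # centre column, in relative units
SIDE = (100 - WIDTH) / 2    # equal gutters on both sides

_, container, _ = st.columns([SIDE, WIDTH, SIDE])
with container:
    st.write("centred content")   # in app.py this slot holds the embedded player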