Arnasltlt committed on
Commit
e5dcef9
1 Parent(s): 283c184
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ processed/embeddings.csv filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,12 +1,13 @@
1
  ---
2
- title: KlauskD
3
- emoji: 🚀
4
- colorFrom: purple
5
- colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 3.19.1
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: QandA
3
+ emoji: 🏃
4
+ colorFrom: indigo
5
+ colorTo: green
6
  sdk: gradio
7
+ sdk_version: 3.18.0
8
  app_file: app.py
9
  pinned: false
10
+ duplicated_from: Arnasltlt/QandA
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import ast
import os

import gradio as gr
import openai
import pandas as pd
from openai.embeddings_utils import distances_from_embeddings
7
+
8
# The OpenAI key is read from the "openai_key" environment variable; a missing
# variable raises KeyError at import time.
openai.api_key = os.environ["openai_key"]
final_file = 'processed/embeddings_with_metadata.csv'

# Load the combined DataFrame (first CSV column is used as the index).
df_combined = pd.read_csv(final_file, index_col=0)

# Convert the 'embeddings' column from its string form back to a Python list.
# ast.literal_eval only parses literals, so a crafted CSV cell cannot execute
# code the way the builtin eval() would.
df_combined['embeddings'] = df_combined['embeddings'].apply(ast.literal_eval)
16
+
17
+ # ################################################################################
18
+ # ### Step 12
19
+ # ################################################################################
20
def create_context(
    question, df_combined, max_len=1800, size="ada"
):
    """
    Create a context for a question by finding the most similar texts in the dataframe.

    The question is embedded with ``text-embedding-ada-002``, every row of
    ``df_combined`` is ranked by cosine distance to it, and row texts are
    collected in that order until the running token budget would be exceeded.

    Args:
        question: The question text to embed.
        df_combined: DataFrame providing the ``embeddings``, ``n_tokens`` and
            ``text`` columns. A ``distances`` column is written to it in place.
        max_len: Token budget for the collected context.
        size: Accepted for compatibility with callers; not used here.

    Returns:
        A dict with ``'context'`` (the selected texts joined by
        ``"\\n\\n###\\n\\n"``) and ``'add_context'`` (a list of
        ``{"fname_value", "start", "end"}`` dicts, one per selected text, in
        the same order).
    """
    # Get the embeddings for the question
    q_embeddings = openai.Embedding.create(
        input=question, engine='text-embedding-ada-002'
    )['data'][0]['embedding']

    # Get the distances from the embeddings
    df_combined['distances'] = distances_from_embeddings(
        q_embeddings, df_combined['embeddings'].values, distance_metric='cosine'
    )

    # The metadata file does not change between rows, so read it once instead
    # of once per loop iteration.
    df_old = pd.read_csv('processed/ddd .csv')

    returns = []
    additional_context_list = []
    cur_len = 0
    for i, row in df_combined.sort_values('distances', ascending=True).iterrows():
        # Rows without a matching metadata entry are skipped entirely.
        try:
            additional_context = {
                "fname_value": df_old.at[i, 'fname'],
                "start": df_old.at[i, 'start'],
                "end": df_old.at[i, 'end'],
            }
        except KeyError:
            print(f"KeyError: {i} is not a valid index value")
            continue

        # Add the length of the text to the current length
        # (the extra 4 is presumably an allowance for the separator - TODO confirm)
        cur_len += row['n_tokens'] + 4

        # If the context is too long, break
        if cur_len > max_len:
            break

        # Record text and metadata together, only after the budget check, so
        # 'add_context' never lists a chunk that is absent from 'context'.
        returns.append(row["text"])
        additional_context_list.append(additional_context)

    print(additional_context_list)

    # Return the context and additional context as a dictionary
    context = "\n\n###\n\n".join(returns)
    return {'context': context, "add_context": additional_context_list}
66
+
67
+
68
def answer_question(
    df_combined,
    model="text-davinci-003",
    question="",
    max_len=2500,
    size="ada",
    debug=False,
    max_tokens=400,
    stop_sequence=None
):
    """
    Answer a question based on the most similar context from the dataframe texts.

    Args:
        df_combined: DataFrame handed to ``create_context``.
        model: Completion model name passed to ``openai.Completion.create``.
        question: The question to answer.
        max_len: Token budget forwarded to ``create_context``.
        size: Forwarded to ``create_context``.
        debug: When true, print the retrieved context before querying the model.
        max_tokens: Maximum number of tokens requested for the completion.
        stop_sequence: Optional stop sequence for the completion.

    Returns:
        A dict with the string values ``'Answer'``, ``'Context'`` and
        ``'Additional_context'`` on success, or the empty string ``""`` if the
        completion request (or processing its response) raised an exception.
    """
    context = create_context(
        question,
        df_combined,
        max_len=max_len,
        size=size,
    )
    context_text = context['context']

    # If debug, print the retrieved context. `context` must stay a dict here:
    # rebinding it to the text made the lookups below fail whenever debug was on.
    if debug:
        print("Context:\n" + context_text)
        print("\n\n")

    try:
        # Create a completion using the question and context
        response = openai.Completion.create(
            prompt=f"You're an assistant of a Dr. that holds a phd in Biochemistry. You help to answer peoples questions using Dr. Dougs transcripts. Answer the question in a short but clearly understandable way given the provided transcript , and if the question can't be answered based on the transcript, say \"I don't know yet.\"\n\n \"\n\nTranscript: {context_text}\n\n---\n\nQuestion: {question}\nAnswer:",
            temperature=0,
            max_tokens=max_tokens,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
            stop=stop_sequence,
            model=model,
        )
        answer = response["choices"][0]["text"].strip()

        return {
            'Answer': str(answer),
            'Context': str(context_text),
            'Additional_context': str(context["add_context"]),
        }
    except Exception as e:
        # Best-effort boundary: report the failure and signal it with "".
        print(e)
        return ""
111
+
112
+
113
# Question/answer delimiters; neither is referenced by the rest of the visible code.
start_sequence, restart_sequence = "\nQuestion:", "\nAnswer: "

# Example question (in Lithuanian) shown as the placeholder of the chat textbox.
prompt = "Koks tinkamiausias eterinis aliejus pagerinti smegenų veiklai? Atsakyk Lietuviškai."
117
+
118
+
119
def chatgpt_clone(input, history):
    """
    Handle one chat turn: answer the message and extend the conversation history.

    All previous questions and answers are joined with the new message into a
    single question string, which is passed to ``answer_question``.

    Args:
        input: The user's new message. (The name shadows the builtin but is
            kept so the signature stays unchanged.)
        history: List of ``(message, answer)`` pairs from earlier turns, or a
            falsy value on the first turn.

    Returns:
        A 4-tuple ``(history, history, context, additional_context)`` matching
        the four output components wired to the submit button.
    """
    history = history or []

    # Flatten the (message, answer) pairs; unlike sum(history, ()) this is
    # linear and also accepts pairs stored as lists.
    turns = [text for pair in history for text in pair]
    turns.append(input)
    inp = ' '.join(turns)

    output_og = answer_question(df_combined, question=f"{inp}", debug=False)
    if not output_og:
        # answer_question returns "" when the completion failed; indexing that
        # by key would raise TypeError, so substitute a well-formed result.
        output_og = {
            'Answer': "Sorry, I could not generate an answer.",
            'Context': '',
            'Additional_context': '',
        }

    output = output_og['Answer'].replace('\n', ' ')
    # Context and Additional_context get <br> in place of newlines before display.
    context = output_og['Context'].replace('\n', '<br>')
    additional_context = output_og['Additional_context'].replace('\n', '<br>')

    history.append((input, output))
    return history, history, context, additional_context
130
+
131
+
132
# Assemble the three-tab interface (chat, retrieved context, video) and start it.
with gr.Blocks() as block:
    with gr.Tab("Chat"):
        gr.Markdown("<h1><center>Pokalbis su ponu D.</center></h1>\n")
        chatbot = gr.Chatbot()
        message = gr.Textbox(placeholder=prompt)
        # Holds the conversation history between submissions.
        state = gr.Variable()
        submit = gr.Button("SEND")

    with gr.Tab("Data"):
        context = gr.HTML(label="Context")

    with gr.Tab("Video"):
        gr.Markdown("<h1><center>Video</center></h1>\n")
        gr.Video("https://www.youtube.com/watch?v=3q3Y8ZdD0aQ")
        additional_context = gr.TextArea(label="Context")

    # One click answers the message and refreshes all four outputs.
    submit.click(
        chatgpt_clone,
        inputs=[message, state],
        outputs=[chatbot, state, context, additional_context],
    )

block.launch()
161
+
162
+
163
+
164
+
165
+ ##archive
166
+
167
+ # HF_TOKEN = os.getenv('HF_TOKEN')
168
+ # hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "FeedbackontalkingtoD")
169
+ #
170
+ # with gr.Blocks() as demo:
171
+ # klausimas = gr.Textbox(label="Klausimas")
172
+ # atsakymas = gr.Textbox(label="Atsakymas!")
173
+ # klausimas.change(answer_question_gr, klausimas, atsakymas)
174
+ #
175
+ #
176
+ # demo.launch()
main.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This is a sample Python script.
2
+
3
+ # Press ⌃R to execute it or replace it with your code.
4
+ # Press Double ⇧ to search everywhere for classes, files, tool windows, actions, and settings.
5
+
6
+
7
def print_hi(name):
    """Print the greeting ``Hi, <name>`` to standard output."""
    greeting = f'Hi, {name}'
    print(greeting)


# Run the sample greeting only when the file is executed as a script.
if __name__ == '__main__':
    print_hi('PyCharm')
15
+
16
+ # See PyCharm help at https://www.jetbrains.com/help/pycharm/
packages.txt ADDED
File without changes
processed/ddd .csv ADDED
The diff for this file is too large to render. See raw diff
 
processed/embeddings.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6878e0932a911df886e624f1c7097bc425f04f8e959a18c9083fe92d45ba2d1
3
+ size 5044557
processed/embeddings_with_metadata.csv ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ tiktoken
2
+ openai
3
+ pandas
4
+ numpy
5
+ plotly
6
+ scipy
7
+ sklearn
8
+ matplotlib
9
+ scikit-learn
10
+ openai[embeddings]