Spaces:

Arnasltlt
/

KlauskD

Runtime error

App Files Files Community

Arnasltlt committed on Mar 1, 2023

Commit

e5dcef9

•

1 Parent(s): 283c184

d

Browse files

faa

Files changed (9) hide show

.gitattributes +1 -0
README.md +6 -5
app.py +176 -0
main.py +16 -0
packages.txt +0 -0
processed/ddd .csv +0 -0
processed/embeddings.csv +3 -0
processed/embeddings_with_metadata.csv +0 -0
requirements.txt +10 -0

.gitattributes CHANGED Viewed

@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+processed/embeddings.csv filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,12 +1,13 @@
 ---
-title: KlauskD
-emoji: 🚀
-colorFrom: purple
-colorTo: indigo
 sdk: gradio
-sdk_version: 3.19.1
 app_file: app.py
 pinned: false
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: QandA
+emoji: 🏃
+colorFrom: indigo
+colorTo: green
 sdk: gradio
+sdk_version: 3.18.0
 app_file: app.py
 pinned: false
+duplicated_from: Arnasltlt/QandA
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,176 @@

+import os
+import gradio as gr
+import openai
+import pandas as pd
+from openai.embeddings_utils import distances_from_embeddings
+openai.api_key = os.environ["openai_key"]  # raises KeyError at import time if the variable is unset
+final_file = 'processed/embeddings_with_metadata.csv'
+# Load the combined DataFrame, using the first CSV column as its index
+df_combined = pd.read_csv(final_file, index_col=0)
+# Convert the 'embeddings' column from a string to a list. NOTE(review): eval() executes whatever the CSV cell contains; ast.literal_eval would be safer unless the file is fully trusted.
+df_combined['embeddings'] = df_combined['embeddings'].apply(eval)
+# ################################################################################
+# ### Step 12
+# ################################################################################
+def create_context(
+        question, df_combined, max_len=1800, size="ada"
+):
+    """
+    Create a context for a question by finding the most similar context from the dataframe
+    """
+    # Get the embeddings for the question
+    q_embeddings = openai.Embedding.create(input=question, engine='text-embedding-ada-002')['data'][0]['embedding']
+    # Get the distances from the embeddings
+    df_combined['distances'] = distances_from_embeddings(q_embeddings, df_combined['embeddings'].values,
+                                                         distance_metric='cosine')
+    # additional_context = {'file_name':df_combined['fname'],'start':df_combined['start'],'end':df_combined['end']}
+    # print(additional_context)
+    returns = []
+    cur_len = 0
+    additional_context_list = []
+    for i, row in df_combined.sort_values('distances', ascending=True).iterrows():
+        print(i)
+        df_old = pd.read_csv('processed/ddd .csv')
+        try:
+            additional_context = {"fname_value": df_old.at[i, 'fname'], "start": df_old.at[i, 'start'],
+                                  "end": df_old.at[i, 'end']}
+        except KeyError:
+            print(f"KeyError: {i} is not a valid index value")
+            continue
+        additional_context_list.append(additional_context)
+        # Add the length of the text to the current length
+        cur_len += row['n_tokens'] + 4
+        # If the context is too long, break
+        if cur_len > max_len:
+            break
+        # Else add it to the text that is being returned
+        returns.append(row["text"])
+    print(additional_context_list)
+    # Return the context and additional context as a dictionary
+    context = "\n\n###\n\n".join(returns)
+    return {'context': context, "add_context": additional_context_list}
+def answer_question(
+        df_combined,
+        model="text-davinci-003",
+        question="",
+        max_len=2500,
+        size="ada",
+        debug=False,
+        max_tokens=400,
+        stop_sequence=None
+):
+    """
+    Answer a question based on the most similar context from the dataframe texts
+    """
+    context = create_context(
+        question,
+        df_combined,
+        max_len=max_len,
+        size=size,
+    )
+    # If debug, print the raw model response
+    if debug:
+        context = context['context']
+        print("Context:\n" + context)
+        print("\n\n")
+    try:
+        # Create a completions using the questin and context
+        response = openai.Completion.create(
+            prompt=f"You're an assistant of a Dr. that holds a phd in Biochemistry. You help to answer peoples questions using Dr. Dougs transcripts. Answer the question in a short but clearly understandable way given the provided transcript , and if the question can't be answered based on the transcript, say \"I don't know yet.\"\n\n \"\n\nTranscript: {context['context']}\n\n---\n\nQuestion: {question}\nAnswer:",
+            temperature=0,
+            max_tokens=max_tokens,
+            top_p=1,
+            frequency_penalty=0,
+            presence_penalty=0,
+            stop=stop_sequence,
+            model=model,
+        )
+        answer = response["choices"][0]["text"].strip()
+        return {'Answer': f'{answer}', 'Context': f'{context["context"]}','Additional_context':f'{context["add_context"]}'}
+    except Exception as e:
+        print(e)
+        return ""
+start_sequence = "\nQuestion:"  # not referenced anywhere else in the visible code
+restart_sequence = "\nAnswer: "  # not referenced anywhere else in the visible code
+prompt = "Koks tinkamiausias eterinis aliejus pagerinti smegenų veiklai? Atsakyk Lietuviškai."  # example question, used as the Textbox placeholder in the UI
+def chatgpt_clone(input, history):
+    history = history or []
+    s = list(sum(history, ()))
+    s.append(input)
+    inp = ' '.join(s)
+    output_og = answer_question(df_combined, question=f"{inp}", debug=False)
+    output = output_og['Answer'].replace('\n', ' ')
+    context = output_og['Context'].replace('\n', '<br>')
+    additional_context = output_og['Additional_context'].replace('\n', '<br>')
+    history.append((input, output))
+    return history, history,context, additional_context
+block = gr.Blocks()
+with block:
+    with gr.Tab("Chat"):
+        gr.Markdown("""<h1><center>Pokalbis su ponu D.</center></h1>
+        """)
+        chatbot = gr.Chatbot()
+        message = gr.Textbox(placeholder=prompt)
+        state = gr.Variable()
+        submit = gr.Button("SEND")
+        # df = gr.dataframe(columns=['text', 'n_tokens','embeddings'], data=[df])
+    with gr.Tab("Data"):
+        #context = gr.TextArea(label="Context")
+        context = gr.HTML(label="Context")
+    with gr.Tab("Video"):
+        gr.Markdown("""<h1><center>Video</center></h1>
+        """)
+        gr.Video("https://www.youtube.com/watch?v=3q3Y8ZdD0aQ")
+        additional_context = gr.TextArea(label="Context")
+    submit.click(chatgpt_clone, inputs=[message, state], outputs=[chatbot, state, context, additional_context])
+block.launch()
+##archive
+# HF_TOKEN = os.getenv('HF_TOKEN')
+# hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "FeedbackontalkingtoD")
+#
+# with gr.Blocks() as demo:
+#     klausimas = gr.Textbox(label="Klausimas")
+#     atsakymas = gr.Textbox(label="Atsakymas!")
+#     klausimas.change(answer_question_gr, klausimas, atsakymas)
+#
+#
+# demo.launch()

main.py ADDED Viewed

	@@ -0,0 +1,16 @@

+# This is a sample Python script.
+# Press ⌃R to execute it or replace it with your code.
+# Press Double ⇧ to search everywhere for classes, files, tool windows, actions, and settings.
+def print_hi(name):
+    # Use a breakpoint in the code line below to debug your script.
+    print(f'Hi, {name}')  # Press ⌘F8 to toggle the breakpoint.
+# Run the sample greeting only when this file is executed as a script.
+if __name__ == '__main__':
+    print_hi('PyCharm')
+# See PyCharm help at https://www.jetbrains.com/help/pycharm/

packages.txt ADDED Viewed

File without changes

processed/ddd .csv ADDED Viewed

The diff for this file is too large to render. See raw diff

processed/embeddings.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e6878e0932a911df886e624f1c7097bc425f04f8e959a18c9083fe92d45ba2d1
+size 5044557

processed/embeddings_with_metadata.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+tiktoken
+openai
+pandas
+numpy
+plotly
+scipy
+sklearn
+matplotlib
+scikit-learn
+openai[embeddings]