yiyii committed on
Commit 2e99c12
1 Parent(s): e242b04
Files changed (1)
  1. app.py +60 -7
app.py CHANGED
@@ -88,14 +88,40 @@ load_vector_store = Chroma(persist_directory="stores/story_cosine", embedding_fu
 # persist_directory="stores/story_cosine": load the existing vector store from "stores/story_cosine"
 # embedding_function=embeddings: use the bge embedding model when adding new data to the vector store
 
-# Only get the 3 most similar documents from the dataset
-retriever = load_vector_store.as_retriever(search_kwargs={"k": 3})
-
 client = InferenceClient(
     "mistralai/Mistral-7B-Instruct-v0.1"
 )
 
-def generate(image, temperature=0.9, max_new_tokens=1500, top_p=0.95, repetition_penalty=1.0):
+def generate(image, temperature=0.9, max_new_tokens=1500, top_p=0.95, repetition_penalty=1.0, chunk_size=200, chunk_overlap=20, top_k=3):
+    # load the txt file
+    with open("story.txt", "r") as f:
+        # "r": read-only mode
+        state_of_the_union = f.read()
+        # read the file into a single string
+    # split the content into chunks
+    text_splitter = TokenTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+    # TokenTextSplitter() ensures the integrity of words
+    # each chunk overlaps the previous chunk by chunk_overlap tokens
+    texts = text_splitter.split_text(state_of_the_union)
+    print("...........................................")
+    # print the first chunk
+    print("text[0]: ", texts[0])
+    # create embeddings for the chunks with the bge model, then save the vectors in the Chroma vector database
+    # use an hnsw (hierarchical navigable small world) index to make searching efficient
+    # use cosine similarity to measure similarity (crucial when performing similarity search)
+    # hnsw builds a graph-based index for approximate nearest-neighbor searches
+    # hnsw organizes the data into an efficient structure that supports rapid retrieval (speeds up the search)
+    # cosine similarity tells the hnsw algorithm how to measure the distance between vectors
+    # by setting the space to cosine, the index measures the vectors' similarity with cosine similarity
+    vector_store = Chroma.from_texts(texts, embeddings, collection_metadata={"hnsw:space": "cosine"}, persist_directory="stores/story_cosine")
+    print("vector store created........................")
+
+    load_vector_store = Chroma(persist_directory="stores/story_cosine", embedding_function=embeddings)
+    # persist_directory="stores/story_cosine": load the existing vector store from "stores/story_cosine"
+    # embedding_function=embeddings: use the bge embedding model when adding new data to the vector store
+    # only get the top_k most similar documents from the dataset
+    retriever = load_vector_store.as_retriever(search_kwargs={"k": top_k})
+
     image_caption, gender, age, emotion = get_image_info(image)
     print("............................................")
     print("image_caption:", image_caption)
@@ -180,7 +206,7 @@ demo = gr.Interface(fn=generate,
         gr.Image(sources=["upload", "webcam"], label="Upload Image", type="pil"),
 
         gr.Slider(
-            label="Temperature",
+            label="temperature",
             value=0.9,
             minimum=0.0,
             maximum=1.0,
@@ -190,7 +216,7 @@ demo = gr.Interface(fn=generate,
         ),
 
         gr.Slider(
-            label="Max new tokens",
+            label="max new tokens",
             value=1500,
             minimum=0,
             maximum=3000,
@@ -199,7 +225,7 @@ demo = gr.Interface(fn=generate,
         info="The maximum number of new tokens"),
 
         gr.Slider(
-            label="Top-p (nucleus sampling)",
+            label="top-p (nucleus sampling)",
             value=0.90,
             minimum=0.0,
             maximum=1,
@@ -208,13 +234,40 @@ demo = gr.Interface(fn=generate,
         info="Higher values sample more low-probability tokens",
         ),
         gr.Slider(
-            label="Repetition penalty",
+            label="repetition penalty",
             value=1.2,
             minimum=1.0,
             maximum=2.0,
             step=0.05,
             interactive=True,
             info="Penalize repeated tokens",
+        ),
+        gr.Slider(
+            label="chunk_size",
+            value=200,
+            minimum=50,
+            maximum=500,
+            step=1.0,
+            interactive=True,
+            info="Length of text chunks in tokens",
+        ),
+        gr.Slider(
+            label="chunk_overlap",
+            value=20,
+            minimum=0,
+            maximum=50,
+            step=1.0,
+            interactive=True,
+            info="Number of overlapping tokens between chunks",
+        ),
+        gr.Slider(
+            label="top-k",
+            value=3,
+            minimum=1,
+            maximum=10,
+            step=1.0,
+            interactive=True,
+            info="Number of top relevant documents to retrieve",
         )
     ],
     outputs=[gr.Textbox(label="Generated Story")],
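A wiring note on the three new sliders: gr.Interface passes the components in `inputs` to fn positionally, so the sliders only take effect if generate() accepts matching parameters in the same order (the signature change in the first hunk assumes this). A minimal sketch with a stub fn standing in for app.py's generate():

import gradio as gr

# stub in place of app.py's generate(); the parameter order must match
# the order of the components in `inputs`
def generate(image, temperature, max_new_tokens, top_p, repetition_penalty,
             chunk_size, chunk_overlap, top_k):
    return f"chunk_size={chunk_size}, chunk_overlap={chunk_overlap}, top_k={top_k}"

demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Image(type="pil"),
        gr.Slider(value=0.9, minimum=0.0, maximum=1.0, label="temperature"),
        gr.Slider(value=1500, minimum=0, maximum=3000, step=1, label="max new tokens"),
        gr.Slider(value=0.90, minimum=0.0, maximum=1.0, label="top-p (nucleus sampling)"),
        gr.Slider(value=1.2, minimum=1.0, maximum=2.0, step=0.05, label="repetition penalty"),
        gr.Slider(value=200, minimum=50, maximum=500, step=1, label="chunk_size"),
        gr.Slider(value=20, minimum=0, maximum=50, step=1, label="chunk_overlap"),
        gr.Slider(value=3, minimum=1, maximum=10, step=1, label="top-k"),
    ],
    outputs=[gr.Textbox(label="Generated Story")],
)

if __name__ == "__main__":
    demo.launch()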
 