kwabs22 committed
Commit 43647c4
1 Parent(s): 3dc2230

RAG Placeholder demo test

Files changed (3):
  1. README.md +2 -0
  2. app.py +144 -12
  3. requirements.txt +2 -1
README.md CHANGED
@@ -7,6 +7,8 @@ sdk: gradio
 sdk_version: 4.36.1
 app_file: app.py
 pinned: false
+models:
+- Qwen/Qwen2-0.5B-Instruct
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -10,12 +10,80 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 from threading import Thread
 import time
-
-# Initialize the zero tensor on CUDA
+import psutil
+from sentence_transformers import SentenceTransformer
+
+# # Initialize the zero tensor on CUDA
+# zero = torch.Tensor([0]).cuda()
+# print(zero.device) # This will print 'cpu' outside the @spaces.GPU decorated function
+
+# # Load the model and tokenizer
+# llmguide_model = AutoModelForCausalLM.from_pretrained(
+#     "Qwen/Qwen2-0.5B-Instruct",
+#     torch_dtype="auto",
+#     device_map="auto"
+# )
+# llmguide_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
+
+# @spaces.GPU
+# def llmguide_generate_response(prompt, stream=False):
+#     print(zero.device) # This will print 'cuda:0' inside the @spaces.GPU decorated function
+
+#     messages = [
+#         {"role": "system", "content": "You are a helpful assistant."},
+#         {"role": "user", "content": prompt}
+#     ]
+#     text = llmguide_tokenizer.apply_chat_template(
+#         messages,
+#         tokenize=False,
+#         add_generation_prompt=True
+#     )
+#     model_inputs = llmguide_tokenizer([text], return_tensors="pt").to(zero.device)
+
+#     start_time = time.time()
+#     total_tokens = 0
+
+#     if stream:
+#         streamer = TextIteratorStreamer(llmguide_tokenizer, skip_special_tokens=True)
+#         generation_kwargs = dict(
+#             model_inputs,
+#             streamer=streamer,
+#             max_new_tokens=512,
+#             temperature=0.7,
+#         )
+#         thread = Thread(target=llmguide_model.generate, kwargs=generation_kwargs)
+#         thread.start()
+
+#         generated_text = ""
+#         for new_text in streamer:
+#             generated_text += new_text
+#             total_tokens += 1
+#             current_time = time.time()
+#             tokens_per_second = total_tokens / (current_time - start_time)
+#             yield generated_text, f"{tokens_per_second:.2f}"
+#     else:
+#         generated_ids = llmguide_model.generate(
+#             model_inputs.input_ids,
+#             max_new_tokens=512
+#         )
+#         generated_ids = [
+#             output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+#         ]
+#         response = llmguide_tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+#         total_tokens = len(generated_ids[0])
+#         end_time = time.time()
+#         tokens_per_second = total_tokens / (end_time - start_time)
+#         yield response, f"{tokens_per_second:.2f}"
+
+
+# Initialize GPU tensor
 zero = torch.Tensor([0]).cuda()
 print(zero.device) # This will print 'cpu' outside the @spaces.GPU decorated function
 
-# Load the model and tokenizer
+# Load the embedding model
+embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
+
+# Load the Qwen model and tokenizer
 llmguide_model = AutoModelForCausalLM.from_pretrained(
     "Qwen/Qwen2-0.5B-Instruct",
     torch_dtype="auto",
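The zero-tensor lines kept above are the stock ZeroGPU probe: at import time the process has no GPU attached, and CUDA only becomes real inside a @spaces.GPU-decorated call. A condensed sketch of that pattern, assuming the `spaces` package that Spaces ZeroGPU hardware provides (not part of this commit):

import spaces
import torch

zero = torch.Tensor([0]).cuda()
print(zero.device)  # 'cpu' at import time, before any GPU is attached

@spaces.GPU
def probe():
    print(zero.device)  # 'cuda:0' while the decorated call holds a GPU
    return str(zero.device)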
@@ -23,6 +91,29 @@ llmguide_model = AutoModelForCausalLM.from_pretrained(
 )
 llmguide_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
 
+# Sample knowledge base (replace with your own data)
+knowledge_base = [
+    "The capital of France is Paris.",
+    "Python is a popular programming language.",
+    "Machine learning is a subset of artificial intelligence.",
+    "The Earth orbits around the Sun.",
+]
+
+# Create embeddings for the knowledge base
+knowledge_base_embeddings = embedding_model.encode(knowledge_base)
+
+def retrieve(query, k=2):
+    query_embedding = embedding_model.encode([query])
+    similarities = torch.nn.functional.cosine_similarity(torch.tensor(query_embedding), torch.tensor(knowledge_base_embeddings))
+    top_k_indices = similarities.argsort(descending=True)[:k]
+    return [knowledge_base[i] for i in top_k_indices]
+
+def get_resource_usage():
+    ram_usage = psutil.virtual_memory().percent
+    gpu_memory_allocated = torch.cuda.memory_allocated() / (1024 ** 3)  # Convert to GB
+    gpu_memory_reserved = torch.cuda.memory_reserved() / (1024 ** 3)  # Convert to GB
+    return f"RAM Usage: {ram_usage:.2f}%, GPU Memory Allocated: {gpu_memory_allocated:.2f}GB, GPU Memory Reserved: {gpu_memory_reserved:.2f}GB"
+
 @spaces.GPU
 def llmguide_generate_response(prompt, stream=False):
     print(zero.device) # This will print 'cuda:0' inside the @spaces.GPU decorated function
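The new retrieve() scores every knowledge-base sentence by cosine similarity against the query embedding and keeps the top k. A minimal CPU-only sketch of the same lookup, with an illustrative two-sentence corpus standing in for knowledge_base:

import torch
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('all-MiniLM-L6-v2')
docs = [
    "The capital of France is Paris.",
    "Python is a popular programming language.",
]
doc_emb = torch.tensor(model.encode(docs))  # shape (2, 384)
query_emb = torch.tensor(model.encode(["Which city is the capital of France?"]))  # shape (1, 384)

# cosine_similarity broadcasts the single query row against each document row
scores = torch.nn.functional.cosine_similarity(query_emb, doc_emb)  # shape (2,)
top = scores.argsort(descending=True)[:1]
print([docs[i] for i in top])  # the Paris sentence should rank first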
@@ -58,7 +149,10 @@ def llmguide_generate_response(prompt, stream=False):
             total_tokens += 1
             current_time = time.time()
             tokens_per_second = total_tokens / (current_time - start_time)
-            yield generated_text, f"{tokens_per_second:.2f}"
+            yield generated_text, f"{tokens_per_second:.2f}", ""
+
+        resource_usage = get_resource_usage()
+        yield generated_text, f"{tokens_per_second:.2f}", resource_usage
     else:
         generated_ids = llmguide_model.generate(
             model_inputs.input_ids,
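The resource_usage string yielded above comes from get_resource_usage(). A standalone sketch of the same probe, with a CPU-only guard added as an assumption of this sketch (the Space itself always runs the CUDA branch):

import psutil
import torch

def resource_usage_sketch():
    ram = psutil.virtual_memory().percent
    if torch.cuda.is_available():
        allocated = torch.cuda.memory_allocated() / (1024 ** 3)  # GB
        reserved = torch.cuda.memory_reserved() / (1024 ** 3)    # GB
        return f"RAM Usage: {ram:.2f}%, GPU Memory Allocated: {allocated:.2f}GB, GPU Memory Reserved: {reserved:.2f}GB"
    return f"RAM Usage: {ram:.2f}%"

print(resource_usage_sketch())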
@@ -71,7 +165,19 @@ def llmguide_generate_response(prompt, stream=False):
         total_tokens = len(generated_ids[0])
         end_time = time.time()
         tokens_per_second = total_tokens / (end_time - start_time)
-        yield response, f"{tokens_per_second:.2f}"
+        resource_usage = get_resource_usage()
+        yield response, f"{tokens_per_second:.2f}", resource_usage
+
+    # Clear CUDA cache
+    # torch.cuda.empty_cache()
+    # gc.collect()
+
+def rag(query, stream=False):
+    retrieved_docs = retrieve(query)
+    context = " ".join(retrieved_docs)
+    prompt = f"Context: {context}\nQuestion: {query}\nAnswer:"
+    return llmguide_generate_response(prompt, stream)
+
 
 
 #--------------------------------------------------------------------------------------------------------------------------------
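Because llmguide_generate_response uses yield, rag() hands back a generator of (generated_text, tokens_per_second, resource_usage) tuples; streaming yields carry an empty resource field and the final yield carries the full summary. A hypothetical driver, not part of the commit, would consume it like this:

# assumes the definitions above are importable from app.py
for text, tps, usage in rag("What is the capital of France?", stream=True):
    print(f"[{tps} tok/s] {text}")
print(usage)  # resource summary from the final yield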
@@ -718,12 +824,25 @@ with gr.Blocks() as demo:
     with gr.Accordion("Qwen 0.5B as Space Guide Tests", open=False):
         gr.HTML("Placeholder for FAQ type - front end as prompt engineering for the first message to force direction of conversion")
         gr.HTML("Placeholder for weak RAG Type - Explanations through an opensource embeddings engine")
+        gr.Interface(
+            fn=rag,
+            inputs=[
+                gr.Textbox(lines=2, placeholder="Enter your question here..."),
+                gr.Checkbox(label="Stream output")
+            ],
+            outputs=[
+                gr.Textbox(label="Generated Response"),
+                gr.Textbox(label="Tokens per second"),
+                gr.Textbox(label="Resource Usage")
+            ],
+            title="RAG Q&A System with GPU Acceleration and Resource Monitoring",
+            description="Ask a question and get an answer based on the retrieved context. The response is generated using a GPU-accelerated model. Resource usage is logged at the end of generation."
+        )
         ("Placeholder for https://huggingface.co/h2oai/h2o-danube3-500m-chat-GGUF as alternative")
         ("Placeholder for qwen 2 72b as alternative use checkbox and gradio client api call")
         gr.Markdown("# Qwen-0.5B-Instruct Language Model")
         gr.Markdown("This demo uses the Qwen-0.5B-Instruct model to generate responses based on your input.")
-        gr.HTML("Example prompt: <br>I am writing a story about a chef. please write dishes to appear on the menu. <br>What are the most common decisions that a chef story would include?")
-        gr.HTML("Continue this config - Paste any complete block of the config")
+        gr.HTML("Example prompts: <br>I am writing a story about a chef. please write dishes to appear on the menu. <br>What are the most common decisions that a chef story would include? <br>What are the kinds problems that a chef story would include? <br>What are the kinds of out of reach goals that a chef story would include? <br>Continue this config - Paste any complete block of the config")
 
     with gr.Row():
         with gr.Column():
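The gr.Interface above relies on Gradio's generator support: when fn yields, the outputs update on every yield, which is how the token stream and the final resource report share one endpoint. A self-contained toy showing just that mechanism (illustrative function, not part of the commit):

import time
import gradio as gr

def count_up(n):
    text = ""
    for i in range(int(n)):
        text += f"{i} "
        time.sleep(0.1)
        yield text  # each yield repaints the output textbox

demo = gr.Interface(fn=count_up, inputs=gr.Number(value=5), outputs=gr.Textbox())
# demo.launch()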
@@ -976,7 +1095,7 @@ Creating more diverse paths through the game""")
 
     with gr.Tab("Main areas of considerations"):
         with gr.Tab("Mermaid Graphs and Nesting"):
-            gr.HTML("Claude Artifacts to illustrate <br> Nsted Structure - https://claude.site/artifacts/4a910d81-1541-49f4-8531-4f27fe56cd1e <br> https://claude.site/artifacts/265e9242-2093-46e1-9011-ed6ad938be90?fullscreen=false <br> ")
+            gr.HTML("Claude Artifacts to illustrate nested structure brainstorms - <br> https://claude.site/artifacts/4a910d81-1541-49f4-8531-4f27fe56cd1e <br> https://claude.site/artifacts/265e9242-2093-46e1-9011-ed6ad938be90?fullscreen=false <br> ")
             gr.HTML("")
         with gr.Tab(""):
             gr.HTML("")
@@ -1713,7 +1832,7 @@ Would you like me to elaborate on any of these ideas or show how to implement th
     ewpgenerate_button.click(generate_story_and_timeline, inputs=[ewpgenerate_no_ui_timeline_points, ewpgenerate_no_media_timeline_points, ewpgenerate_with_media_check], outputs=[ewptimeline_output_with_assets, ewptimeline_output, ewpstory_output, ewpwacustom_config, ewpgame_structure_output_text]) #ewpgame_structure_output_text_with_media, ewpgame_structure_output_text])
 
     with gr.Tab("Asset Generation Considerations"):
-        gr.HTML("With some asset category ideas from gemini-1.5-flash-api-0514 and reka-flash-preview-20240611 <br><br>Licenses for the spaces still to be evaluated - June 2024")
+        gr.HTML("With some asset category ideas from gemini-1.5-flash-api-0514 and reka-flash-preview-20240611 <br><br>Licenses for the spaces still to be evaluated - June 2024 <br> Users to follow with cool spaces - https://huggingface.co/fffiloni, https://huggingface.co/artificialguybr, https://huggingface.co/radames, https://huggingface.co/multimodalart, ")
 
         with gr.Accordion("LLM HF Spaces/Sites (Click Here to Open) - Ask for a story and suggestions based on the autoconfig", open=False):
             with gr.Row():
@@ -1732,17 +1851,30 @@ Would you like me to elaborate on any of these ideas or show how to implement th
             with gr.Tab("Maths"):
                 gr.HTML("https://huggingface.co/spaces/AI-MO/math-olympiad-solver")
 
+            with gr.Tab("Media Understanding"):
+                gr.HTML("NPC Response Engines? Camera, Shopkeeper, Companion, Enemies, etc.")
+                with gr.Accordion("Media understanding model Spaces/Sites (Click Here to Open) - Image to Blender?", open=False):
+                    with gr.Row():
+                        linktomediaunderstandingspace = gr.Dropdown(choices=["--Weak Audio Understanding = Audio to text, Weak Video Understanding = Video to Image to Image Understanding", "https://skalskip-florence-2-video.hf.space", "https://kingnish-opengpt-4o.hf.space",
+                            "--Image Understanding--", "https://qnguyen3-nanollava.hf.space", "https://skalskip-better-florence-2.hf.space", ],
+                            label="Choose/Cancel type any .hf.space link here (can also type a link)'", allow_custom_value=True)
+                        mediaunderstandingspacebtn = gr.Button("Use the chosen URL to load interface with a media understanding space")
+                    mediaunderstandingspace = gr.HTML("Audio Space Chosen will load here")
+                    mediaunderstandingspacebtn.click(display_website, inputs=linktomediaunderstandingspace, outputs=mediaunderstandingspace)
+
+
             with gr.Tab("Images"):
                 with gr.Accordion("Image Gen or Animation HF Spaces/Sites (Click Here to Open) - Have to download and upload at the the top", open=False):
                     # with gr.Tabs("General"):
                     with gr.Row():
-                        linktoimagegen = gr.Dropdown(choices=["--General--", "https://pixart-alpha-pixart-sigma.hf.space", "https://stabilityai-stable-diffusion-3-medium.hf.space", "https://gokaygokay-kolors.hf.space", "https://prodia-sdxl-stable-diffusion-xl.hf.space", "https://prodia-fast-stable-diffusion.hf.space", "https://bytedance-hyper-sdxl-1step-t2i.hf.space", "https://multimodalart-cosxl.hf.space", "https://cagliostrolab-animagine-xl-3-1.hf.space", "https://stabilityai-stable-diffusion.hf.space",
+                        linktoimagegen = gr.Dropdown(choices=["Text-Interleaved", "https://ethanchern-anole.hf.space",
+                            "--General--", "https://pixart-alpha-pixart-sigma.hf.space", "https://stabilityai-stable-diffusion-3-medium.hf.space", "https://prodia-sdxl-stable-diffusion-xl.hf.space", "https://prodia-fast-stable-diffusion.hf.space", "https://bytedance-hyper-sdxl-1step-t2i.hf.space", "https://multimodalart-cosxl.hf.space", "https://cagliostrolab-animagine-xl-3-1.hf.space", "https://stabilityai-stable-diffusion.hf.space",
                             "--Speed--", "https://radames-real-time-text-to-image-sdxl-lightning.hf.space", "https://ap123-sdxl-lightning.hf.space",
                             "--LORA Support--", "https://artificialguybr-artificialguybr-demo-lora.hf.space", "https://artificialguybr-studio-ghibli-lora-sdxl.hf.space", "https://artificialguybr-pixel-art-generator.hf.space", "https://fffiloni-sdxl-control-loras.hf.space", "https://ehristoforu-dalle-3-xl-lora-v2.hf.space",
                             "--Image to Image--", "https://lllyasviel-ic-light.hf.space", "https://gparmar-img2img-turbo-sketch.hf.space",
                             "--Control of Pose--", "https://instantx-instantid.hf.space", "https://modelscope-transferanything.hf.space", "https://okaris-omni-zero.hf.space"
                             "--Control of Shapes--", "https://linoyts-scribble-sdxl-flash.hf.space",
-                            "--Foreign Language Input--", ""], label="Choose/Cancel type any .hf.space link here (can also type a link)'", allow_custom_value=True)
+                            "--Foreign Language Input--", "https://gokaygokay-kolors.hf.space"], label="Choose/Cancel type any .hf.space link here (can also type a link)'", allow_custom_value=True)
                         imagegenspacebtn = gr.Button("Use the chosen URL to load interface with a image generation model")
 
                     imagegenspace = gr.HTML("Image Space Chosen will load here")
@@ -1784,7 +1916,7 @@ Would you like me to elaborate on any of these ideas or show how to implement th
                 gr.HTML("Placeholder for models small enough to run on cpu here in this space that can assist")
 
             with gr.Tab("Audio"):
-                with gr.Accordion("3D Model Spaces/Sites (Click Here to Open) - Image to Blender?", open=False):
+                with gr.Accordion("Audio Spaces/Sites (Click Here to Open) - Image to Blender?", open=False):
                     with gr.Row():
                         linktoaudiiogenspace = gr.Dropdown(choices=["General", "https://artificialguybr-stable-audio-open-zero.hf.space", "",
                             "--Talking Portrait--","https://fffiloni-tts-hallo-talking-portrait.hf.space"],
requirements.txt CHANGED
@@ -1,2 +1,3 @@
 transformers
-accelerate
+accelerate
+sentence-transformers