legolasyiu committed on
Commit
87542db
1 Parent(s): eb457e4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -1
app.py CHANGED
@@ -4,6 +4,82 @@ import torch
4
 
5
  import gradio as gr
6
 
7
- demo = gr.load("EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code", src="models")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  demo.launch()
 
4
 
5
  import gradio as gr
6
 
7
# --- Imports (grouped stdlib / third-party) ---------------------------------
import os  # BUG FIX: `os` was used below for os.environ but never imported.

import gradio as gr
from dotenv import load_dotenv
from gradio import ChatMessage
from langchain import hub
from langchain.agents import AgentExecutor, create_openai_tools_agent, load_tools
from langchain_community.chat_models.huggingface import ChatHuggingFace
# BUG FIX: HuggingFacePipeline was referenced below but never imported.
from langchain_community.llms import HuggingFaceEndpoint, HuggingFacePipeline
from langchain_openai import ChatOpenAI
from transformers import BitsAndBytesConfig

# Read API keys (HF_TOKEN, SERPAPI_API_KEY, ...) from a local .env file.
load_dotenv()

# Environment variables
HF_TOKEN = os.environ.get('HF_TOKEN') # Ensure token is set
#model = ChatOpenAI(temperature=0, streaming=True)

# Quantization to 4-bit NF4 (the old comment said 8-bit, but the config below
# is 4-bit); requires a GPU with bitsandbytes support.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_use_double_quant=True,
)

# 2. Create model — a local text-generation pipeline wrapped for LangChain.
model = HuggingFacePipeline.from_model_id(
    model_id="EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003",
    task="text-generation",
    pipeline_kwargs=dict(
        max_new_tokens=2048,
        do_sample=False,          # deterministic decoding
        repetition_penalty=1.03,
        return_full_text=False,   # return only the completion, not the prompt
    ),
    model_kwargs={"quantization_config": quantization_config},
)

# Web-search tool for the agent (NOTE(review): needs SERPAPI_API_KEY set —
# confirm it is provided alongside HF_TOKEN).
tools = load_tools(["serpapi"])

# Get the prompt to use - you can modify this!
prompt = hub.pull("hwchase17/openai-tools-agent")
# print(prompt.messages) -- to see the prompt
agent = create_openai_tools_agent(
    model.with_config({"tags": ["agent_llm"]}), tools, prompt
)
agent_executor = AgentExecutor(agent=agent, tools=tools).with_config(
    {"run_name": "Agent"}
)
56
async def interact_with_langchain_agent(prompt, messages):
    """Stream one agent turn into a Gradio chat history.

    Args:
        prompt: The user's chat message text.
        messages: Running list of gradio ``ChatMessage`` objects; appended to
            in place.

    Yields:
        The updated ``messages`` list after each appended message so Gradio
        can re-render the chatbot incrementally.
    """
    # Echo the user's message into the history right away.
    messages.append(ChatMessage(role="user", content=prompt))
    yield messages
    # Stream events from the module-level AgentExecutor.
    async for chunk in agent_executor.astream(
        {"input": prompt}
    ):
        # Intermediate tool invocations: show each tool's log under a
        # "Used tool ..." title in the chat metadata.
        if "steps" in chunk:
            for step in chunk["steps"]:
                messages.append(ChatMessage(role="assistant", content=step.action.log,
                                  metadata={"title": f"🛠️ Used tool {step.action.tool}"}))
                yield messages
        # Final answer from the agent run.
        if "output" in chunk:
            messages.append(ChatMessage(role="assistant", content=chunk["output"]))
            yield messages
70
+
71
+
72
# Gradio UI: a chatbot wired to the streaming LangChain agent above.
with gr.Blocks() as demo:
    gr.Markdown("# Chat with a LangChain Agent 🦜⛓️ and see its thoughts 💭")
    chatbot = gr.Chatbot(
        type="messages",  # history is a list of gradio ChatMessage objects
        label="Agent",
        avatar_images=(
            None,  # default avatar for the user
            "https://em-content.zobj.net/source/twitter/141/parrot_1f99c.png",
        ),
    )
    # Renamed from `input` to avoid shadowing the builtin.
    input_box = gr.Textbox(lines=1, label="Chat Message")
    # BUG FIX: the original wired `.submit` to undefined names `input_2` /
    # `chatbot_2`, which raised NameError at startup; use the components
    # actually defined above.
    input_box.submit(interact_with_langchain_agent, [input_box, chatbot], [chatbot])

demo.launch()