zhengr committed on
Commit
d6611b3
1 Parent(s): a077c9d

Upload 5 files

Browse files
Files changed (5) hide show
  1. Dockerfile +67 -0
  2. README.md +3 -3
  3. app.py +1 -0
  4. entrypoint.sh +20 -0
  5. ollama-api-demo.ipynb +220 -0
Dockerfile ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Builder stage
2
+ # FROM ubuntu:latest
3
+
4
+ # # Update packages and install curl and gnupg
5
+ # RUN apt-get update && apt-get install -y \
6
+ # curl \
7
+ # gnupg
8
+
9
+ # # Add NVIDIA package repositories
10
+ # RUN curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
11
+ # && echo "deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://nvidia.github.io/libnvidia-container/stable/deb/ $(. /etc/os-release; echo $UBUNTU_CODENAME) main" > /etc/apt/sources.list.d/nvidia-container-toolkit.list
12
+
13
+ # # Install NVIDIA container toolkit (Check for any updated methods or URLs for Ubuntu jammy)
14
+ # RUN apt-get update && apt-get install -y nvidia-container-toolkit || true
15
+
16
+ # # Install application
17
+ # RUN curl https://ollama.ai/install.sh | sh
18
+ # # Below is to fix embedding bug as per
19
+ # # RUN curl -fsSL https://ollama.com/install.sh | sed 's#https://ollama.com/download#https://github.com/jmorganca/ollama/releases/download/v0.1.29#' | sh
20
+
21
+
22
+ # # Create the directory and give appropriate permissions
23
+ # RUN mkdir -p /.ollama && chmod 777 /.ollama
24
+
25
+ # WORKDIR /.ollama
26
+
27
+ # # Copy the entry point script
28
+ # COPY entrypoint.sh /entrypoint.sh
29
+ # RUN chmod +x /entrypoint.sh
30
+
31
+ # # Set the entry point script as the default command
32
+ # ENTRYPOINT ["/entrypoint.sh"]
33
+ # CMD ["ollama", "serve"]
34
+
35
+ # # Set the model as an environment variable (this can be overridden)
36
+ # ENV model=${model}
37
+
38
+ # Expose the server port
39
+ # Use the official Ollama Docker image as the base image
40
+ FROM ollama/ollama:latest
41
+
42
+ RUN apt update && apt install -y python3 && apt install -y python3-pip
43
+
44
+ RUN pip install litellm
45
+ RUN pip install 'litellm[proxy]'
46
+
47
+ # Create a directory for Ollama data
48
+ RUN mkdir -p /.ollama
49
+ RUN chmod -R 777 /.ollama
50
+
51
+ WORKDIR /.ollama
52
+
53
+ # Copy the entry point script
54
+ COPY entrypoint.sh /entrypoint.sh
55
+ RUN chmod +x /entrypoint.sh
56
+
57
+ # Set the entry point script as the default command
58
+ ENTRYPOINT ["/entrypoint.sh"]
59
+
60
+ # Set the model as an environment variable (this can be overridden)
61
+ ENV model=${model}
62
+
63
+ # Expose the port that Ollama runs on
64
+ EXPOSE 7860
65
+
66
+ # Command to start the Ollama server
67
+ CMD ["serve"]
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
  title: Ollama Server
3
- emoji: 🦀
4
- colorFrom: yellow
5
- colorTo: yellow
6
  sdk: docker
7
  pinned: false
8
  license: apache-2.0
 
1
  ---
2
  title: Ollama Server
3
+ emoji: 🦙⚡︎
4
+ colorFrom: blue
5
+ colorTo: gray
6
  sdk: docker
7
  pinned: false
8
  license: apache-2.0
app.py ADDED
@@ -0,0 +1 @@
 
 
1
+ print("running server....")
entrypoint.sh ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ # Starting server
4
+ echo "Starting server"
5
+ ollama serve &
6
+ sleep 1
7
+
8
+ # Splitting the models by comma and pulling each
9
+ IFS=',' read -ra MODELS <<< "$model"
10
+ for m in "${MODELS[@]}"; do
11
+ echo "Pulling $m"
12
+ ollama pull "$m"
13
+ sleep 5
14
+ echo "Running $m"
15
+ ollama run "$m" --keepalive -1s
16
+ # No need to sleep here unless you want to give some delay between each pull for some reason
17
+ done
18
+
19
+ # Keep the script running to prevent the container from exiting
20
+ wait
ollama-api-demo.ipynb ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "attachments": {},
5
+ "cell_type": "markdown",
6
+ "metadata": {},
7
+ "source": [
8
+ "### Dependencies"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "code",
13
+ "execution_count": null,
14
+ "metadata": {},
15
+ "outputs": [],
16
+ "source": [
17
+ "%pip install openai --upgrade"
18
+ ]
19
+ },
20
+ {
21
+ "attachments": {},
22
+ "cell_type": "markdown",
23
+ "metadata": {},
24
+ "source": [
25
+ "## API Response"
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "code",
30
+ "execution_count": 68,
31
+ "metadata": {},
32
+ "outputs": [
33
+ {
34
+ "name": "stdout",
35
+ "output_type": "stream",
36
+ "text": [
37
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:42:59.839736985Z\",\"response\":\"```\",\"done\":false}\n",
38
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:42:59.859007873Z\",\"response\":\"\\n\",\"done\":false}\n",
39
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:42:59.878431213Z\",\"response\":\"def\",\"done\":false}\n",
40
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:42:59.897784641Z\",\"response\":\" add\",\"done\":false}\n",
41
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:42:59.91718876Z\",\"response\":\"(\",\"done\":false}\n",
42
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:42:59.936866527Z\",\"response\":\"a\",\"done\":false}\n",
43
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:42:59.95776024Z\",\"response\":\",\",\"done\":false}\n",
44
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:42:59.979133947Z\",\"response\":\" b\",\"done\":false}\n",
45
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.000494731Z\",\"response\":\"):\",\"done\":false}\n",
46
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.021318934Z\",\"response\":\"\\n\",\"done\":false}\n",
47
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.041779731Z\",\"response\":\" \",\"done\":false}\n",
48
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.062190588Z\",\"response\":\" return\",\"done\":false}\n",
49
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.082505875Z\",\"response\":\" a\",\"done\":false}\n",
50
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.102662719Z\",\"response\":\" +\",\"done\":false}\n",
51
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.122760355Z\",\"response\":\" b\",\"done\":false}\n",
52
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.142907745Z\",\"response\":\"\\n\",\"done\":false}\n",
53
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.163285108Z\",\"response\":\"```\",\"done\":false}\n",
54
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.18370624Z\",\"response\":\"\\n\",\"done\":false}\n",
55
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.203963933Z\",\"response\":\"Example\",\"done\":false}\n",
56
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.224025854Z\",\"response\":\" usage\",\"done\":false}\n",
57
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.244386112Z\",\"response\":\":\",\"done\":false}\n",
58
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.264846213Z\",\"response\":\"\\n\",\"done\":false}\n",
59
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.285448321Z\",\"response\":\"```\",\"done\":false}\n",
60
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.305657169Z\",\"response\":\"\\n\",\"done\":false}\n",
61
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.325782131Z\",\"response\":\"print\",\"done\":false}\n",
62
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.346353022Z\",\"response\":\"(\",\"done\":false}\n",
63
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.366430166Z\",\"response\":\"add\",\"done\":false}\n",
64
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.386881006Z\",\"response\":\"(\",\"done\":false}\n",
65
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.406680624Z\",\"response\":\"3\",\"done\":false}\n",
66
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.426827031Z\",\"response\":\",\",\"done\":false}\n",
67
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.447157302Z\",\"response\":\" \",\"done\":false}\n",
68
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.467234406Z\",\"response\":\"5\",\"done\":false}\n",
69
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.487442969Z\",\"response\":\"))\",\"done\":false}\n",
70
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.50753674Z\",\"response\":\" #\",\"done\":false}\n",
71
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.527739408Z\",\"response\":\" Output\",\"done\":false}\n",
72
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.54789446Z\",\"response\":\":\",\"done\":false}\n",
73
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.568672362Z\",\"response\":\" \",\"done\":false}\n",
74
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.591076535Z\",\"response\":\"8\",\"done\":false}\n",
75
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.614757129Z\",\"response\":\"\\n\",\"done\":false}\n",
76
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.637841098Z\",\"response\":\"```\",\"done\":false}\n",
77
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.660407109Z\",\"response\":\"\",\"done\":true,\"context\":[518,25580,29962,3532,14816,29903,29958,5299,829,14816,29903,6778,13,13,6113,5132,775,304,788,29871,29906,3694,518,29914,25580,29962,13,28956,13,1753,788,29898,29874,29892,289,1125,13,1678,736,263,718,289,13,28956,13,14023,8744,29901,13,28956,13,2158,29898,1202,29898,29941,29892,29871,29945,876,396,10604,29901,29871,29947,13,28956],\"total_duration\":10037918982,\"load_duration\":9097178085,\"prompt_eval_count\":28,\"prompt_eval_duration\":119308000,\"eval_count\":41,\"eval_duration\":820449000}\n"
78
+ ]
79
+ }
80
+ ],
81
+ "source": [
82
+ "!curl https://thewise-ollama-server.hf.space/api/generate -d '''{\"model\": \"codellama\",\"prompt\":\"Write Python code to add 2 numbers\"}'''"
83
+ ]
84
+ },
85
+ {
86
+ "attachments": {},
87
+ "cell_type": "markdown",
88
+ "metadata": {},
89
+ "source": [
90
+ "## Langchain Demo"
91
+ ]
92
+ },
93
+ {
94
+ "cell_type": "code",
95
+ "execution_count": 69,
96
+ "metadata": {},
97
+ "outputs": [],
98
+ "source": [
99
+ "from langchain.llms import Ollama\n",
100
+ "from langchain.callbacks.manager import CallbackManager\n",
101
+ "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler"
102
+ ]
103
+ },
104
+ {
105
+ "attachments": {},
106
+ "cell_type": "markdown",
107
+ "metadata": {},
108
+ "source": [
109
+ "##### CODELLAMA"
110
+ ]
111
+ },
112
+ {
113
+ "cell_type": "code",
114
+ "execution_count": 70,
115
+ "metadata": {},
116
+ "outputs": [
117
+ {
118
+ "name": "stdout",
119
+ "output_type": "stream",
120
+ "text": [
121
+ "```\n",
122
+ "def add(a, b):\n",
123
+ " return a + b\n",
124
+ "```\n",
125
+ "This function takes two arguments `a` and `b`, adds them together, and returns the result. You can call this function by passing in two numbers, like this:\n",
126
+ "```\n",
127
+ "print(add(3, 5)) # prints 8\n",
128
+ "```"
129
+ ]
130
+ },
131
+ {
132
+ "data": {
133
+ "text/plain": [
134
+ "'```\\ndef add(a, b):\\n return a + b\\n```\\nThis function takes two arguments `a` and `b`, adds them together, and returns the result. You can call this function by passing in two numbers, like this:\\n```\\nprint(add(3, 5)) # prints 8\\n```'"
135
+ ]
136
+ },
137
+ "execution_count": 70,
138
+ "metadata": {},
139
+ "output_type": "execute_result"
140
+ }
141
+ ],
142
+ "source": [
143
+ "llm = Ollama(\n",
144
+ " model=\"codellama\",\n",
145
+ " base_url=\"https://thewise-ollama-server.hf.space\",\n",
146
+ " callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))\n",
147
+ "\n",
148
+ "llm('Write Python code to add 2 numbers')"
149
+ ]
150
+ },
151
+ {
152
+ "attachments": {},
153
+ "cell_type": "markdown",
154
+ "metadata": {},
155
+ "source": [
156
+ "##### LLAMA2"
157
+ ]
158
+ },
159
+ {
160
+ "cell_type": "code",
161
+ "execution_count": 71,
162
+ "metadata": {},
163
+ "outputs": [
164
+ {
165
+ "name": "stdout",
166
+ "output_type": "stream",
167
+ "text": [
168
+ "```\n",
169
+ "# Adding two numbers\n",
170
+ "a = 5\n",
171
+ "b = 3\n",
172
+ "result = a + b\n",
173
+ "print(result) # Output: 8\n",
174
+ "```"
175
+ ]
176
+ },
177
+ {
178
+ "data": {
179
+ "text/plain": [
180
+ "'```\\n# Adding two numbers\\na = 5\\nb = 3\\nresult = a + b\\nprint(result) # Output: 8\\n```'"
181
+ ]
182
+ },
183
+ "execution_count": 71,
184
+ "metadata": {},
185
+ "output_type": "execute_result"
186
+ }
187
+ ],
188
+ "source": [
189
+ "llm = Ollama(\n",
190
+ " model=\"llama2\",\n",
191
+ " base_url=\"https://thewise-ollama-server.hf.space\",\n",
192
+ " callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))\n",
193
+ "\n",
194
+ "llm('Write Python code to add 2 numbers')"
195
+ ]
196
+ }
197
+ ],
198
+ "metadata": {
199
+ "kernelspec": {
200
+ "display_name": "langchain",
201
+ "language": "python",
202
+ "name": "python3"
203
+ },
204
+ "language_info": {
205
+ "codemirror_mode": {
206
+ "name": "ipython",
207
+ "version": 3
208
+ },
209
+ "file_extension": ".py",
210
+ "mimetype": "text/x-python",
211
+ "name": "python",
212
+ "nbconvert_exporter": "python",
213
+ "pygments_lexer": "ipython3",
214
+ "version": "3.11.4"
215
+ },
216
+ "orig_nbformat": 4
217
+ },
218
+ "nbformat": 4,
219
+ "nbformat_minor": 2
220
+ }