##### THESE TESTS CAN ONLY RUN LOCALLY WITH THE OLLAMA SERVER RUNNING ######
# import aiohttp
# import json
# import asyncio
# import requests
#
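# # streams NDJSON chunks from Ollama's /api/generate with aiohttp and yields each "response" token as an assistant message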
# async def get_ollama_response_stream(api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"):
#     session = aiohttp.ClientSession()
#     url = f'{api_base}/api/generate'
#     data = {
#         "model": model,
#         "prompt": prompt,
#     }
#     response = ""
#     try:
#         async with session.post(url, json=data) as resp:
#             async for line in resp.content.iter_any():
#                 if line:
#                     try:
#                         json_chunk = line.decode("utf-8")
#                         chunks = json_chunk.split("\n")
#                         for chunk in chunks:
#                             if chunk.strip() != "":
#                                 j = json.loads(chunk)
#                                 if "response" in j:
#                                     print(j["response"])
#                                     yield {
#                                         "role": "assistant",
#                                         "content": j["response"]
#                                     }
#                                     # self.responses.append(j["response"])
#                                     # yield "blank"
#                     except Exception as e:
#                         print(f"Error decoding JSON: {e}")
#     finally:
#         await session.close()
# async def get_ollama_response_no_stream(api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"):
#     # collect the streamed chunks into a single response string
#     generator = get_ollama_response_stream(api_base=api_base, model=model, prompt=prompt)
#     response = ""
#     async for elem in generator:
#         print(elem)
#         response += elem["content"]
#     return response
#
# result = asyncio.run(get_ollama_response_no_stream())
# print(result)
#
# # return this generator to the client for streaming requests
# generator = get_ollama_response_stream()
# async def get_response():
#     async for elem in generator:
#         print(elem)
# asyncio.run(get_response())
##### latest implementation of making raw http post requests to local ollama server
# import requests
# import json
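# # sync version: stream NDJSON lines with requests and yield OpenAI-style {"choices": [{"delta": ...}]} chunks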
# def get_ollama_response_stream(api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"):
#     url = f"{api_base}/api/generate"
#     data = {
#         "model": model,
#         "prompt": prompt,
#     }
#     session = requests.Session()
#     with session.post(url, json=data, stream=True) as resp:
#         for line in resp.iter_lines():
#             if line:
#                 try:
#                     json_chunk = line.decode("utf-8")
#                     chunks = json_chunk.split("\n")
#                     for chunk in chunks:
#                         if chunk.strip() != "":
#                             j = json.loads(chunk)
#                             if "response" in j:
#                                 completion_obj = {
#                                     "role": "assistant",
#                                     "content": "",
#                                 }
#                                 completion_obj["content"] = j["response"]
#                                 yield {"choices": [{"delta": completion_obj}]}
#                 except Exception as e:
#                     print(f"Error decoding JSON: {e}")
#     session.close()
#
# response = get_ollama_response_stream()
# for chunk in response:
#     print(chunk['choices'][0]['delta'])
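##### rough sketch (not part of the tests above): joining the streamed deltas back into one string,
##### assuming the same local Ollama server and the get_ollama_response_stream generator defined above;
##### the helper name below is made up for illustration
# def collect_ollama_response(api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"):
#     # drain the generator and concatenate the delta contents
#     response = ""
#     for chunk in get_ollama_response_stream(api_base=api_base, model=model, prompt=prompt):
#         response += chunk["choices"][0]["delta"]["content"]
#     return response
#
# print(collect_ollama_response())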