##### THESE TESTS CAN ONLY RUN LOCALLY WITH THE OLLAMA SERVER RUNNING #####

# import aiohttp
# import asyncio
# import json

# async def get_ollama_response_stream(api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"):
#     # Ollama's /api/generate endpoint streams newline-delimited JSON objects;
#     # each object carries one token of the completion in its "response" field.
#     url = f"{api_base}/api/generate"
#     data = {
#         "model": model,
#         "prompt": prompt,
#     }
#     async with aiohttp.ClientSession() as session:
#         async with session.post(url, json=data) as resp:
#             async for line in resp.content.iter_any():
#                 if line:
#                     try:
#                         json_chunk = line.decode("utf-8")
#                         chunks = json_chunk.split("\n")
#                         for chunk in chunks:
#                             if chunk.strip() != "":
#                                 j = json.loads(chunk)
#                                 if "response" in j:
#                                     print(j["response"])
#                                     yield {
#                                         "role": "assistant",
#                                         "content": j["response"],
#                                     }
#                     except Exception as e:
#                         print(f"Error decoding JSON: {e}")

# async def get_ollama_response_no_stream(api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"):
#     # Drain the streaming generator and concatenate the tokens into one string.
#     generator = get_ollama_response_stream(api_base=api_base, model=model, prompt=prompt)
#     response = ""
#     async for elem in generator:
#         print(elem)
#         response += elem["content"]
#     return response

# result = asyncio.run(get_ollama_response_no_stream())
# print(result)

# # return this generator to the client for streaming requests
# generator = get_ollama_response_stream()

# async def get_response():
#     global generator
#     async for elem in generator:
#         print(elem)

# asyncio.run(get_response())

##### latest implementation of making raw HTTP POST requests to the local Ollama server #####

# import json
# import requests

# def get_ollama_response_stream(api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"):
#     # Same endpoint as above, but using a blocking requests.Session and
#     # re-packaging each streamed token as an OpenAI-style streaming chunk.
#     url = f"{api_base}/api/generate"
#     data = {
#         "model": model,
#         "prompt": prompt,
#     }
#     with requests.Session() as session:
#         with session.post(url, json=data, stream=True) as resp:
#             for line in resp.iter_lines():
#                 if line:
#                     try:
#                         json_chunk = line.decode("utf-8")
#                         chunks = json_chunk.split("\n")
#                         for chunk in chunks:
#                             if chunk.strip() != "":
#                                 j = json.loads(chunk)
#                                 if "response" in j:
#                                     completion_obj = {
#                                         "role": "assistant",
#                                         "content": j["response"],
#                                     }
#                                     yield {"choices": [{"delta": completion_obj}]}
#                     except Exception as e:
#                         print(f"Error decoding JSON: {e}")

# response = get_ollama_response_stream()
# for chunk in response:
#     print(chunk["choices"][0]["delta"])
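
# A minimal usage sketch (an illustration added here, not part of the original
# tests): accumulate the OpenAI-style chunks yielded by the requests-based
# get_ollama_response_stream above into a single completion string. The helper
# name collect_stream is hypothetical; like everything else in this file, it
# assumes a local Ollama server is running.

# def collect_stream(api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"):
#     full_response = ""
#     for chunk in get_ollama_response_stream(api_base=api_base, model=model, prompt=prompt):
#         # Each chunk looks like {"choices": [{"delta": {"role": ..., "content": ...}}]}
#         full_response += chunk["choices"][0]["delta"]["content"]
#     return full_response

# print(collect_stream())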