##### THESE TESTS CAN ONLY RUN LOCALLY WITH THE OLLAMA SERVER RUNNING ######
# import aiohttp
# import json
# import asyncio
# 
# async def get_ollama_response_stream(api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"):
#     # Stream completions from the local Ollama server, yielding each chunk
#     # as an assistant message dict with "role" and "content" keys.
#     url = f'{api_base}/api/generate'
#     data = {
#         "model": model,
#         "prompt": prompt,
#     }

#     async with aiohttp.ClientSession() as session:
#         async with session.post(url, json=data) as resp:
#             async for line in resp.content.iter_any():
#                 if line:
#                     try:
#                         json_chunk = line.decode("utf-8")
#                         chunks = json_chunk.split("\n")
#                         for chunk in chunks:
#                             if chunk.strip() != "":
#                                 j = json.loads(chunk)
#                                 if "response" in j:
#                                     print(j["response"])
#                                     yield {
#                                         "role": "assistant",
#                                         "content": j["response"]
#                                     }
#                     except Exception as e:
#                         print(f"Error decoding JSON: {e}")

# async def get_ollama_response_no_stream(api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"):
#     # Accumulate the streamed chunks into a single response string.
#     generator = get_ollama_response_stream(api_base=api_base, model=model, prompt=prompt)
#     response = ""
#     async for elem in generator:
#         print(elem)
#         response += elem["content"]
#     return response
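
# # Sketch (untested assumption): if this Ollama build supports the `stream`
# # flag on /api/generate, the full completion could be fetched in one request
# # instead of accumulating chunks. `get_ollama_response_single` is a
# # hypothetical helper, not part of the original tests.
# async def get_ollama_response_single(api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"):
#     async with aiohttp.ClientSession() as session:
#         data = {"model": model, "prompt": prompt, "stream": False}  # assumes the server accepts stream=False
#         async with session.post(f"{api_base}/api/generate", json=data) as resp:
#             j = await resp.json()
#             return j.get("response", "")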

# result = asyncio.run(get_ollama_response_no_stream())
# print(result)

# # return this generator to the client for streaming requests
# async def get_response():
#     generator = get_ollama_response_stream()
#     async for elem in generator:
#         print(elem)

# asyncio.run(get_response())
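
# # Sketch (untested assumption): `sync_chunks` is a hypothetical bridge that
# # exposes the async generator to synchronous callers by driving the event
# # loop one chunk at a time.
# def sync_chunks(api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"):
#     loop = asyncio.new_event_loop()
#     gen = get_ollama_response_stream(api_base=api_base, model=model, prompt=prompt)
#     try:
#         while True:
#             yield loop.run_until_complete(gen.__anext__())
#     except StopAsyncIteration:
#         pass
#     finally:
#         loop.close()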



##### Latest implementation: raw HTTP POST requests to the local Ollama server #####

# import requests
# import json
# def get_ollama_response_stream(api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"):
#     # Stream completions from the local Ollama server, yielding each chunk
#     # in an OpenAI-style {"choices": [{"delta": {...}}]} shape.
#     url = f"{api_base}/api/generate"
#     data = {
#         "model": model,
#         "prompt": prompt,
#     }

#     with requests.Session() as session:
#         with session.post(url, json=data, stream=True) as resp:
#             for line in resp.iter_lines():
#                 if line:
#                     try:
#                         json_chunk = line.decode("utf-8")
#                         chunks = json_chunk.split("\n")
#                         for chunk in chunks:
#                             if chunk.strip() != "":
#                                 j = json.loads(chunk)
#                                 if "response" in j:
#                                     completion_obj = {
#                                         "role": "assistant",
#                                         "content": j["response"],
#                                     }
#                                     yield {"choices": [{"delta": completion_obj}]}
#                     except Exception as e:
#                         print(f"Error decoding JSON: {e}")

# response = get_ollama_response_stream()

# for chunk in response:
#     print(chunk['choices'][0]['delta'])
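
# # Sketch (untested): the OpenAI-style chunks above can be joined back into
# # a full completion string by accumulating each delta's "content" field.
# full_response = ""
# for chunk in get_ollama_response_stream():
#     full_response += chunk["choices"][0]["delta"]["content"]
# print(full_response)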