import asyncio
import threading

import cohere

# Cached client and the key it was built with, for lazy initialization
_client = None
_client_key = None


def get_client(api_key):
    """Get or create a Cohere client, rebuilding it if the API key changes."""
    global _client, _client_key
    if _client is None or _client_key != api_key:
        _client = cohere.ClientV2(api_key)
        _client_key = api_key
    return _client


def send_message_stream(system_message, user_message, conversation_history, api_key,
                        model_name="command-a-03-2025", temperature=0.7, max_tokens=None):
    """Stream a response from the Cohere API, yielding text chunks."""
    # Get or create the Cohere client
    co = get_client(api_key)

    # Prepare all messages: system prompt, prior turns, then the new user turn
    messages = [{"role": "system", "content": system_message}]
    messages.extend(conversation_history)
    messages.append({"role": "user", "content": user_message})

    # Prepare chat parameters
    chat_params = {
        "model": model_name,
        "messages": messages,
        "temperature": temperature,
    }
    # Add max_tokens only if specified
    if max_tokens:
        chat_params["max_tokens"] = int(max_tokens)

    # Send the streaming request to Cohere
    stream = co.chat_stream(**chat_params)

    # Yield text chunks as they arrive
    for chunk in stream:
        if chunk.type == "content-delta":
            yield chunk.delta.message.content.text
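

# Illustrative usage sketch (not part of the original module): consuming the
# synchronous stream chunk by chunk. The COHERE_API_KEY environment variable
# name and the prompts are placeholder assumptions.
def _demo_stream():
    import os
    api_key = os.environ["COHERE_API_KEY"]
    for chunk in send_message_stream(
        system_message="You are a concise assistant.",
        user_message="Say hello in one sentence.",
        conversation_history=[],
        api_key=api_key,
    ):
        print(chunk, end="", flush=True)
    print()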


async def send_message_stream_async(system_message, user_message, conversation_history, api_key,
                                    model_name="command-a-03-2025", temperature=0.7, max_tokens=None):
    """Async wrapper that streams from the Cohere API without blocking the event loop."""
    def _sync_stream():
        return send_message_stream(system_message, user_message, conversation_history,
                                   api_key, model_name, temperature, max_tokens)

    # Bridge the synchronous generator into this event loop through a queue
    loop = asyncio.get_running_loop()
    queue = asyncio.Queue()

    def _stream_worker():
        # StopIteration doubles as the end-of-stream sentinel, carrying any
        # exception raised by the synchronous generator
        try:
            for chunk in _sync_stream():
                loop.call_soon_threadsafe(queue.put_nowait, chunk)
        except Exception as e:
            loop.call_soon_threadsafe(queue.put_nowait, StopIteration(e))
        else:
            loop.call_soon_threadsafe(queue.put_nowait, StopIteration())

    # Run the blocking stream in a daemon thread so it cannot block shutdown
    thread = threading.Thread(target=_stream_worker, daemon=True)
    thread.start()

    # Yield chunks as the worker produces them
    while True:
        chunk = await queue.get()
        if isinstance(chunk, StopIteration):
            # Re-raise any exception the worker captured, otherwise finish
            if chunk.args:
                raise chunk.args[0]
            break
        yield chunk
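

# Illustrative usage sketch: consuming the async stream with `async for`.
# Assumes a COHERE_API_KEY environment variable (placeholder name).
async def _demo_stream_async():
    import os
    api_key = os.environ["COHERE_API_KEY"]
    async for chunk in send_message_stream_async(
        system_message="You are a concise assistant.",
        user_message="Say hello in one sentence.",
        conversation_history=[],
        api_key=api_key,
    ):
        print(chunk, end="", flush=True)
    print()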


def send_message(system_message, user_message, conversation_history, api_key,
                 model_name="command-a-03-2025", temperature=0.7, max_tokens=None):
    """Non-streaming version, kept for backward compatibility."""
    # Get or create the Cohere client
    co = get_client(api_key)

    # Prepare all messages: system prompt, prior turns, then the new user turn
    messages = [{"role": "system", "content": system_message}]
    messages.extend(conversation_history)
    messages.append({"role": "user", "content": user_message})

    # Prepare chat parameters
    chat_params = {
        "model": model_name,
        "messages": messages,
        "temperature": temperature,
    }
    # Add max_tokens only if specified
    if max_tokens:
        chat_params["max_tokens"] = int(max_tokens)

    # Send the request synchronously and return the first content block's text
    response = co.chat(**chat_params)
    return response.message.content[0].text
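

# Illustrative usage sketch: a multi-turn exchange with the non-streaming
# call. conversation_history uses the same {"role", "content"} dict shape the
# functions above build internally; the prompts are placeholders.
def _demo_multi_turn():
    import os
    api_key = os.environ["COHERE_API_KEY"]
    history = [
        {"role": "user", "content": "My name is Ada."},
        {"role": "assistant", "content": "Nice to meet you, Ada!"},
    ]
    reply = send_message(
        system_message="You are a concise assistant.",
        user_message="What is my name?",
        conversation_history=history,
        api_key=api_key,
    )
    print(reply)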


async def send_message_async(system_message, user_message, conversation_history, api_key,
                             model_name="command-a-03-2025", temperature=0.7, max_tokens=None):
    """Async version that offloads the blocking call with asyncio.to_thread."""
    return await asyncio.to_thread(
        send_message,
        system_message,
        user_message,
        conversation_history,
        api_key,
        model_name,
        temperature,
        max_tokens,
    )
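

# Illustrative usage sketch: awaiting the non-streaming async helper. The
# entry point is guarded so importing this module never makes network calls;
# COHERE_API_KEY is an assumed environment variable name.
async def _demo_async():
    import os
    api_key = os.environ["COHERE_API_KEY"]
    reply = await send_message_async(
        system_message="You are a concise assistant.",
        user_message="Say hello in one sentence.",
        conversation_history=[],
        api_key=api_key,
    )
    print(reply)


if __name__ == "__main__":
    asyncio.run(_demo_async())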