"""Small Flask service that forwards chat prompts to the DeepInfra chat API.

Endpoints:
    POST /generate  -- JSON body with ``text``, ``Model`` and (optionally)
                       ``System_Prompt``; responds with ``{"response": ...}``.
    GET  /models    -- responds with the list of model identifiers in ``models``.
"""

import json
import logging
from typing import Dict, List, Union

import requests
from flask import Flask, jsonify, request

logger = logging.getLogger(__name__)

app = Flask(__name__)

# Model identifiers advertised by the /models endpoint.
models = [
    'cognitivecomputations/dolphin-2.6-mixtral-8x7b',
    'databricks/dbrx-instruct',
    'google/gemma-1.1-7b-it',
    'HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1',
    'lizpreciatior/lzlv_70b_fp16_hf',
    'meta-llama/Meta-Llama-3-70B-Instruct',
    'meta-llama/Meta-Llama-3-8B-Instruct',
    'microsoft/WizardLM-2-7B',
    'microsoft/WizardLM-2-8x22B',
    'mistralai/Mixtral-8x7B-Instruct-v0.1',
    'mistralai/Mixtral-8x22B-Instruct-v0.1',
    'mistralai/Mistral-7B-Instruct-v0.2',
    'openchat/openchat-3.6-8b',
]

_CHAT_COMPLETIONS_URL = "https://api.deepinfra.com/v1/openai/chat/completions"

# (connect timeout, read timeout) in seconds.  The read timeout is generous
# because the request is non-streaming, so nothing arrives until the whole
# completion has been generated.
_REQUEST_TIMEOUT = (10, 300)

# Headers sent with every upstream request; values are kept verbatim.
_REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
    'Accept-Language': 'en,fr-FR;q=0.9,fr;q=0.8,es-ES;q=0.7,es;q=0.6,en-US;q=0.5,am;q=0.4,de;q=0.3',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Content-Type': 'application/json',
    'Origin': 'https://deepinfra.com',
    'Pragma': 'no-cache',
    'Referer': 'https://deepinfra.com/',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
    'X-Deepinfra-Source': 'web-embed',
    'accept': 'text/event-stream',
    'sec-ch-ua': '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"macOS"',
}


class LLM:
    """Thin client for one model behind the DeepInfra chat-completions URL."""

    def __init__(self, model: str):
        """Remember the model name and seed the history with a system prompt."""
        self.model = model
        self.conversation_history = [
            {"role": "system", "content": "You are a Helpful AI."}
        ]

    def chat(
        self,
        messages: List[Dict[str, str]],
        system_message: Union[str, None] = None,
    ) -> Union[str, None]:
        """Send the stored history plus ``messages`` and return the reply text.

        Args:
            messages: Chat messages (``role``/``content`` dicts) appended after
                the stored conversation history for this request.
            system_message: Optional extra system prompt.  When given it is
                inserted at the front of ``self.conversation_history`` and
                therefore stays there for later calls on this instance.

        Returns:
            The ``content`` of the first choice in the response, or ``None``
            when the request fails or the response does not have the expected
            shape.
        """
        if system_message is not None:
            self.conversation_history.insert(
                0, {"role": "system", "content": system_message}
            )

        all_messages = self.conversation_history + messages
        payload = json.dumps(
            {
                'model': self.model,
                'messages': all_messages,
                'temperature': 0.7,
                'max_tokens': 8028,
                'stop': [],
                'stream': False,
            },
            separators=(',', ':'),
        )

        try:
            result = requests.post(
                url=_CHAT_COMPLETIONS_URL,
                data=payload,
                headers=_REQUEST_HEADERS,
                timeout=_REQUEST_TIMEOUT,
            )
            return result.json()['choices'][0]['message']['content']
        except requests.RequestException:
            # Connection errors, timeouts, and (on recent requests versions)
            # undecodable JSON bodies.
            logger.exception("Chat completion request for %s failed", self.model)
        except (ValueError, KeyError, IndexError, TypeError):
            # Body was not JSON, or the JSON lacked choices[0].message.content
            # (for example an error payload from the upstream API).
            logger.exception(
                "Unexpected chat completion response for %s", self.model
            )
        return None


def GenerativeIO(text, Model, System_Prompt):
    """Run a single-turn completion and return the model's reply.

    Args:
        text: The user's message.
        Model: Model identifier passed to ``LLM``.
        System_Prompt: System instruction for this request; skipped when
            ``None``.

    Returns:
        The reply text, or ``None`` if ``LLM.chat`` could not produce one.
    """
    llm = LLM(model=Model)
    messages = []
    if System_Prompt is not None:
        # The system prompt carries the "system" role and the text the "user"
        # role; the two were previously sent the wrong way round.
        messages.append({"role": "system", "content": System_Prompt})
    messages.append({"role": "user", "content": text})
    return llm.chat(messages)


@app.route('/generate', methods=['POST'])
def generate():
    """Handle POST /generate: validate the JSON body and return the reply.

    Responds with HTTP 400 and an ``error`` message when the body is not a
    JSON object or when ``text``/``Model`` are missing or not strings;
    otherwise responds with ``{"response": <reply or null>}``.
    """
    # silent=True yields None instead of raising on a missing/invalid body,
    # so the problem can be reported as a JSON 400 below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object.'}), 400

    text = data.get('text')
    model = data.get('Model')
    system_prompt = data.get('System_Prompt')

    if not isinstance(text, str) or not isinstance(model, str):
        return jsonify(
            {'error': "'text' and 'Model' are required and must be strings."}
        ), 400
    if system_prompt is not None and not isinstance(system_prompt, str):
        return jsonify(
            {'error': "'System_Prompt' must be a string when provided."}
        ), 400

    response = GenerativeIO(text, model, system_prompt)
    return jsonify({'response': response})


@app.route('/models', methods=['GET'])
def get_models():
    """Handle GET /models: return the list of model identifiers as JSON."""
    return jsonify(models)


if __name__ == '__main__':
    app.run()