// services/openRouterService.ts
/**
* Service for interacting with OpenRouter API.
*/
/**
 * Reads a File and resolves with its contents as a base64 data URL
 * (e.g. "data:image/png;base64,..."), suitable for inlining in an
 * OpenRouter `image_url` content part.
 *
 * @param file - The file to read (image or video).
 * @returns Promise resolving to the data-URL string; rejects with an Error
 *          if reading fails or the result is not a string.
 */
const fileToBase64 = (file: File): Promise<string> => {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    // Attach handlers before starting the read so no event can be missed.
    reader.onload = () => {
      if (typeof reader.result === 'string') {
        resolve(reader.result);
      } else {
        reject(new Error('Failed to convert file to base64'));
      }
    };
    // Reject with a real Error (reader.error) rather than the raw
    // ProgressEvent the 'error' handler receives, so callers can rely
    // on Error semantics (message, instanceof checks).
    reader.onerror = () => reject(reader.error ?? new Error('Failed to read file'));
    reader.readAsDataURL(file);
  });
};
/**
 * Extracts `numberOfFrames` evenly spaced JPEG frames from a video file by
 * seeking a detached <video> element and drawing each frame onto a canvas.
 * Frames are sampled at the midpoint of each interval and returned as
 * "data:image/jpeg" data URLs.
 *
 * @param videoFile      - The video to sample.
 * @param numberOfFrames - How many frames to extract.
 * @param signal         - Optional AbortSignal; rejects with Error("AbortError")
 *                         when triggered.
 * @returns Promise resolving to an array of JPEG data-URL strings.
 */
const extractFramesFromVideo = async (videoFile: File, numberOfFrames: number, signal?: AbortSignal): Promise<string[]> => {
  return new Promise((resolve, reject) => {
    const video = document.createElement('video');
    video.preload = 'metadata';
    video.muted = true;
    video.playsInline = true;
    const url = URL.createObjectURL(videoFile);
    const frames: string[] = [];
    const onAbort = () => {
      cleanup();
      reject(new Error("AbortError"));
    };
    // Single cleanup path so no exit leaks the object URL, the watchdog
    // timeout, or the abort listener. (The original duplicated this at each
    // exit and missed clearTimeout/video.src on some paths.)
    const cleanup = () => {
      if (signal) signal.removeEventListener('abort', onAbort);
      clearTimeout(timeout);
      URL.revokeObjectURL(url);
      video.src = "";
    };
    if (signal) signal.addEventListener('abort', onAbort);
    // Watchdog: some browsers never fire loadeddata/error for corrupt files.
    const timeout = setTimeout(() => {
      cleanup();
      reject(new Error("Video processing timed out"));
    }, 60000);
    video.onloadeddata = async () => {
      const duration = video.duration;
      const canvas = document.createElement('canvas');
      const ctx = canvas.getContext('2d');
      if (!ctx) {
        cleanup();
        reject(new Error("Could not create canvas context"));
        return;
      }
      canvas.width = video.videoWidth;
      canvas.height = video.videoHeight;
      const step = duration / numberOfFrames;
      try {
        for (let i = 0; i < numberOfFrames; i++) {
          if (signal?.aborted) throw new Error("AbortError");
          // Midpoint of the i-th interval, clamped to [0, duration - 0.1]
          // so clips shorter than 0.1s never get a negative seek target.
          const time = Math.max(0, Math.min((step * i) + (step / 2), duration - 0.1));
          await new Promise<void>((frameResolve) => {
            const finish = () => {
              video.removeEventListener('seeked', onSeeked);
              if (signal) signal.removeEventListener('abort', onFrameAbort);
              frameResolve();
            };
            const onSeeked = () => finish();
            // Resolve (not reject) on abort; the aborted check right after
            // the await converts it into an AbortError. Without this the
            // await could hang forever, because 'seeked' may never fire
            // once the caller has aborted.
            const onFrameAbort = () => finish();
            video.addEventListener('seeked', onSeeked);
            if (signal) signal.addEventListener('abort', onFrameAbort);
            video.currentTime = time;
          });
          if (signal?.aborted) throw new Error("AbortError");
          ctx.drawImage(video, 0, 0);
          frames.push(canvas.toDataURL('image/jpeg', 0.8));
        }
        cleanup();
        resolve(frames);
      } catch (e) {
        cleanup();
        reject(e);
      }
    };
    video.onerror = () => {
      cleanup();
      reject(new Error("Failed to load video file"));
    };
    video.src = url;
  });
};
/**
 * Builds the captioning system prompt sent to the model.
 *
 * @param triggerWord               - Word the caption must start with.
 * @param customInstructions        - Optional extra instructions appended verbatim.
 * @param isCharacterTaggingEnabled - When true (and a show name is given), adds
 *                                    a rule asking for "char_*" tags.
 * @param characterShowName         - Show/series used for character recognition.
 * @returns The assembled prompt string.
 */
const constructPrompt = (
  triggerWord: string,
  customInstructions?: string,
  isCharacterTaggingEnabled?: boolean,
  characterShowName?: string
): string => {
  const header = 'You are an expert captioner for AI model training data. Your task is to describe the provided image/video in detail for a style LoRA. Follow these rules strictly:';
  const rules = [
    `1. Start the caption with the trigger word: "${triggerWord}".`,
    '2. Describe EVERYTHING visible: characters, clothing, actions, background, objects, lighting, and camera angle.',
    '3. Be objective and factual.',
    '4. DO NOT mention art styles or generic animation terms like "anime" or "cartoon".',
    '5. Write as a single, continuous paragraph.',
  ];
  // Character tagging only kicks in when a non-blank show name is provided.
  if (isCharacterTaggingEnabled && characterShowName?.trim()) {
    rules.push(`6. Identify characters from the show/series "${characterShowName}" and append tags at the end of the caption, separated by commas. The format for each tag must be "char_[charactername]" (e.g., ", char_simon, char_kamina"). If no characters are recognized, do not add tags.`);
  }
  const prompt = [header, ...rules].join('\n');
  return customInstructions
    ? `${prompt}\n\nAdditional instructions: ${customInstructions}`
    : prompt;
};
/**
 * Generates a training caption for an image or video via the OpenRouter
 * chat-completions API.
 *
 * @param apiKey                    - OpenRouter API key (required).
 * @param model                     - Model ID, or an openrouter.ai URL it is extracted from.
 * @param file                      - Image or video file to caption.
 * @param triggerWord               - Word the caption must start with.
 * @param customInstructions        - Optional extra prompt instructions.
 * @param isCharacterTaggingEnabled - Adds character-tagging rule when true.
 * @param characterShowName         - Show/series for character recognition.
 * @param videoFrameCount           - Frames to sample when not sending the full video.
 * @param maxTokens                 - max_tokens for the completion.
 * @param temperature               - Sampling temperature.
 * @param useFullVideo              - Send the whole video as base64 instead of frames.
 * @param signal                    - Optional AbortSignal (aborts frame extraction and fetch).
 * @returns The caption text; may be "[Reasoning Only]: ..." if the model returned
 *          only reasoning, or "" if the response had no usable content.
 * @throws Error on missing key, HTTP errors, refusals, token-limit cutoffs,
 *         or content-filter blocks.
 */
export const generateCaptionOpenRouter = async (
  apiKey: string,
  model: string,
  file: File,
  triggerWord: string,
  customInstructions?: string,
  isCharacterTaggingEnabled?: boolean,
  characterShowName?: string,
  videoFrameCount: number = 8,
  maxTokens: number = 4096,
  temperature: number = 0.7,
  useFullVideo: boolean = false,
  signal?: AbortSignal
): Promise<string> => {
  if (!apiKey) throw new Error("OpenRouter API Key is required.");
  const endpoint = 'https://openrouter.ai/api/v1/chat/completions';
  const prompt = constructPrompt(triggerWord, customInstructions, isCharacterTaggingEnabled, characterShowName);
  // Normalize the model ID: accept either a bare ID ("openai/gpt-4o-mini")
  // or a full openrouter.ai URL, with optional "models/" prefix, query
  // string, and trailing slashes.
  let modelId = model.includes('openrouter.ai/') ? model.split('openrouter.ai/').pop() || '' : model;
  if (modelId.startsWith('models/')) {
    modelId = modelId.replace('models/', '');
  }
  modelId = modelId.split('?')[0].replace(/\/+$/, '');
  type ContentPart =
    | { type: 'text'; text: string }
    | { type: 'image_url'; image_url: { url: string } };
  const contentParts: ContentPart[] = [{ type: "text", text: prompt }];
  if (file.type.startsWith('video/')) {
    if (useFullVideo) {
      // Some models accept a whole base64 video as an image_url part.
      const base64Video = await fileToBase64(file);
      contentParts.push({ type: "image_url", image_url: { url: base64Video } });
    } else {
      const frames = await extractFramesFromVideo(file, videoFrameCount, signal);
      frames.forEach(frame => contentParts.push({ type: "image_url", image_url: { url: frame } }));
    }
  } else {
    const base64Image = await fileToBase64(file);
    contentParts.push({ type: "image_url", image_url: { url: base64Image } });
  }
  const payload = {
    model: modelId || 'openai/gpt-4o-mini',
    messages: [{ role: "user", content: contentParts }],
    max_tokens: maxTokens,
    temperature: temperature
  };
  const response = await fetch(endpoint, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${apiKey}`,
      "HTTP-Referer": window.location.origin,
      "X-Title": "LoRA Caption Assistant"
    },
    body: JSON.stringify(payload),
    signal
  });
  if (!response.ok) {
    // Read the body exactly once as text. Calling response.json() first
    // would consume the stream even when parsing fails, making any
    // fallback response.text() throw "body already used" and losing the
    // server's actual error message.
    const errText = await response.text().catch(() => "");
    let errorMessage = response.statusText;
    if (errText) {
      try {
        const errData = JSON.parse(errText);
        errorMessage = errData.error?.message || errData.message || errText;
      } catch {
        errorMessage = errText;
      }
    }
    throw new Error(`OpenRouter API Error (${response.status}): ${errorMessage}`);
  }
  const data = await response.json();
  console.log('OpenRouter Generate Response:', data);
  const message = data.choices?.[0]?.message;
  let content = "";
  if (message) {
    if (typeof message.content === 'string') {
      content = message.content.trim();
    } else if (Array.isArray(message.content)) {
      // Some models return content as an array of typed parts.
      content = message.content
        .filter((part: any) => part.type === 'text')
        .map((part: any) => part.text)
        .join('\n')
        .trim();
    }
  }
  const refusal = message?.refusal;
  const reasoning = message?.reasoning;
  const finishReason = data.choices?.[0]?.finish_reason;
  if (!content && refusal) {
    throw new Error(`OpenRouter Refusal: ${refusal}`);
  }
  if (!content && finishReason === 'length') {
    if (reasoning) {
      // Reasoning models can spend the whole token budget "thinking" and
      // never emit output; surface that distinctly so the user can react.
      throw new Error("OpenRouter model hit token limit during reasoning. Try increasing max tokens or using a non-reasoning model.");
    }
    throw new Error("OpenRouter response was cut off (hit token limit).");
  }
  if (!content && finishReason === 'content_filter') {
    throw new Error("OpenRouter response was blocked by content filter.");
  }
  // Rarely, a model puts the result in `reasoning` while `content` is null.
  return content || (reasoning ? `[Reasoning Only]: ${reasoning}` : "");
};
/**
 * Refines an existing caption using the OpenRouter chat-completions API,
 * grounded in the original image/video.
 *
 * @param apiKey                 - OpenRouter API key (required).
 * @param model                  - Model ID, or an openrouter.ai URL it is extracted from.
 * @param file                   - The media the caption describes.
 * @param currentCaption         - Caption to refine.
 * @param refinementInstructions - How the caption should be changed.
 * @param videoFrameCount        - Frames to sample when not sending the full video.
 * @param maxTokens              - max_tokens for the completion.
 * @param temperature            - Sampling temperature.
 * @param useFullVideo           - Send the whole video as base64 instead of frames.
 * @param signal                 - Optional AbortSignal.
 * @returns The refined caption text, or "" if the response had no content.
 * @throws Error on missing key, HTTP errors, or refusals.
 */
export const refineCaptionOpenRouter = async (
  apiKey: string,
  model: string,
  file: File,
  currentCaption: string,
  refinementInstructions: string,
  videoFrameCount: number = 4,
  maxTokens: number = 4096,
  temperature: number = 0.7,
  useFullVideo: boolean = false,
  signal?: AbortSignal
): Promise<string> => {
  if (!apiKey) throw new Error("OpenRouter API Key is required.");
  const endpoint = 'https://openrouter.ai/api/v1/chat/completions';
  const prompt = `Refine the following caption based on the visual information and the instructions. Output ONLY the refined text.
CURRENT CAPTION: "${currentCaption}"
INSTRUCTIONS: "${refinementInstructions}"`;
  // Normalize the model ID (bare ID, or openrouter.ai URL with optional
  // "models/" prefix, query string, and trailing slashes).
  let modelId = model.includes('openrouter.ai/') ? model.split('openrouter.ai/').pop() || '' : model;
  if (modelId.startsWith('models/')) modelId = modelId.replace('models/', '');
  modelId = modelId.split('?')[0].replace(/\/+$/, '');
  type ContentPart =
    | { type: 'text'; text: string }
    | { type: 'image_url'; image_url: { url: string } };
  const contentParts: ContentPart[] = [{ type: "text", text: prompt }];
  if (file.type.startsWith('video/')) {
    if (useFullVideo) {
      const base64Video = await fileToBase64(file);
      contentParts.push({ type: "image_url", image_url: { url: base64Video } });
    } else {
      const frames = await extractFramesFromVideo(file, videoFrameCount, signal);
      frames.forEach(frame => contentParts.push({ type: "image_url", image_url: { url: frame } }));
    }
  } else {
    const base64Image = await fileToBase64(file);
    contentParts.push({ type: "image_url", image_url: { url: base64Image } });
  }
  const payload = {
    model: modelId || 'openai/gpt-4o-mini',
    messages: [{ role: "user", content: contentParts }],
    max_tokens: maxTokens,
    temperature: temperature
  };
  const response = await fetch(endpoint, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${apiKey}`,
      "HTTP-Referer": window.location.origin,
      "X-Title": "LoRA Caption Assistant"
    },
    body: JSON.stringify(payload),
    signal
  });
  if (!response.ok) {
    // Read the body exactly once as text; a failed response.json() would
    // consume the stream and make a fallback response.text() throw,
    // discarding the server's actual error message.
    const errText = await response.text().catch(() => "");
    let errorMessage = response.statusText;
    if (errText) {
      try {
        const errData = JSON.parse(errText);
        errorMessage = errData.error?.message || errData.message || errText;
      } catch {
        errorMessage = errText;
      }
    }
    throw new Error(`OpenRouter API Error (${response.status}): ${errorMessage}`);
  }
  const data = await response.json();
  console.log('OpenRouter Refine Response:', data);
  const message = data.choices?.[0]?.message;
  // Handle both string content and array-of-parts content (the original
  // called .trim() directly and would throw on the array form; the
  // generate path already handled both).
  let content = "";
  if (message) {
    if (typeof message.content === 'string') {
      content = message.content.trim();
    } else if (Array.isArray(message.content)) {
      content = message.content
        .filter((part: any) => part.type === 'text')
        .map((part: any) => part.text)
        .join('\n')
        .trim();
    }
  }
  const refusal = message?.refusal;
  if (!content && refusal) throw new Error(`OpenRouter Refusal: ${refusal}`);
  return content || "";
};
/**
 * Asks the model to rate a caption's quality from 1 to 5 via the OpenRouter
 * chat-completions API.
 *
 * @param apiKey          - OpenRouter API key (required).
 * @param model           - Model ID, or an openrouter.ai URL it is extracted from.
 * @param file            - The media the caption describes.
 * @param caption         - Caption to evaluate.
 * @param videoFrameCount - Frames to sample when not sending the full video.
 * @param temperature     - Sampling temperature.
 * @param useFullVideo    - Send the whole video as base64 instead of frames.
 * @param signal          - Optional AbortSignal.
 * @returns The first integer found in the response (expected 1-5), or 0 if
 *          no digit could be parsed.
 * @throws Error on missing key, HTTP errors, or refusals.
 */
export const checkQualityOpenRouter = async (
  apiKey: string,
  model: string,
  file: File,
  caption: string,
  videoFrameCount: number = 4,
  temperature: number = 0.7,
  useFullVideo: boolean = false,
  signal?: AbortSignal
): Promise<number> => {
  if (!apiKey) throw new Error("OpenRouter API Key is required.");
  const endpoint = 'https://openrouter.ai/api/v1/chat/completions';
  const prompt = `Evaluate the caption quality. Respond with ONLY an integer from 1 to 5.\nCaption: "${caption}"`;
  // Normalize the model ID (bare ID, or openrouter.ai URL with optional
  // "models/" prefix, query string, and trailing slashes).
  let modelId = model.includes('openrouter.ai/') ? model.split('openrouter.ai/').pop() || '' : model;
  if (modelId.startsWith('models/')) modelId = modelId.replace('models/', '');
  modelId = modelId.split('?')[0].replace(/\/+$/, '');
  type ContentPart =
    | { type: 'text'; text: string }
    | { type: 'image_url'; image_url: { url: string } };
  const contentParts: ContentPart[] = [{ type: "text", text: prompt }];
  if (file.type.startsWith('video/')) {
    if (useFullVideo) {
      const base64Video = await fileToBase64(file);
      contentParts.push({ type: "image_url", image_url: { url: base64Video } });
    } else {
      const frames = await extractFramesFromVideo(file, videoFrameCount, signal);
      frames.forEach(frame => contentParts.push({ type: "image_url", image_url: { url: frame } }));
    }
  } else {
    const base64Image = await fileToBase64(file);
    contentParts.push({ type: "image_url", image_url: { url: base64Image } });
  }
  const payload = {
    model: modelId || 'openai/gpt-4o-mini',
    messages: [{ role: "user", content: contentParts }],
    // Only a single digit is expected back, so keep the budget tiny.
    max_tokens: 10,
    temperature: temperature
  };
  const response = await fetch(endpoint, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${apiKey}`,
      "HTTP-Referer": window.location.origin,
      "X-Title": "LoRA Caption Assistant"
    },
    body: JSON.stringify(payload),
    signal
  });
  if (!response.ok) {
    // Read the body exactly once as text; a failed response.json() would
    // consume the stream and make a fallback response.text() throw,
    // discarding the server's actual error message.
    const errText = await response.text().catch(() => "");
    let errorMessage = response.statusText;
    if (errText) {
      try {
        const errData = JSON.parse(errText);
        errorMessage = errData.error?.message || errData.message || errText;
      } catch {
        errorMessage = errText;
      }
    }
    throw new Error(`OpenRouter API Error (${response.status}): ${errorMessage}`);
  }
  const data = await response.json();
  console.log('OpenRouter Quality Response:', data);
  const message = data.choices?.[0]?.message;
  // Handle both string content and array-of-parts content for consistency
  // with the generate path (the original's .trim() would throw on arrays).
  let text = "";
  if (message) {
    if (typeof message.content === 'string') {
      text = message.content.trim();
    } else if (Array.isArray(message.content)) {
      text = message.content
        .filter((part: any) => part.type === 'text')
        .map((part: any) => part.text)
        .join('\n')
        .trim();
    }
  }
  const refusal = message?.refusal;
  if (!text && refusal) throw new Error(`OpenRouter Refusal: ${refusal}`);
  // Extract the first run of digits; default to 0 when none is present.
  return parseInt(text?.match(/\d+/)?.[0] || '0', 10);
};