Spaces:
				
			
			
	
			
			
		Paused
		
	
	
	
			
			
	
	
	
	
		
		
		Paused
		
	move some tasks to small model (#479)
Browse files
    	
        .env
    CHANGED
    
    | 
         @@ -61,6 +61,7 @@ MODELS=`[ 
     | 
|
| 61 | 
         
             
              }
         
     | 
| 62 | 
         
             
            ]`
         
     | 
| 63 | 
         
             
            OLD_MODELS=`[]`# any removed models, `{ name: string, displayName?: string, id?: string }`
         
     | 
| 
         | 
|
| 64 | 
         | 
| 65 | 
         
             
            PUBLIC_ORIGIN=#https://huggingface.co
         
     | 
| 66 | 
         
             
            PUBLIC_SHARE_PREFIX=#https://hf.co/chat
         
     | 
| 
         | 
|
| 61 | 
         
             
              }
         
     | 
| 62 | 
         
             
            ]`
         
     | 
| 63 | 
         
             
            OLD_MODELS=`[]`# any removed models, `{ name: string, displayName?: string, id?: string }`
         
     | 
| 64 | 
         
            +
            TASK_MODEL='' # name of the model used for tasks such as summarizing title, creating query, etc.
         
     | 
| 65 | 
         | 
| 66 | 
         
             
            PUBLIC_ORIGIN=#https://huggingface.co
         
     | 
| 67 | 
         
             
            PUBLIC_SHARE_PREFIX=#https://hf.co/chat
         
     | 
    	
        src/lib/server/generateFromDefaultEndpoint.ts
    CHANGED
    
    | 
         @@ -1,4 +1,4 @@ 
     | 
|
| 1 | 
         
            -
            import { defaultModel } from "$lib/server/models";
     | 
| 2 | 
         
             
            import { modelEndpoint } from "./modelEndpoint";
         
     | 
| 3 | 
         
             
            import { trimSuffix } from "$lib/utils/trimSuffix";
         
     | 
| 4 | 
         
             
            import { trimPrefix } from "$lib/utils/trimPrefix";
         
     | 
| 
         @@ -16,12 +16,12 @@ export async function generateFromDefaultEndpoint( 
     | 
|
| 16 | 
         
             
            	parameters?: Partial<Parameters>
         
     | 
| 17 | 
         
             
            ): Promise<string> {
         
     | 
| 18 | 
         
             
            	const newParameters = {
         
     | 
| 19 | 
         
            -
            		...defaultModel.parameters,
     | 
| 20 | 
         
             
            		...parameters,
         
     | 
| 21 | 
         
             
            		return_full_text: false,
         
     | 
| 22 | 
         
             
            	};
         
     | 
| 23 | 
         | 
| 24 | 
         
            -
            	const randomEndpoint = modelEndpoint(defaultModel);
     | 
| 25 | 
         | 
| 26 | 
         
             
            	const abortController = new AbortController();
         
     | 
| 27 | 
         | 
| 
         | 
|
| 1 | 
         
            +
            import { smallModel } from "$lib/server/models";
         
     | 
| 2 | 
         
             
            import { modelEndpoint } from "./modelEndpoint";
         
     | 
| 3 | 
         
             
            import { trimSuffix } from "$lib/utils/trimSuffix";
         
     | 
| 4 | 
         
             
            import { trimPrefix } from "$lib/utils/trimPrefix";
         
     | 
| 
         | 
|
| 16 | 
         
             
            	parameters?: Partial<Parameters>
         
     | 
| 17 | 
         
             
            ): Promise<string> {
         
     | 
| 18 | 
         
             
            	const newParameters = {
         
     | 
| 19 | 
         
            +
            		...smallModel.parameters,
         
     | 
| 20 | 
         
             
            		...parameters,
         
     | 
| 21 | 
         
             
            		return_full_text: false,
         
     | 
| 22 | 
         
             
            	};
         
     | 
| 23 | 
         | 
| 24 | 
         
            +
            	const randomEndpoint = modelEndpoint(smallModel);
         
     | 
| 25 | 
         | 
| 26 | 
         
             
            	const abortController = new AbortController();
         
     | 
| 27 | 
         | 
    	
        src/lib/server/models.ts
    CHANGED
    
    | 
         @@ -1,4 +1,4 @@ 
     | 
|
| 1 | 
         
            -
            import { HF_ACCESS_TOKEN, MODELS, OLD_MODELS } from "$env/static/private";
         
     | 
| 2 | 
         
             
            import type { ChatTemplateInput, WebSearchQueryTemplateInput } from "$lib/types/Template";
         
     | 
| 3 | 
         
             
            import { compileTemplate } from "$lib/utils/template";
         
     | 
| 4 | 
         
             
            import { z } from "zod";
         
     | 
| 
         @@ -133,6 +133,8 @@ export type Endpoint = z.infer<typeof endpoint>; 
     | 
|
| 133 | 
         | 
| 134 | 
         
             
            export const defaultModel = models[0];
         
     | 
| 135 | 
         | 
| 
         | 
|
| 
         | 
|
| 136 | 
         
             
            export const validateModel = (_models: BackendModel[]) => {
         
     | 
| 137 | 
         
             
            	// Zod enum function requires 2 parameters
         
     | 
| 138 | 
         
             
            	return z.enum([_models[0].id, ..._models.slice(1).map((m) => m.id)]);
         
     | 
| 
         | 
|
| 1 | 
         
            +
            import { HF_ACCESS_TOKEN, MODELS, OLD_MODELS, TASK_MODEL } from "$env/static/private";
         
     | 
| 2 | 
         
             
            import type { ChatTemplateInput, WebSearchQueryTemplateInput } from "$lib/types/Template";
         
     | 
| 3 | 
         
             
            import { compileTemplate } from "$lib/utils/template";
         
     | 
| 4 | 
         
             
            import { z } from "zod";
         
     | 
| 
         | 
|
| 133 | 
         | 
| 134 | 
         
             
            export const defaultModel = models[0];
         
     | 
| 135 | 
         | 
| 136 | 
         
            +
            export const smallModel = models.find((m) => m.name === TASK_MODEL) || defaultModel;
         
     | 
| 137 | 
         
            +
             
     | 
| 138 | 
         
             
            export const validateModel = (_models: BackendModel[]) => {
         
     | 
| 139 | 
         
             
            	// Zod enum function requires 2 parameters
         
     | 
| 140 | 
         
             
            	return z.enum([_models[0].id, ..._models.slice(1).map((m) => m.id)]);
         
     | 
    	
        src/lib/server/summarize.ts
    CHANGED
    
    | 
         @@ -7,8 +7,18 @@ export async function summarize(prompt: string) { 
     | 
|
| 7 | 
         | 
| 8 | 
         
             
            	const summaryPrompt = await buildPrompt({
         
     | 
| 9 | 
         
             
            		messages: [{ from: "user", content: userPrompt }],
         
     | 
| 10 | 
         
            -
            		preprompt:
         
     | 
| 11 | 
         
            -
             
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 12 | 
         
             
            		model: defaultModel,
         
     | 
| 13 | 
         
             
            	});
         
     | 
| 14 | 
         | 
| 
         | 
|
| 7 | 
         | 
| 8 | 
         
             
            	const summaryPrompt = await buildPrompt({
         
     | 
| 9 | 
         
             
            		messages: [{ from: "user", content: userPrompt }],
         
     | 
| 10 | 
         
            +
            		preprompt: `
         
     | 
| 11 | 
         
            +
            You are a summarization AI. Your task is to summarize user requests, in a single sentence of less than 5 words. Do not try to answer questions, just summarize the user's request. Start your answer with an emoji relevant to the summary."
         
     | 
| 12 | 
         
            +
             
     | 
| 13 | 
         
            +
            Example: "Who is the president of France ?"
         
     | 
| 14 | 
         
            +
            Summary: "🇫🇷 President of France request"
         
     | 
| 15 | 
         
            +
             
     | 
| 16 | 
         
            +
            Example: "What are the latest news ?"
         
     | 
| 17 | 
         
            +
            Summary: "📰 Latest news"
         
     | 
| 18 | 
         
            +
             
     | 
| 19 | 
         
            +
            Example: "Can you debug this python code?"
         
     | 
| 20 | 
         
            +
            Summary: "🐍 Python code debugging request"
         
     | 
| 21 | 
         
            +
            `,
         
     | 
| 22 | 
         
             
            		model: defaultModel,
         
     | 
| 23 | 
         
             
            	});
         
     | 
| 24 | 
         | 
    	
        src/routes/conversation/[id]/+page.svelte
    CHANGED
    
    | 
         @@ -161,8 +161,6 @@ 
     | 
|
| 161 | 
         
             
            								}
         
     | 
| 162 | 
         
             
            							} else if (update.type === "webSearch") {
         
     | 
| 163 | 
         
             
            								webSearchMessages = [...webSearchMessages, update];
         
     | 
| 164 | 
         
            -
            							} else {
         
     | 
| 165 | 
         
            -
            								console.log();
         
     | 
| 166 | 
         
             
            							}
         
     | 
| 167 | 
         
             
            						} catch (parseError) {
         
     | 
| 168 | 
         
             
            							// in case of parsing error we wait for the next message
         
     | 
| 
         | 
|
| 161 | 
         
             
            								}
         
     | 
| 162 | 
         
             
            							} else if (update.type === "webSearch") {
         
     | 
| 163 | 
         
             
            								webSearchMessages = [...webSearchMessages, update];
         
     | 
| 
         | 
|
| 
         | 
|
| 164 | 
         
             
            							}
         
     | 
| 165 | 
         
             
            						} catch (parseError) {
         
     | 
| 166 | 
         
             
            							// in case of parsing error we wait for the next message
         
     |