|
#pragma once |
|
|
|
#include "llama.h" |
|
#include "common.h" |
|
|
|
struct common_speculative; |
|
|
|
struct common_speculative_params { |
|
int n_draft = 16; |
|
int n_reuse = 256; |
|
|
|
float p_min = 0.9f; |
|
}; |
|
|
|
struct common_speculative * common_speculative_init(struct llama_context * ctx_dft); |
|
|
|
void common_speculative_free(struct common_speculative * spec); |
|
|
|
bool common_speculative_are_compatible( |
|
const struct llama_context * ctx_tgt, |
|
const struct llama_context * ctx_dft); |
|
|
|
|
|
llama_tokens common_speculative_gen_draft( |
|
struct common_speculative * spec, |
|
struct common_speculative_params params, |
|
const llama_tokens & prompt, |
|
llama_token id_last); |
|
|