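"""FastAPI wrapper around a local SmolLM2-360M-Instruct GGUF model.

Exposes four plain-text POST endpoints (/llm_normal/, /llm_agent/,
/llm_functioncalling/, /llm_search/) that delegate to helpers in llm_func.
"""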
from fastapi import FastAPI, HTTPException
from fastapi.responses import PlainTextResponse
from pydantic import BaseModel
from llm_func import llm_normal, llm_agent, name_age, llm_functioncalling, search, llm_search
from llama_cpp import Llama
import multiprocessing

# Report the available CPU cores and the thread count that could be dedicated to inference.
print(multiprocessing.cpu_count())
print(max(multiprocessing.cpu_count() // 2, 1))

# Load the quantized SmolLM2-360M-Instruct model once at startup; it is shared by all endpoints.
llm = Llama.from_pretrained(repo_id="HuggingFaceTB/SmolLM2-360M-Instruct-GGUF", filename="*smollm2-360m-instruct-q8_0.gguf", verbose=False, n_ctx=500, n_threads=1)
# Alternative: a smaller Q2_K quant running with half of the available cores.
# llm = Llama.from_pretrained(repo_id="bartowski/SmolLM2-360M-Instruct-GGUF", filename="*SmolLM2-360M-Instruct-Q2_K.gguf", verbose=False, n_ctx=500, n_threads=max(multiprocessing.cpu_count() // 2, 1))


app = FastAPI()

class TextInput(BaseModel):
    text: str


@app.post("/llm_normal/", response_class=PlainTextResponse)
async def to_llm_normal(input_data: TextInput):
    if not input_data.text:
        raise HTTPException(status_code=400, detail="The text can't be empty.")
    output_data = llm_normal(input_data.text, llm)
    return output_data


@app.post("/llm_agent/", response_class=PlainTextResponse)
async def to_llm_agent(input_data: TextInput):
    if not input_data.text:
        raise HTTPException(status_code=400, detail="The text can't be empty.")
    output_data = llm_agent(input_data.text, llm)
    return output_data


@app.post("/llm_functioncalling/", response_class=PlainTextResponse)
async def to_llm_functioncalling(input_data: TextInput):
    if not input_data.text:
        raise HTTPException(status_code=400, detail="The text can't be empty.")
    output_data = llm_functioncalling(input_data.text, llm)
    return output_data


@app.post("/llm_search/", response_class=PlainTextResponse)
async def to_llm_search(input_data: TextInput):
    if not input_data.text:
        raise HTTPException(status_code=400, detail="The text can't be empty.")
    output_data = llm_search(input_data.text, llm)
    return output_data
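

# Minimal sketch of how the service can be run and exercised locally; the port,
# host, and example prompt below are assumptions, not part of the original file.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)

# Example request once the server is up (the prompt is illustrative):
#   curl -X POST http://localhost:8000/llm_normal/ \
#        -H "Content-Type: application/json" \
#        -d '{"text": "What is the capital of France?"}'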