import subprocess
import sys
import time
from collections import defaultdict, deque

def install_and_import(package):
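    """Make sure `package` is importable, installing it with pip if it is missing."""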
    try:
        __import__(package)
    except ImportError:
        print(f"{package} is not installed, installing...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])

install_and_import("gradio")
install_and_import("transformers")
install_and_import("torch")

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# === RATE LIMIT ===
click_logs = defaultdict(lambda: {
    "minute": deque(),
    "hour": deque(),
    "day": deque()
})

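# Each entry maps a window name to (max requests, window length in seconds).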
LIMITS = {
    "minute": (3, 60),
    "hour": (50, 3600),
    "day": (250, 86400)
}

def check_rate_limit(session_id):
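    """Sliding-window rate limiter keyed by session id.

    For each window, drop timestamps older than the window, then either reject
    the request with a message (if any window is full) or record the current
    time in every window. Returns (allowed, message, remaining, reset_times).
    """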
    now = time.time()
    logs = click_logs[session_id]
    remaining = {}
    reset_times = {}

    limit_exceeded = False
    message = None

    for key, (limit, interval) in LIMITS.items():
        # Drop timestamps that have fallen outside this window
        while logs[key] and now - logs[key][0] > interval:
            logs[key].popleft()

        current_usage = len(logs[key])
        remaining[key] = max(0, limit - current_usage)
        reset_times[key] = int(interval - (now - logs[key][0])) if logs[key] else 0

        if current_usage >= limit:
            limit_exceeded = True
            message = f"⛔ {key.capitalize()} rate limit exceeded ({limit}/{key})"

    if limit_exceeded:
        return False, message, remaining, reset_times

    # No limit exceeded: record this request in every window
    for key in LIMITS:
        logs[key].append(now)

    return True, None, remaining, reset_times

# === MODEL ===
model_name = "Bertug1911/BrtGPT-124m-Base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

def generate_text(prompt, temperature, top_k, max_new_tokens, session_id):
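    """Generate a completion for `prompt`, subject to the session's rate limit."""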
    allowed, message, remaining, reset_times = check_rate_limit(session_id)
    if not allowed:
        return message, format_status(remaining, reset_times)

    inputs = tokenizer(prompt, return_tensors="pt")
    output = model.generate(
        **inputs,
        max_new_tokens=int(max_new_tokens),
        temperature=float(temperature),
        top_k=int(top_k),
        do_sample=True,
    )
    generated_text = tokenizer.decode(output[0], skip_special_tokens=False)
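    # Post-process the decoded string: the literal spaces act as token separators,
    # so strip them, then map the byte-level BPE marker "Ġ" back to real spaces.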
    generated_text = generated_text.replace(" ", "").replace("Ġ", " ")
    return generated_text, format_status(remaining, reset_times)

def format_status(remaining, reset_times):
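    """Render the remaining quota and reset countdown for each window, one line per window."""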
    lines = []
    for key in ["minute", "hour", "day"]:
        lines.append(f"🕒 {key.capitalize()}: {remaining[key]} left — resets in {reset_times[key]} sec")
    return "\n".join(lines)

# === UI ===
with gr.Blocks() as app:
    # Use a callable so each browser session gets its own id (and thus its own
    # rate-limit bucket); a fixed string would be shared by every visitor.
    session_id = gr.State(lambda: str(time.time()))

    gr.Markdown("""
    # 🤖 BrtGPT-124m-Base Text Generation
    
    Generate text using the BrtGPT-124m-Base model. The limits below currently apply to anonymous usage (there is no login yet), both directly from this page and via the API.
    
    **Rate Limits:**
    - ⏱️ Minute: 3 requests per minute
    - 🕐 Hour: 50 requests per hour
    - 📅 Day: 250 requests per day
    
    Once the website and its login system are added, we will adjust the rate limits according to subscription tier. Sorry about the rate limits, but they are the only way to prevent very long queues and spam!
    Also, if the infrastructure turns out to be insufficient, we will upgrade the GPU!
    NOTE: The rate limit is tracked per IP. IT CANNOT BE RESET by switching browsers, using the API, or changing interfaces!
    """)

    with gr.Row():
        prompt = gr.Textbox(lines=3, placeholder="Enter your prompt here...", label="Prompt")
        output = gr.Textbox(label="Generated Text")

    with gr.Row():
        temperature = gr.Slider(0.01, 1.0, value=0.5, step=0.01, label="Temperature")
        top_k = gr.Slider(1, 50, value=10, step=1, label="Top-K")
        max_new_tokens = gr.Slider(1, 50, value=15, step=1, label="Max New Tokens")

    generate_button = gr.Button("Generate")
    status_text = gr.Markdown()

    generate_button.click(
        fn=generate_text,
        inputs=[prompt, temperature, top_k, max_new_tokens, session_id],
        outputs=[output, status_text]
    )

app.launch()