File size: 2,644 Bytes
93dad23
8d76cd2
 
93dad23
8d76cd2
93dad23
8d76cd2
 
93dad23
 
 
 
 
 
 
 
 
 
 
8d76cd2
 
 
 
 
 
 
 
 
 
 
 
 
 
93dad23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8d76cd2
 
 
 
 
 
 
 
 
 
 
93dad23
a67522d
93dad23
 
 
 
8d76cd2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Hugging Face checkpoint shared by the tokenizer and the causal language model.
model_name = "sarvamai/sarvam-2b-v0.5"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Weights are requested in float16; device_map="auto" leaves device placement
# to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Language names offered in the UI dropdown and interpolated into the prompt.
LANGUAGES = [
    "English",
    "Bengali",
    "Gujarati",
    "Hindi",
    "Kannada",
    "Malayalam",
    "Marathi",
    "Oriya",
    "Punjabi",
    "Tamil",
    "Telugu",
]

# Speaker labels that open a new turn in the prompt format used below. If the
# model keeps writing past its own answer, the reply is cut at the first one.
_TURN_MARKERS = ("\nHuman:", "\nAI:")


def _history_to_prompt_lines(history):
    """Render prior chat turns as "Human: ..." / "AI: ..." prompt lines.

    Accepts both history shapes a chat UI may hand over:
    ``(user, assistant)`` pairs, or ``{"role": ..., "content": ...}`` dicts.
    Dict entries with a role other than "user"/"assistant" are skipped.
    """
    lines = []
    for entry in history or []:
        if isinstance(entry, dict):
            role = entry.get("role")
            content = entry.get("content")
            if role == "user":
                lines.append(f"Human: {content}\n")
            elif role == "assistant":
                lines.append(f"AI: {content}\n")
            continue
        human, ai = entry
        # A pending turn may carry None as the assistant side; render it as
        # empty rather than the literal text "None".
        lines.append(f"Human: {human}\nAI: {'' if ai is None else ai}\n")
    return lines


def _trim_reply(generated_text):
    """Keep only the first AI turn from freshly generated text."""
    for marker in _TURN_MARKERS:
        generated_text = generated_text.partition(marker)[0]
    return generated_text.strip()


def chatbot(message, history, language):
    """Generate the assistant's reply to ``message``.

    Args:
        message: The latest user message.
        history: Previous turns, either as ``(user, assistant)`` pairs or as
            role/content dicts. May be empty or None.
        language: Language name inserted into the prompt header.

    Returns:
        The generated reply text, stripped of surrounding whitespace and cut
        off before any further "Human:"/"AI:" turn the model may have written.
    """
    prompt = "".join(
        [
            f"Conversation in {language}:\n",
            *_history_to_prompt_lines(history),
            f"Human: {message}\nAI:",
        ]
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            temperature=0.7,
            repetition_penalty=1.1,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the tokens produced after the prompt. Splitting the full
    # decoded text on "AI:" breaks when the user's message or the model's
    # continuation contains that label.
    generated_text = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    )
    return _trim_reply(generated_text)

# Extra input rendered alongside the chat box; its value is passed to
# `chatbot` as the `language` argument.
language_selector = gr.Dropdown(
    choices=LANGUAGES,
    label="Select Language",
    value="English",
)

# One greeting per supported language: [message, language].
example_prompts = [
    ["Hello, how are you?", "English"],
    ["नमस्ते, आप कैसे हैं?", "Hindi"],
    ["வணக்கம், எப்படி இருக்கிறீர்கள்?", "Tamil"],
    ["ନମସ୍କାର, ଆପଣ କେମିତି ଅଛନ୍ତି?", "Oriya"],
    ["નમસ્તે, તમે કેમ છો?", "Gujarati"],
    ["নমস্কার, আপনি কেমন আছেন?", "Bengali"],
    ["ನಮಸ್ಕಾರ, ನೀವು ಹೇಗಿದ್ದೀರಿ?", "Kannada"],
    ["നമസ്കാരം, സുഖമാണോ?", "Malayalam"],
    ["नमस्कार, तुम्ही कसे आहात?", "Marathi"],
    ["ਸਤ ਸ੍ਰੀ ਅਕਾਲ, ਤੁਸੀਂ ਕਿਵੇਂ ਹੋ?", "Punjabi"],
    ["నమస్కారం, మీరు ఎలా ఉన్నారు?", "Telugu"],
]

# Assemble the chat UI around the `chatbot` callback.
iface = gr.ChatInterface(
    chatbot,
    additional_inputs=[language_selector],
    title="Multilingual Indian Chatbot",
    description="Chat in multiple Indian languages using the sarvam-2b model.",
    examples=example_prompts,
    cache_examples=False,  # examples are not pre-computed and cached
    theme="soft",
)

# Start serving the app.
iface.launch()