import torch
import streamlit as st
from peft import PeftModel
# from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from transformers import AutoTokenizer, AutoModelForCausalLM
# Earlier loader (kept for reference): loaded the fine-tuned model directly.
# Superseded by the base-model + LoRA-adapter loader below.
# def load_model_and_tokenizer():
#     model_name = "AIModels24/Indian_Constitution"  # Replace with your model name

#     # Define quantization configuration for 4-bit quantization
#     # quant_config = BitsAndBytesConfig(load_in_4bit=True)  # 4-bit quantization

#     # Load the tokenizer
#     tokenizer = AutoTokenizer.from_pretrained(model_name)

#     # Load the model with 4-bit quantization
#     model = AutoModelForCausalLM.from_pretrained(
#         model_name,
#         # quantization_config=quant_config,
#         device_map=None,
#         low_cpu_mem_usage=True
#     )

#     return model, tokenizer

# Cache the model and tokenizer so they are loaded only once per Streamlit session
@st.cache_resource
def load_model_and_tokenizer():
    # Base model (pre-quantized 4-bit) and the fine-tuned LoRA adapter
    base_model_name = "unsloth/llama-3-8b-bnb-4bit"
    adapter_name = "AIModels24/Indian_Constitution"

    # Load the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)

    # Load the base model; device_map="auto" places the quantized weights
    # on the available GPU instead of leaving them on the CPU
    model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        device_map="auto",
        low_cpu_mem_usage=True,
        use_cache=True,
    )

    # Load the LoRA adapter on top of the base model and switch to inference mode
    model = PeftModel.from_pretrained(model, adapter_name)
    model.eval()

    return model, tokenizer


# Load model and tokenizer using the function
model, tokenizer = load_model_and_tokenizer()

## Alpaca-style prompt template
alpaca_prompt = "### Instruction:\n{}\n\n### Response:\n"
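# As an illustration, alpaca_prompt.format("What does Article 21 guarantee?")
# (a hypothetical question) produces the string:
#
#   ### Instruction:
#   What does Article 21 guarantee?
#
#   ### Response:
#
# The text the model appends after "### Response:" is extracted as the answer below.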


# Streamlit User Interface
st.title("भारतीय कानून व्यवस्था")
st.subheader("AI-powered responses for legal questions in Indian law")

# Input text box for user question
instruction = st.text_area("Enter your question:", placeholder="Ask a question about Indian law...")

# Generate response button
if st.button("Generate Response"):
    if instruction.strip():
        with st.spinner("Generating response..."):
            # Tokenize the formatted prompt and move it to the same device as the model
            inputs = tokenizer(
                [alpaca_prompt.format(instruction)],
                return_tensors="pt"
            ).to(model.device)

            # Generate the response without tracking gradients
            with torch.inference_mode():
                outputs = model.generate(**inputs, max_new_tokens=150, use_cache=True)
            response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
            
            # Extract the clean response
            response_cleaned = response.split("### Response:\n")[-1].strip()

        # Display the response
        st.success("Response:")
        st.write(response_cleaned)
    else:
        st.error("Please enter a question to generate a response.")
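
# To launch the app locally (assuming the required packages are installed):
#   streamlit run <path to this file>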