AIModels24 committed
Commit e80c39d · verified · 1 Parent(s): 092291a

Update app.py

Files changed (1):
  1. app.py +20 -82
app.py CHANGED
@@ -1,85 +1,23 @@
-import torch
 import streamlit as st
-from peft import PeftModel
-# from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 from transformers import AutoTokenizer, AutoModelForCausalLM
-# Load the model and tokenizer
-# def load_model_and_tokenizer():
-#     model_name = "AIModels24/Indian_Constitution"  # Replace with your model name
-
-#     # Define quantization configuration for 4-bit quantization
-#     # quant_config = BitsAndBytesConfig(load_in_4bit=True)  # 4-bit quantization
-
-#     # Load the tokenizer
-#     tokenizer = AutoTokenizer.from_pretrained(model_name)
-
-#     # Load the model with 4-bit quantization
-#     model = AutoModelForCausalLM.from_pretrained(
-#         model_name,
-#         # quantization_config=quant_config,
-#         device_map=None,
-#         low_cpu_mem_usage=True
-#     )
-
-#     return model, tokenizer
-
-
-def load_model_and_tokenizer():
-    # Base model
-    base_model_name = "unsloth/llama-3-8b-bnb-4bit"
-    adapter_name = "AIModels24/Indian_Constitution"
-
-    # Load the tokenizer
-    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
-
-    # Load the base model
-    model = AutoModelForCausalLM.from_pretrained(
-        base_model_name,
-        device_map=None,
-        low_cpu_mem_usage=True,
-        use_cache=True
-
-    )
-
-    # Load the LoRA adapter
-    model = PeftModel.from_pretrained(model, adapter_name)
-
-    return model, tokenizer
-
-
-# Load model and tokenizer using the function
-model, tokenizer = load_model_and_tokenizer()
-
-## prompt function
-alpaca_prompt = "### Instruction:\n{}\n\n### Response:\n"
-
-
-# Streamlit User Interface
-st.title("भारतीय कानून व्यवस्था")
-st.subheader("AI-powered responses for legal questions in Indian law")
-
-# Input text box for user question
-instruction = st.text_area("Enter your question:", placeholder="Ask a question about Indian law...")
-
-# Generate response button
-if st.button("Generate Response"):
-    if instruction.strip():
-        with st.spinner("Generating response..."):
-            # Prepare the prompt for the model
-            inputs = tokenizer(
-                [alpaca_prompt.format(instruction)],
-                return_tensors="pt"
-            ).to("cuda")
-
-            # Generate the response
-            outputs = model.generate(**inputs, max_new_tokens=150, use_cache=True)
-            response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
-
-            # Extract the clean response
-            response_cleaned = response.split("### Response:\n")[-1].strip()
+import torch
 
-            # Display the response
-            st.success("Response:")
-            st.write(response_cleaned)
-    else:
-        st.error("Please enter a question to generate a response.")
+# Load the model
+model_name = "AIModels24/Indian_Constitution"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name)
+
+@st.cache_resource
+def generate_response(prompt):
+    inputs = tokenizer(prompt, return_tensors="pt")
+    with torch.no_grad():
+        outputs = model.generate(inputs['input_ids'], max_length=50)
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+# Streamlit app interface
+st.title("Text Generation with Hugging Face")
+prompt = st.text_area("Enter your prompt:")
+
+if st.button("Generate"):
+    response = generate_response(prompt)
+    st.write(response)