import os

import gradio as gr
import torch
from huggingface_hub import login
from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIteratorStreamer
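
# ---------------------------------------------------------------------------
# Earlier experiments, kept for reference and left commented out below:
#   * chatting with a local Ollama server via the `ollama` Python client,
#   * OCR of a handwritten image by sending base64 data to Ollama's
#     /api/generate endpoint,
#   * a plain-text /api/generate request.
# The live code further down loads Gemma 3 directly with transformers instead.
# ---------------------------------------------------------------------------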
# from PIL import Image
# import requests
# import torch
# import os
# from transformers import Gemma3ForConditionalGeneration, AutoProcessor
# print("hey")
# # Set the cache directory
# cache_dir = "F:\\huggingface_cache"

# # Set environment variables for good measure
# # os.environ["TRANSFORMERS_CACHE"] = cache_dir
# # os.environ["HF_HOME"] = cache_dir
# # os.environ["HUGGINGFACE_HUB_CACHE"] = cache_dir

# # Model ID
# model_id = "gemma3:latest"

# from ollama import chat
# from ollama import ChatResponse

# def _get_response(message):
#     messages = [
#         {
#             'role': 'user',
#             'content': message,
#         },
#     ]
#     response: ChatResponse = chat(model=model_id, messages=messages)
#     return response.message.content



# import requests
# import base64
# # Function to encode image to Base64
# def encode_image_to_base64(image_path):
#     with open(image_path, "rb") as image_file:
#         return base64.b64encode(image_file.read()).decode("utf-8")

# def image_process():
#     image_path = r"F:\HF\gemma-examples\WhatsApp Image 2025-03-21 at 10.05.06 PM.jpeg"  # Replace with your image path

#     # Encode the image
#     image_base64 = encode_image_to_base64(image_path)

#     # Ollama API endpoint
#     OLLAMA_URL = "http://localhost:11434/api/generate"

#     # Payload for the API request
#     payload = {
#         "model": model_id,  # Specify the model version
#         "prompt": "Given image is a handwritten text in english language, read it carefully and extract all the text mentioned in it.",
#         "images": [image_base64],  # List of Base64-encoded images
#         "stream": False
#     }

#     # Headers for the request
#     headers = {
#         "Content-Type": "application/json"
#     }

#     # Send the POST request
#     response = requests.post(OLLAMA_URL, json=payload, headers=headers)

#     # Check the response
#     if response.status_code == 200:
#         data = response.json()
#         print("Response from Gemma 3:")
#         print(data.get("response", "No response field in the API response."))
#     else:
#         print(f"Error: {response.status_code}")
#         print(response.text)
#     return response.text

#     # Path to your image



# def _hit_endpoint(name):
#     import requests
#     import json

#     # Define the URL of the Ollama server
#     OLLAMA_URL = "http://localhost:11434/api/generate"

#     # Define the request payload
#     payload = {
#         "model": model_id,  # Change this to your desired model
#         "prompt": name,
#         "stream": False
#     }

#     # Make the request
#     response = requests.post(OLLAMA_URL, json=payload)

#     # Parse and print the response
#     if response.status_code == 200:
#         data = response.json()
#         print(data["response"])  # Extracting the generated text
#         return data["response"]
#     else:
#         print(f"Error: {response.status_code} - {response.text}")
#         return "An error occurred!"
# Log in to the Hugging Face Hub so the gated Gemma 3 weights can be downloaded.
# The token is read from the "hf_token" environment variable / Space secret.
login(token=os.getenv("hf_token"))

# Model ID can be overridden via the MODEL_ID environment variable.
model_id = os.getenv("MODEL_ID", "google/gemma-3-12b-it")

processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
model = Gemma3ForConditionalGeneration.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    attn_implementation="eager",
    cache_dir="F:\\huggingface_cache",  # local download cache from the original setup
)

def run_fn(message):
    # Build a single-turn, text-only chat in the Gemma 3 message format.
    # A multimodal turn would add an image entry to "content", e.g.:
    #   {"role": "user", "content": [
    #       {"type": "image", "image": "https://www.ilankelman.org/stopsigns/australia.jpg"},
    #       {"type": "text", "text": "Please describe this image in detail."},
    #   ]}
    messages_list = [{"role": "user", "content": [{"type": "text", "text": message}]}]

    inputs = processor.apply_chat_template(
        messages_list,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(device=model.device, dtype=torch.bfloat16)

    max_new_tokens = 100
    with torch.inference_mode():
        outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)

    # generate() returns the prompt tokens followed by the new tokens; slice off
    # the prompt and decode only the generated continuation.
    generated_ids = outputs[0][inputs["input_ids"].shape[-1]:]
    return processor.decode(generated_ids, skip_special_tokens=True)
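
# The original script imported TextIteratorStreamer but never consumed it, so
# run_fn above returns the full decoded reply in one piece. The sketch below
# shows one way streaming *could* be wired up for Gradio (gr.Interface streams
# partial output when the function is a generator). It is an illustrative
# alternative, not the original implementation; the threading details are assumptions.
from threading import Thread


def run_fn_streaming(message):
    messages_list = [{"role": "user", "content": [{"type": "text", "text": message}]}]
    inputs = processor.apply_chat_template(
        messages_list,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(device=model.device, dtype=torch.bfloat16)

    # Decode tokens as they are produced, skipping the echoed prompt.
    streamer = TextIteratorStreamer(
        processor.tokenizer, timeout=30.0, skip_prompt=True, skip_special_tokens=True
    )

    # generate() blocks, so run it in a background thread and read from the
    # streamer in the foreground, yielding the text accumulated so far.
    thread = Thread(
        target=model.generate,
        kwargs=dict(inputs, streamer=streamer, max_new_tokens=100),
    )
    thread.start()
    text = ""
    for chunk in streamer:
        text += chunk
        yield text
    thread.join()
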
def greet(name):
    return run_fn(name)
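
# The commented conversation example inside run_fn hints at multimodal input.
# A minimal sketch of a text+image turn, assuming the public image URL from
# that example is reachable; it mirrors the standard Gemma 3 usage pattern and
# is not wired into the Gradio interface below.
def describe_image(image_url, question="Please describe this image in detail."):
    messages_list = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image_url},
                {"type": "text", "text": question},
            ],
        },
    ]
    inputs = processor.apply_chat_template(
        messages_list,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(device=model.device, dtype=torch.bfloat16)
    with torch.inference_mode():
        outputs = model.generate(**inputs, max_new_tokens=100)
    generated_ids = outputs[0][inputs["input_ids"].shape[-1]:]
    return processor.decode(generated_ids, skip_special_tokens=True)

# Example usage (runs a one-off description outside the web UI):
# print(describe_image("https://www.ilankelman.org/stopsigns/australia.jpg"))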

demo = gr.Interface(fn=greet, inputs="text", outputs="text")
demo.launch()