Leri777 committed on
Commit 4df36c7 · verified · 1 Parent(s): a5219f1

Update app.py

Files changed (1)
app.py +14 -14
app.py CHANGED
@@ -3,8 +3,7 @@ import logging
 from logging.handlers import RotatingFileHandler
 import gradio as gr
 import torch
-from accelerate import Accelerator
-from transformers import AutoModelForCausalLM, GemmaTokenizerFast, pipeline
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 from langchain_huggingface import HuggingFacePipeline
 from langchain.prompts import PromptTemplate
 from langchain.chains import LLMChain
@@ -20,20 +19,22 @@ logger.addHandler(file_handler)
 logger.debug("Application started")
 
 model_id = "google/gemma-2-9b-it"
-tokenizer = GemmaTokenizerFast.from_pretrained(model_id)
+tokenizer = AutoTokenizer.from_pretrained(model_id)
 
 # Load model with GPU availability check
 if torch.cuda.is_available():
     logger.debug("GPU is available. Proceeding with GPU setup.")
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
-        device_map="auto", torch_dtype=torch.bfloat16,
+        device_map="auto",
+        torch_dtype=torch.bfloat16,
     )
 else:
     logger.warning("GPU is not available. Proceeding with CPU setup.")
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
-        device_map="auto", low_cpu_mem_usage=True, token=os.getenv('HF_TOKEN'),
+        low_cpu_mem_usage=True,
+        use_auth_token=os.getenv('HF_TOKEN'),
     )
 
 model.eval()
@@ -53,6 +54,7 @@ pipe = pipeline(
 # Initialize HuggingFacePipeline model for LangChain
 chat_model = HuggingFacePipeline(pipeline=pipe)
 
+logger.debug("Model and tokenizer loaded successfully")
 
 # Define the conversation template for LangChain
 template = """<|im_start|>system
@@ -68,12 +70,12 @@ template = """<|im_start|>system
 prompt = PromptTemplate(
     template=template, input_variables=["system_prompt", "history", "human_input"]
 )
-chain = prompt | chat_model
+chain = LLMChain(llm=chat_model, prompt=prompt)
 
 # Prediction function using LangChain and model
-def predict(message, chat_history=[]):
+def predict(message, history=[]):
     formatted_history = "\n".join(
-        [f"<|im_start|>{entry['role']}\n{entry['content']}<|im_end|>" for entry in chat_history]
+        [f"<|im_start|>{entry['role']}\n{entry['content']}<|im_end|>" for entry in history]
     )
     system_prompt = "You are a helpful coding assistant."
 
@@ -91,14 +93,12 @@ def predict(message, chat_history=[]):
 # Gradio UI
 interface = gr.Interface(
     fn=predict,
-    inputs=[
-        gr.Textbox(label="User input"),
-        gr.State(),
-    ],
-    outputs="text", allow_flagging='never',
+    inputs=gr.Textbox(label="User input"),
+    outputs="text",
+    allow_flagging='never',
     live=True,
 )
 
 interface.launch()
 
-logger.debug("Chat interface initialized and launched")
+logger.debug("Chat interface initialized and launched")
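A note on invoking the reworked chain: `chain = prompt | chat_model` (a LangChain runnable) is called with `chain.invoke({...})`, whereas the `LLMChain` this commit switches to is typically driven with `chain.run(...)` keyword arguments matching the template's `input_variables`. The body of `predict` between `formatted_history` and the Gradio UI is unchanged context not shown in this diff, so the following is only a sketch of how the new chain would plausibly be called; the sample history entry and the `reply` variable are hypothetical, not part of the commit.

# Sketch only, assuming the objects defined in app.py above.
# LLMChain.run accepts keyword arguments matching
# PromptTemplate(input_variables=["system_prompt", "history", "human_input"]).
history = [{"role": "user", "content": "Explain Python decorators."}]  # dict shape expected by predict()
formatted_history = "\n".join(
    f"<|im_start|>{entry['role']}\n{entry['content']}<|im_end|>" for entry in history
)
reply = chain.run(
    system_prompt="You are a helpful coding assistant.",
    history=formatted_history,
    human_input="Show a short example.",
)
print(reply)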