TI_RAG_Demo_L3.1

Runtime error

arjunanand13 committed on May 6

Commit

0501f59

•

1 Parent(s): c277bbe

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -27,7 +27,7 @@ from huggingface_hub import InferenceClient
 Loading of the LLama3 model
 """
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
-model_id = 'meta-llama/Meta-Llama-3-8B-Instruct'
 device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
 # set quantization configuration to load large model with less GPU memory

 Loading of the LLama3 model
 """
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
+model_id = 'meta-llama/Meta-Llama-3-8B'
 device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
 # set quantization configuration to load large model with less GPU memory