croswil
/

Llama_Llama-3.1-8B-Instruct

Inference Endpoints

Model card Files Files and versions Community

croswil committed on Nov 21, 2024

Commit

c6a3e9d

·

verified ·

1 Parent(s): 581ce45

Upload

Files changed (2) hide show

config.json +10 -0
inference.py +15 -0

config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+    "model_type": "llama",
+    "task": "text-generation",
+    "hidden_size": 4096,
+    "num_attention_heads": 32,
+    "num_hidden_layers": 32,
+    "vocab_size": 32000,
+    "max_position_embeddings": 2048,
+    "initializer_range": 0.02
+}

inference.py ADDED Viewed

	@@ -0,0 +1,15 @@

+import subprocess
+def infer(prompt):
+    # Example using llama.cpp
+    result = subprocess.run(
+        ["llama", "-m", "Meta-Llama-3.1-8B-Instruct-Q4_K_M", "-p", prompt],
+        capture_output=True,
+        text=True,
+    )
+    return result.stdout
+# Hugging Face Inference API expects a handler function
+def handler(data, context):
+    prompt = data.get("inputs", "")
+    return infer(prompt)