croswil committed on
Commit
c6a3e9d
·
verified ·
1 Parent(s): 581ce45
Files changed (2) hide show
  1. config.json +10 -0
  2. inference.py +15 -0
config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "llama",
3
+ "task": "text-generation",
4
+ "hidden_size": 4096,
5
+ "num_attention_heads": 32,
6
+ "num_hidden_layers": 32,
7
+ "vocab_size": 32000,
8
+ "max_position_embeddings": 2048,
9
+ "initializer_range": 0.02
10
+ }
inference.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess
2
+
3
def infer(
    prompt,
    model="Meta-Llama-3.1-8B-Instruct-Q4_K_M",
    binary="llama",
    timeout=None,
):
    """Run a llama.cpp-style command-line tool on *prompt* and return its output.

    The command executed is ``[binary, "-m", model, "-p", prompt]``. With the
    default arguments this is the same command the function always ran.

    Args:
        prompt: Text passed to the executable as the value of ``-p``.
        model: Value passed to the executable as ``-m``.
        binary: Name or path of the executable to invoke.
        timeout: Maximum number of seconds to wait for the process, or
            ``None`` to wait indefinitely.

    Returns:
        Everything the process wrote to standard output, decoded as text.

    Raises:
        FileNotFoundError: If ``binary`` cannot be found.
        subprocess.CalledProcessError: If the process exits with a non-zero
            status; the captured stderr is attached to the exception.
        subprocess.TimeoutExpired: If ``timeout`` elapses first.
    """
    # The argument-list form (no shell) keeps the prompt a single argv entry,
    # so shell metacharacters inside it are never interpreted.
    command = [binary, "-m", model, "-p", prompt]
    completed = subprocess.run(
        command,
        capture_output=True,
        text=True,
        # Fail loudly: without this a crashed run would hand back empty or
        # partial stdout as if it were a valid generation.
        check=True,
        timeout=timeout,
    )
    return completed.stdout
11
+
12
+ # Hugging Face Inference API expects a handler function
13
def handler(data, context):
    """Extract the prompt from a request payload and return the generated text.

    Args:
        data: Mapping-like request payload; the prompt is read from its
            ``"inputs"`` key, falling back to an empty string when absent.
        context: Accepted for signature compatibility; not used here.

    Returns:
        Whatever ``infer`` returns for the extracted prompt.
    """
    return infer(data.get("inputs", ""))