stas122 committed on
Commit
feed67b
·
verified ·
1 Parent(s): c7b8451

Upload 7 files

Browse files
chat_template.jinja ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if messages[0]["role"] == "system" %}
2
+ {%- set system_message = messages[0]["content"] %}
3
+ {%- set loop_messages = messages[1:] %}
4
+ {%- else %}
5
+ {%- set loop_messages = messages %}
6
+ {%- endif %}
7
+ {%- if not tools is defined %}
8
+ {%- set tools = none %}
9
+ {%- endif %}
10
+ {%- set user_messages = loop_messages | selectattr("role", "equalto", "user") | list %}
11
+
12
+ {#- This block checks for alternating user/assistant messages, skipping tool calling messages #}
13
+ {%- set ns = namespace() %}
14
+ {%- set ns.index = 0 %}
15
+ {%- for message in loop_messages %}
16
+ {%- if not (message.role == "tool" or message.role == "tool_results" or (message.tool_calls is defined and message.tool_calls is not none)) %}
17
+ {%- if (message["role"] == "user") != (ns.index % 2 == 0) %}
18
+ {{- raise_exception("After the optional system message, conversation roles must alternate user/assistant/user/assistant/...") }}
19
+ {%- endif %}
20
+ {%- set ns.index = ns.index + 1 %}
21
+ {%- endif %}
22
+ {%- endfor %}
23
+
24
+ {{- bos_token }}
25
+ {%- for message in loop_messages %}
26
+ {%- if message["role"] == "user" %}
27
+ {%- if tools is not none and (message == user_messages[-1]) %}
28
+ {{- "[AVAILABLE_TOOLS] [" }}
29
+ {%- for tool in tools %}
30
+ {%- set tool = tool.function %}
31
+ {{- '{"type": "function", "function": {' }}
32
+ {%- for key, val in tool.items() if key != "return" %}
33
+ {%- if val is string %}
34
+ {{- '"' + key + '": "' + val + '"' }}
35
+ {%- else %}
36
+ {{- '"' + key + '": ' + val|tojson }}
37
+ {%- endif %}
38
+ {%- if not loop.last %}
39
+ {{- ", " }}
40
+ {%- endif %}
41
+ {%- endfor %}
42
+ {{- "}}" }}
43
+ {%- if not loop.last %}
44
+ {{- ", " }}
45
+ {%- else %}
46
+ {{- "]" }}
47
+ {%- endif %}
48
+ {%- endfor %}
49
+ {{- "[/AVAILABLE_TOOLS]" }}
50
+ {%- endif %}
51
+ {%- if loop.last and system_message is defined %}
52
+ {{- "[INST] " + system_message + "\n\n" + message["content"] + "[/INST]" }}
53
+ {%- else %}
54
+ {{- "[INST] " + message["content"] + "[/INST]" }}
55
+ {%- endif %}
56
+ {%- elif message.tool_calls is defined and message.tool_calls is not none %}
57
+ {{- "[TOOL_CALLS] [" }}
58
+ {%- for tool_call in message.tool_calls %}
59
+ {%- set out = tool_call.function|tojson %}
60
+ {{- out[:-1] }}
61
+ {%- if not tool_call.id is defined or tool_call.id|length != 9 %}
62
+ {{- raise_exception("Tool call IDs should be alphanumeric strings with length 9!") }}
63
+ {%- endif %}
64
+ {{- ', "id": "' + tool_call.id + '"}' }}
65
+ {%- if not loop.last %}
66
+ {{- ", " }}
67
+ {%- else %}
68
+ {{- "]" + eos_token }}
69
+ {%- endif %}
70
+ {%- endfor %}
71
+ {%- elif message["role"] == "assistant" %}
72
+ {{- " " + message["content"]|trim + eos_token}}
73
+ {%- elif message["role"] == "tool_results" or message["role"] == "tool" %}
74
+ {%- if message.content is defined and message.content.content is defined %}
75
+ {%- set content = message.content.content %}
76
+ {%- else %}
77
+ {%- set content = message.content %}
78
+ {%- endif %}
79
+ {{- '[TOOL_RESULTS] {"content": ' + content|string + ", " }}
80
+ {%- if not message.tool_call_id is defined or message.tool_call_id|length != 9 %}
81
+ {{- raise_exception("Tool call IDs should be alphanumeric strings with length 9!") }}
82
+ {%- endif %}
83
+ {{- '"call_id": "' + message.tool_call_id + '"}[/TOOL_RESULTS]' }}
84
+ {%- else %}
85
+ {{- raise_exception("Only user and assistant roles are supported, with the exception of an initial optional system message!") }}
86
+ {%- endif %}
87
+ {%- endfor %}
config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 1,
8
+ "dtype": "float32",
9
+ "eos_token_id": 2,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 256,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 1024,
15
+ "max_position_embeddings": 512,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "num_attention_heads": 4,
19
+ "num_hidden_layers": 21,
20
+ "num_key_value_heads": 4,
21
+ "pad_token_id": 2,
22
+ "pretraining_tp": 1,
23
+ "rms_norm_eps": 1e-05,
24
+ "rope_parameters": {
25
+ "rope_theta": 10000.0,
26
+ "rope_type": "default"
27
+ },
28
+ "tie_word_embeddings": true,
29
+ "transformers_version": "5.2.0",
30
+ "use_cache": false,
31
+ "vocab_size": 32768
32
+ }
generation_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": [
5
+ 2
6
+ ],
7
+ "output_attentions": false,
8
+ "output_hidden_states": false,
9
+ "pad_token_id": 2,
10
+ "transformers_version": "5.2.0",
11
+ "use_cache": true
12
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b517d4a950a083dfac9aa1d16f3c05103b3b00a83f926ee6f8d2fc2dbb717293
3
+ size 121699864
readme.md ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ **Model Card: Stentor Python 30M**
2
+
3
+ **Model Description**
4
+
5
+ Stentor Python 30M is a compact language model specifically fine-tuned for Python code generation and autocompletion tasks. Based on the Stentor-30M architecture, this model contains 30 million parameters and is designed to run efficiently on resource-constrained devices including mobile phones and embedded systems.
6
+
7
+ **Model Details**
8
+
9
+ - **Developed by:** Experimental fine-tuning project
10
+ - **Model type:** Causal language model (LlamaForCausalLM)
11
+ - **Language:** Python code, English instructions
12
+ - **Parameters:** 30,419,712
13
+ - **Context length:** 512 tokens
14
+ - **Model size:** 122 MB (FP32, as shipped in `model.safetensors`); approximately 60 MB (FP16) or 30 MB (INT8) after conversion/quantization
15
+ - **License:** Apache 2.0
16
+
17
+ **Training Data**
18
+
19
+ The model was fine-tuned on a curated dataset of 872 Python examples, including:
20
+
21
+ - Basic algorithms (factorial, prime numbers, list operations)
22
+ - Class implementations (Stack, BankAccount, Rectangle, Circle)
23
+ - Recursive functions (quicksort, Fibonacci)
24
+ - String manipulation (palindrome, anagram, vowel counting)
25
+ - MBPP (Mostly Basic Python Problems) dataset tasks
26
+
27
+ All examples follow a consistent format with "### Task:" instruction and "### Solution:" code block.
28
+
29
+ **Training Process**
30
+
31
+ The fine-tuning process involved multiple stages:
32
+
33
+ 1. Base model: Stentor-30M pre-trained checkpoint
34
+ 2. Initial fine-tuning on 50k examples (checkpoint-1000 selected as best)
35
+ 3. Multiple correction rounds with progressively lower learning rates
36
+ 4. Final detoxification training with learning rate 3e-7 to remove undesirable patterns
37
+
38
+ **Evaluation Results**
39
+
40
+ The model was evaluated on several test categories:
41
+
42
+ | Category | Pass Rate | Notes |
43
+ |----------|-----------|-------|
44
+ | Basic functions | 80% | Factorial, prime check, etc. |
45
+ | Classes from training set | 100% | Stack, BankAccount, Rectangle |
46
+ | New complex classes | 33% | Graph, Queue, inheritance |
47
+ | Function signatures (MBPP) | 100% | Correctly generates def statements |
48
+
49
+ **Capabilities**
50
+
51
+ - Generates Python functions from natural language descriptions
52
+ - Implements basic algorithms (factorial, prime check, palindrome)
53
+ - Creates class definitions with methods (Stack, BankAccount, Rectangle)
54
+ - Handles recursive functions (quicksort, Fibonacci)
55
+ - Produces syntactically correct function signatures
56
+
57
+ **Limitations**
58
+
59
+ - May produce repeated or redundant code after the main solution
60
+ - Struggles with complex data structures (graphs, trees, queues)
61
+ - Does not reliably handle class inheritance patterns
62
+ - Can generate incorrect list indexing operations
63
+ - May continue generating text beyond the intended solution
64
+ - Limited to 512 token context window
65
+ - Not suitable for production use without output post-processing
66
+
67
+ **Recommended Use Cases**
68
+
69
+ - Code autocompletion in lightweight IDEs
70
+ - Educational tool for Python beginners
71
+ - Rapid prototyping of simple functions
72
+ - Embedded systems with limited computational resources
73
+ - Offline code assistance on mobile devices
74
+
75
+ **Not Recommended For**
76
+
77
+ - Complex algorithm implementation
78
+ - Production code generation without human review
79
+ - Tasks requiring deep contextual understanding
80
+ - Generating large codebases
81
+ - Security-critical applications
82
+
83
+ **Usage Example**
84
+
85
+ ```python
86
+ from transformers import AutoTokenizer, AutoModelForCausalLM
87
+
88
+ model_path = "path/to/stentor-python-30m"
89
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
90
+ model = AutoModelForCausalLM.from_pretrained(model_path)
91
+
92
+ prompt = "### Task: Write a function that checks if a number is even\n\n### Solution:\n"
93
+ inputs = tokenizer(prompt, return_tensors="pt")
94
+ outputs = model.generate(**inputs, max_new_tokens=100, temperature=0.2)
95
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True)
96
+ ```
97
+
98
+ **Hardware Requirements**
99
+
100
+ - **Inference:** CPU only (no GPU required)
101
+ - **RAM:** < 100 MB for inference
102
+ - **Storage:** 122 MB (FP32, as shipped); approximately 60 MB (FP16) or 30 MB (INT8 quantized)
103
+
104
+ **Ethical Considerations**
105
+
106
+ This model is intended for educational and development assistance purposes. Users should verify all generated code before deployment, particularly for security-sensitive applications. The model may occasionally produce incorrect or inefficient code and should not be relied upon as the sole source of truth for programming tasks.
107
+
108
+ **Citation**
109
+
110
+ If you use this model in your work, please cite:
111
+
112
+ ```
113
+ @misc{stentor-python-30m-2026,
114
+ author = {Fine-tuning Experiment},
115
+ title = {Stentor Python 30M: A Compact Model for Python Code Generation},
116
+ year = {2026},
117
+ publisher = {Hugging Face},
118
+ url = {https://huggingface.co/username/stentor-python-30m}
119
+ }
120
+ ```
121
+
122
+ **Contact**
123
+
124
+ For questions or feedback about this model, please open an issue on the Hugging Face repository.
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "backend": "tokenizers",
4
+ "bos_token": "<s>",
5
+ "clean_up_tokenization_spaces": false,
6
+ "eos_token": "</s>",
7
+ "is_local": true,
8
+ "legacy": false,
9
+ "model_max_length": 512,
10
+ "pad_token": "</s>",
11
+ "sp_model_kwargs": {},
12
+ "spaces_between_special_tokens": false,
13
+ "tokenizer_class": "TokenizersBackend",
14
+ "unk_token": "<unk>",
15
+ "use_default_system_prompt": false
16
+ }