Spaces:

chudai1019
/

ChudAI-API

Sleeping

App Files Files Community

chudai1019 committed on Dec 5, 2025

Commit

eae2184

verified ·

1 Parent(s): 6574a80

Upload 5 files

Browse files

Files changed (5) hide show

app.py +44 -0
model10M.pt +3 -0
model_transformer.py +37 -0
requirements.txt +3 -0
tokenizer.json +135 -0

app.py ADDED Viewed

	@@ -0,0 +1,44 @@

+import torch
+from model_transformer import TransformerLM
+from tokenizers import Tokenizer
+import gradio as gr
+# Load tokenizer
+# The path is relative, so tokenizer.json is resolved against the working
+# directory the app is started from.
+tok = Tokenizer.from_file("tokenizer.json")
+def encode(text):
+    """Tokenize ``text`` with the module-level tokenizer and return its token ids."""
+    encoding = tok.encode(text)
+    return encoding.ids
+def decode(ids):
+    """Convert a sequence of token ids back to text with the module-level tokenizer."""
+    text = tok.decode(ids)
+    return text
+# Load model
+# The embedding table and output layer are sized from the tokenizer, so the
+# tokenizer file must match the one the checkpoint was trained with.
+vocab_size = tok.get_vocab_size()
+model = TransformerLM(vocab_size)
+# weights_only=True restricts unpickling to tensors and plain containers, so a
+# tampered checkpoint cannot execute arbitrary code while it is being loaded.
+# A state dict (which is all load_state_dict needs) loads fine in this mode.
+model.load_state_dict(
+    torch.load("model10M.pt", map_location="cpu", weights_only=True)
+)
+# Inference only: put the network in evaluation mode.
+model.eval()
+# Text generation
+def generate(prompt, max_len=100):
+    """Greedily extend ``prompt`` by ``max_len`` tokens and return the decoded text.
+
+    Args:
+        prompt: Text to continue. ``None`` is treated as an empty string.
+        max_len: Number of tokens appended after the prompt tokens.
+
+    Returns:
+        The decoded prompt tokens followed by the generated tokens, or ``""``
+        when the prompt yields no tokens (empty or whitespace-only input).
+    """
+    prompt_ids = encode(prompt or "")
+    if not prompt_ids:
+        # With zero tokens there is no last position to read a prediction
+        # from, so the loop below would fail on its first step.
+        return ""
+    # The positional table has a fixed number of rows; a longer input cannot be
+    # added to it, so only the most recent tokens are shown to the model.
+    max_context = model.pos.pe.size(1)
+    ids = torch.tensor([prompt_ids], dtype=torch.long)
+    # Gradients are never needed here; disable them once for the whole loop.
+    with torch.no_grad():
+        for _ in range(max_len):
+            logits = model(ids[:, -max_context:])
+            # Greedy decoding: always take the highest-scoring token.
+            next_id = torch.argmax(logits[0, -1]).item()
+            ids = torch.cat([ids, torch.tensor([[next_id]])], dim=1)
+    return decode(ids[0].tolist())
+# Gradio UI
+# Only one input component is wired to generate(), so its max_len argument
+# always takes the default value.
+demo = gr.Interface(
+    fn=generate,
+    inputs=gr.Textbox(lines=2, placeholder="Ask something..."),
+    outputs="text",
+    title="ChudAI (Sandesh Edition)",
+    description="Your custom 10M Transformer AI running on HuggingFace."
+)
+# Called at module level: the app is launched whenever this file is executed
+# or imported.
+demo.launch()

model10M.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dbfdce64e308d8c31e1b8705857b2c6d297033557291f1f94b226dcac19c331a
+size 31756223

model_transformer.py ADDED Viewed

	@@ -0,0 +1,37 @@

+import torch
+import torch.nn as nn
+class PositionalEncoding(nn.Module):
+    """Add fixed sinusoidal position signals to a batch of embeddings.
+
+    Args:
+        emb: Width of the embeddings the table is added to. Odd widths are
+            supported.
+        max_len: Number of positions precomputed in the table.
+    """
+    def __init__(self, emb, max_len=2048):
+        super().__init__()
+        pe = torch.zeros(max_len, emb)
+        pos = torch.arange(0, max_len).unsqueeze(1)
+        # One frequency per sin/cos column pair, decaying geometrically from 1
+        # towards 1/10000 as the column index grows.
+        div = torch.exp(torch.arange(0, emb, 2) * (-torch.log(torch.tensor(10000.0)) / emb))
+        pe[:, 0::2] = torch.sin(pos * div)
+        # An odd emb has one more even column than odd columns, so trim the
+        # frequency list to the number of odd columns before filling them.
+        pe[:, 1::2] = torch.cos(pos * div[: emb // 2])
+        # Non-persistent buffer: it follows the module through .to()/.cuda()
+        # but stays out of state_dict(), so checkpoints saved without a "pe"
+        # entry still load.
+        self.register_buffer("pe", pe.unsqueeze(0), persistent=False)
+    def forward(self, x):
+        """Return ``x`` plus the encodings for its first ``x.size(1)`` positions.
+
+        ``x`` is indexed as (batch, position, emb): the table is sliced along
+        dimension 1 and broadcast over dimension 0.
+        """
+        # .to() is a no-op when the buffer already lives on x's device; it is
+        # kept so the module still works when used without being moved.
+        return x + self.pe[:, :x.size(1), :].to(x.device)
+class TransformerLM(nn.Module):
+    """Language model built from an embedding, positional encoding, an encoder stack and a vocabulary head.
+
+    Args:
+        vocab_size: Number of token ids; sizes the embedding table and the
+            output layer.
+        emb: Embedding / model width.
+        n_heads: Attention heads per encoder layer.
+        n_layers: Number of stacked encoder layers.
+        dim_feedforward: Hidden width of each layer's feed-forward sublayer.
+        max_len: Number of positions in the positional table.
+    """
+    def __init__(self, vocab_size, emb=256, n_heads=4, n_layers=4,
+                 dim_feedforward=512, max_len=2048):
+        super().__init__()
+        self.embed = nn.Embedding(vocab_size, emb)
+        self.pos = PositionalEncoding(emb, max_len)
+        encoder_layer = nn.TransformerEncoderLayer(
+            d_model=emb,
+            nhead=n_heads,
+            dim_feedforward=dim_feedforward,
+            batch_first=True
+        )
+        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=n_layers)
+        self.head = nn.Linear(emb, vocab_size)
+    def forward(self, x, causal=False):
+        """Return vocabulary logits for every position of ``x``.
+
+        Args:
+            x: Integer token ids indexed as (batch, position); the encoder is
+                built with ``batch_first=True``.
+            causal: When True, each position attends only to itself and to
+                earlier positions. The default (False) keeps the unmasked
+                attention this model has always used.
+
+        Returns:
+            A tensor whose last dimension has size ``vocab_size``.
+        """
+        # NOTE(review): without a mask every position can see later tokens.
+        # Weights trained that way presumably should keep causal=False at
+        # inference; confirm against the training script before switching.
+        mask = None
+        if causal:
+            steps = x.size(1)
+            # True marks pairs that must NOT attend: everything above the
+            # diagonal, i.e. positions in the future.
+            mask = torch.triu(
+                torch.ones(steps, steps, dtype=torch.bool, device=x.device),
+                diagonal=1,
+            )
+        x = self.embed(x)
+        x = self.pos(x)
+        x = self.transformer(x, mask=mask)
+        return self.head(x)

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+torch
+tokenizers
+gradio

tokenizer.json ADDED Viewed

	@@ -0,0 +1,135 @@

+{
+  "version": "1.0",
+  "truncation": null,
+  "padding": null,
+  "added_tokens": [
+    {
+      "id": 0,
+      "content": "[PAD]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 1,
+      "content": "[UNK]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 2,
+      "content": "[CLS]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 3,
+      "content": "[SEP]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 4,
+      "content": "[MASK]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    }
+  ],
+  "normalizer": {
+    "type": "Sequence",
+    "normalizers": [
+      {
+        "type": "NFKC"
+      }
+    ]
+  },
+  "pre_tokenizer": {
+    "type": "Whitespace"
+  },
+  "post_processor": null,
+  "decoder": null,
+  "model": {
+    "type": "WordLevel",
+    "vocab": {
+      "[PAD]": 0,
+      "[UNK]": 1,
+      "[CLS]": 2,
+      "[SEP]": 3,
+      "[MASK]": 4,
+      "I": 5,
+      ".": 6,
+      ",": 7,
+      "a": 8,
+      "am": 9,
+      "and": 10,
+      "is": 11,
+      "not": 12,
+      "'": 13,
+      "-": 14,
+      "...”": 15,
+      ":": 16,
+      "AI": 17,
+      "ChatGPT": 18,
+      "ChudAI": 19,
+      "If": 20,
+      "My": 21,
+      "Sandesh": 22,
+      "Transformer": 23,
+      "answer": 24,
+      "assistant": 25,
+      "avoid": 26,
+      "based": 27,
+      "but": 28,
+      "by": 29,
+      "completely": 30,
+      "concisely": 31,
+      "created": 32,
+      "don": 33,
+      "here": 34,
+      "intelligently": 35,
+      "know": 36,
+      "like": 37,
+      "logically": 38,
+      "loops": 39,
+      "m": 40,
+      "meaningless": 41,
+      "name": 42,
+      "of": 43,
+      "on": 44,
+      "pretend": 45,
+      "repetitive": 46,
+      "respond": 47,
+      "say": 48,
+      "sentences": 49,
+      "simplified": 50,
+      "small": 51,
+      "sure": 52,
+      "t": 53,
+      "things": 54,
+      "think": 55,
+      "to": 56,
+      "trained": 57,
+      "unsure": 58,
+      "version": 59,
+      "was": 60,
+      "what": 61,
+      "’": 62,
+      "“": 63
+    },
+    "unk_token": "[UNK]"
+  }
+}