Spaces:

Omarrran
/

kashmiri_text_generation_trail

Running

App Files Files Community

Omarrran committed on Oct 26, 2024

Commit

e6f29fe

verified ·

1 Parent(s): 2d50f60

Create app.py

Browse files

Files changed (1) hide show

app.py +118 -0

app.py ADDED Viewed

	@@ -0,0 +1,118 @@

+import gradio as gr
+import torch
+import json
+from transformers import GPT2Config
+from torch import nn
+import requests
+from pathlib import Path
+class TextGenerator(nn.Module):
+    def __init__(self, vocab_size, embedding_dim, hidden_dim):
+        super().__init__()
+        self.embedding = nn.Embedding(vocab_size, embedding_dim)
+        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
+        self.fc = nn.Linear(hidden_dim, vocab_size)
+    def forward(self, x):
+        x = self.embedding(x)
+        lstm_out, _ = self.lstm(x)
+        return self.fc(lstm_out)
+def download_file(url, local_path):
+    response = requests.get(url)
+    if response.status_code == 200:
+        Path(local_path).parent.mkdir(parents=True, exist_ok=True)
+        with open(local_path, 'wb') as f:
+            f.write(response.content)
+    else:
+        raise Exception(f"Failed to download {url}")
+def load_model_and_tokenizers():
+    # Create a local directory for downloaded files
+    cache_dir = Path("model_cache")
+    cache_dir.mkdir(exist_ok=True)
+    # URLs for the files
+    base_url = "https://huggingface.co/Omarrran/temp_data/raw/main"
+    files = {
+        "model.pt": f"{base_url}/model.pt",
+        "word_to_int.json": f"{base_url}/word_to_int.json",
+        "int_to_word.json": f"{base_url}/int_to_word.json",
+        "model_config.json": f"{base_url}/model_config.json"
+    }
+    # Download all files
+    for filename, url in files.items():
+        local_path = cache_dir / filename
+        if not local_path.exists():
+            print(f"Downloading {filename}...")
+            download_file(url, local_path)
+    # Load configuration
+    with open(cache_dir / "model_config.json", "r") as f:
+        config = json.load(f)
+    # Load tokenizers
+    with open(cache_dir / "word_to_int.json", "r") as f:
+        word_to_int = json.load(f)
+    with open(cache_dir / "int_to_word.json", "r") as f:
+        int_to_word = json.load(f)
+    # Initialize model
+    model = TextGenerator(
+        vocab_size=config['vocab_size'],
+        embedding_dim=config['embedding_dim'],
+        hidden_dim=config['hidden_dim']
+    )
+    # Load model weights
+    model.load_state_dict(torch.load(cache_dir / "model.pt", map_location=torch.device('cpu')))
+    model.eval()
+    return model, word_to_int, int_to_word
+def generate_text(prompt, max_length=100):
+    # Load model and tokenizers (will use cached files after first load)
+    model, word_to_int, int_to_word = load_model_and_tokenizers()
+    # Tokenize input prompt
+    input_ids = [word_to_int.get(word, word_to_int['<UNK>']) for word in prompt.split()]
+    input_tensor = torch.tensor([input_ids])
+    # Generate text
+    generated_ids = input_ids.copy()
+    with torch.no_grad():
+        for _ in range(max_length):
+            current_input = torch.tensor([generated_ids[-50:]])  # Use last 50 tokens as context
+            outputs = model(current_input)
+            next_token_id = outputs[0, -1, :].argmax().item()
+            generated_ids.append(next_token_id)
+            if next_token_id == word_to_int.get('<EOS>', 0):
+                break
+    # Convert ids back to text
+    generated_text = ' '.join([int_to_word.get(str(idx), '<UNK>') for idx in generated_ids])
+    return generated_text
+# Create Gradio interface
+iface = gr.Interface(
+    fn=generate_text,
+    inputs=[
+        gr.Textbox(label="Enter your prompt", placeholder="Type your text here..."),
+        gr.Slider(minimum=10, maximum=200, value=100, label="Maximum length", step=1)
+    ],
+    outputs=gr.Textbox(label="Generated Text"),
+    title="Text Generation Model",
+    description="Enter a prompt and the model will generate text based on it.",
+    examples=[
+        ["The quick brown fox"],
+        ["Once upon a time"],
+        ["In a galaxy far"]
+    ]
+)
+# Launch the interface
+if __name__ == "__main__":
+    iface.launch()