Update app.py
app.py CHANGED

@@ -95,16 +95,21 @@ def generate_text(prompt, max_length=100, temperature=0.7, force_arabic=True):
         else:
             enhanced_prompt = prompt
 
-        # Create input for the model
+        # Create input for the model using proper tokenization with attention mask
         print(f"Generating response for: {enhanced_prompt[:50]}...")
 
-        # Use
-
+        # Use the tokenizer to get both input_ids and attention_mask
+        encoding = tokenizer(enhanced_prompt, return_tensors="pt", padding=True)
+        input_ids = encoding.input_ids.to(model.device)
+        attention_mask = encoding.attention_mask.to(model.device)
 
-
+        print(f"Input shape: {input_ids.shape}, Attention mask shape: {attention_mask.shape}")
+
+        # Generate with all compiler features disabled and proper attention mask
         with torch.inference_mode():
             gen_tokens = model.generate(
                 input_ids,
+                attention_mask=attention_mask,  # Add attention mask
                 max_new_tokens=int(max_length),
                 do_sample=True,
                 temperature=float(temperature),
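
For reference, a minimal standalone sketch of the pattern this hunk adopts: tokenize once to get both input_ids and attention_mask, move both tensors to the model's device, and pass the mask to generate(). The "gpt2" checkpoint below is a placeholder, not the model this Space actually loads.

# Sketch of the tokenize-then-generate pattern above.
# "gpt2" is a placeholder checkpoint, not the Space's actual model.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 ships without a pad token; reuse EOS

encoding = tokenizer("Hello", return_tensors="pt", padding=True)
input_ids = encoding.input_ids.to(model.device)
attention_mask = encoding.attention_mask.to(model.device)

with torch.inference_mode():
    gen_tokens = model.generate(
        input_ids,
        attention_mask=attention_mask,  # explicit mask, so generate() need not infer one
        max_new_tokens=50,
        do_sample=True,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id,
    )

Passing the mask explicitly matters because transformers otherwise tries to infer it from pad_token_id, which is ambiguous when the pad and EOS tokens share an id, as they do here.
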
@@ -113,20 +118,18 @@ def generate_text(prompt, max_length=100, temperature=0.7, force_arabic=True):
                 pad_token_id=tokenizer.eos_token_id
             )
 
-        #
-
-
+        # Get only the generated part (exclude the prompt)
+        input_length = input_ids.shape[1]
+        generated_tokens = gen_tokens[0][input_length:]
 
-        #
-
+        # Decode just the generated part
+        generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)
+        print(f"Generated text (after input): {generated_text[:100]}...")
 
-        #
-        if cleaned_response.startswith(enhanced_prompt):
-            cleaned_response = cleaned_response[len(enhanced_prompt):].strip()
-        elif cleaned_response.startswith(prompt):
-            cleaned_response = cleaned_response[len(prompt):].strip()
-
+        # Clean any remaining special tokens
+        cleaned_response = clean_response(generated_text)
         print(f"Final cleaned response: {cleaned_response[:100]}...")
+
         return cleaned_response
 
     except Exception as e:
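
The token-level slicing introduced in this hunk replaces the removed string-based stripping (the startswith/len lines), which can miss when decoding normalizes whitespace or special tokens. A sketch of the idiom as a helper; continuation_only is an illustrative name, not a function from app.py:

# Decode only the continuation, dropping the echoed prompt.
# continuation_only is a hypothetical helper, not part of app.py.
import torch

def continuation_only(gen_tokens: torch.Tensor, input_ids: torch.Tensor, tokenizer) -> str:
    input_length = input_ids.shape[1]          # number of prompt tokens
    new_tokens = gen_tokens[0][input_length:]  # everything after the prompt
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
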
@@ -163,7 +166,7 @@ with gr.Blocks(title="Cohere Arabic Model Demo") as demo:
             gr.Button(example).click(
                 fn=lambda e=example: e,
                 inputs=[],
-                outputs=prompt
+                outputs=[prompt]
             )
 
     # Parameters
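
The last hunk only wraps the click target in a list; Gradio accepts both a bare component and a list for outputs, so this is a consistency fix rather than a bug fix. A standalone sketch of the button wiring, with placeholder labels instead of the Space's real examples:

# Sketch of the example-button wiring; labels are placeholders.
import gradio as gr

with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt")
    for example in ["First example", "Second example"]:
        # The default argument e=example freezes the current value; a plain
        # `lambda: example` would capture the loop variable and always
        # return the last label.
        gr.Button(example).click(
            fn=lambda e=example: e,
            inputs=[],
            outputs=[prompt],
        )

demo.launch()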