metadata
library_name: transformers
tags: []
import random
import json
def generate_random_data():
    """Build one random security-dashboard snapshot.

    Returns:
        dict: Top-level integer counters, plus two nested dicts:
            ``"Confirmed_Vulnerabilities"`` (integer count per finding
            category) and ``"Trend_Percentages"`` (float percentage change,
            rounded to two decimals).

    Note:
        ``"Trend_Percentages"`` has no entries for the ``"Container"`` and
        ``"API"`` categories; this is kept as-is so the output schema does
        not change.
    """
    return {
        "Users": random.randint(5, 20),
        "Groups": random.randint(10, 30),
        "Projects/Repositories": random.randint(4000, 5000),
        "Scans": random.randint(40, 100),
        "Lines_of_Code": random.randint(25000000, 35000000),
        "Vulnerabilities": random.randint(7000, 8000),
        "False_Positives": random.randint(10, 30),
        "True_Positives": random.randint(150, 200),
        "Confirmed_Vulnerabilities": {
            "Secret": random.randint(0, 200),
            "PII": random.randint(0, 200),
            "SAST": random.randint(0, 200),
            "SCA": random.randint(0, 200),
            "IaC": random.randint(0, 200),
            "Container": random.randint(0, 200),
            "API": random.randint(0, 200),
            "Compliance": random.randint(0, 200),
            "Malware": random.randint(0, 225),
        },
        "Trend_Percentages": {
            "Scans": round(random.uniform(-100, 100), 2),
            # These two previously used uniform(-100, -100), a degenerate
            # range that always produced exactly -100.0.
            "Lines_of_Code": round(random.uniform(-100, 100), 2),
            "Vulnerabilities": round(random.uniform(-100, 100), 2),
            "False_Positives": round(random.uniform(-100, 1000), 2),
            "True_Positives": round(random.uniform(-100, 100), 2),
            "Secret": round(random.uniform(-100, 1500), 2),
            "PII": round(random.uniform(-100, 1500), 2),
            "SAST": round(random.uniform(-100, 1500), 2),
            "SCA": round(random.uniform(-100, 1500), 2),
            "IaC": round(random.uniform(-100, 1500), 2),
            "Compliance": round(random.uniform(-100, 1500), 2),
            "Malware": round(random.uniform(-100, 1500), 2),
        },
    }
def _load_mapping(data):
    """Return *data* as a dict, parsing it first when it is a string."""
    if isinstance(data, dict):
        return data
    try:
        # Strict JSON first, so apostrophes inside values are left intact.
        return json.loads(data)
    except json.JSONDecodeError:
        pass
    import ast

    try:
        # Handles the Python repr produced by ``str(some_dict)``.
        return ast.literal_eval(data)
    except (ValueError, SyntaxError):
        # Legacy behaviour: single-quoted pseudo-JSON such as "{'a': true}".
        return json.loads(data.replace("'", '"'))


def json_to_semi_structured_text(data):
    """Convert JSON data into a semi-structured text format for training T5-Flan.

    Args:
        data (dict | str): The object to convert. Either a dict, a JSON
            string, or the ``str()`` representation of a dict.

    Returns:
        str: Semi-structured text representation, one entry per line.
            Scalar entries are rendered as ``"Key name: value"`` and nested
            dicts as a ``"Key:"`` header followed by ``"- sub_key: value"``
            lines.

    Raises:
        json.JSONDecodeError: If a string input cannot be parsed by any of
            the supported strategies.
    """
    data = _load_mapping(data)
    text_output = []
    for key, value in data.items():
        if isinstance(value, dict):
            # Nested dictionaries: header line, then one bullet per entry.
            # Underscores in the header are intentionally left untouched to
            # keep the emitted text identical to the original format.
            text_output.append(f"{key.capitalize()}:")
            text_output.extend(
                f"- {sub_key}: {sub_value}" for sub_key, sub_value in value.items()
            )
        else:
            # Direct key-value pairs
            text_output.append(f"{key.replace('_', ' ').capitalize()}: {value}")
    return "\n".join(text_output)
## Inference
# Load model directly
import time

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("suriya7/t5-data-reasoning")
model = AutoModelForSeq2SeqLM.from_pretrained("suriya7/t5-data-reasoning")

# `device` was previously used without being defined (NameError).
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build a random sample and prefix it with the task prompt.
data_inp = json_to_semi_structured_text(str(generate_random_data()))
inp = "Summarize and reason: " + data_inp

# The timed section covers tokenization, moving the model and inputs to the
# device, generation, decoding and printing the answer.
start = time.time()
inputs = tokenizer(inp, return_tensors="pt", truncation=True)
model.to(device)
inputs = inputs.to(device)
outputs = model.generate(**inputs, max_length=256, do_sample=False)
# Pass skip_special_tokens=True here to strip special tokens from the text.
answer = tokenizer.decode(outputs[0])
print(answer)
end = time.time()
print(f"Time taken: {end - start}")
print('\n\n')
print("input: "+inp)