# chagu-dev / rag_sec / news_content_generator.py
# Upload news_content_generator.py (#3) -- uploaded by psv901, commit 91b337f (verified), 3.79 kB
import torch
from transformers import pipeline, T5Tokenizer, T5ForConditionalGeneration
# Dummy data: three hard-coded, multi-sentence sample articles about AI.
# main() passes this list to summarize_articles(); it is a placeholder for a
# real document source (presumably a retrieval step -- TODO confirm).
news_articles = [
"""Artificial Intelligence (AI) is revolutionizing industries by enhancing automation and boosting operational efficiency.
Companies are leveraging AI to analyze data at scale, optimize logistics, and improve customer experiences.
One notable development is the integration of AI in healthcare, where it aids in diagnosing diseases and personalizing treatment plans.
Experts believe that these advancements will continue to transform how businesses operate in the coming years.""",
"""The field of AI has seen remarkable breakthroughs in natural language understanding, making it possible for machines to comprehend and generate human-like text.
Researchers are pushing boundaries with transformer-based architectures, enabling applications like conversational agents, language translation, and content creation.
These advancements are not only enhancing user interactions but also opening doors for innovative applications across various domains.""",
"""AI trends are shaping the future of technology and business by enabling smarter decision-making and predictive analytics.
Industries such as finance, manufacturing, and retail are adopting AI-driven solutions to optimize processes and gain a competitive edge.
As AI tools become more accessible, even small businesses are leveraging these technologies to scale operations and deliver better services to customers.""",
]
# Load the T5 tokenizer and model once, at import time. Both objects are
# module-level globals shared by summarize_articles() and generate_content().
_T5_CHECKPOINT = "t5-small"
t5_tokenizer = T5Tokenizer.from_pretrained(_T5_CHECKPOINT)
t5_model = T5ForConditionalGeneration.from_pretrained(_T5_CHECKPOINT)
# Step 1: Input
def get_user_prompt():
    """Ask the user for a content prompt on standard input.

    Returns:
        The line typed by the user, exactly as returned by ``input()``
        (no stripping or validation is applied).
    """
    prompt_message = "Enter your prompt (e.g., 'Create a LinkedIn post about AI trends'): "
    return input(prompt_message)
# Step 2: Summarization (Document Retrieval + Summarization)
def summarize_articles(articles):
    """Summarize each article with the module-level T5 model.

    Every article is prefixed with ``"summarize: "``, tokenized (truncated to
    at most 512 tokens) and passed to ``t5_model.generate`` using beam search
    with 4 beams and an output length between 50 and 100 tokens.

    Args:
        articles: Iterable of article texts.

    Returns:
        A list with one decoded summary per article, in input order. An empty
        iterable yields an empty list.
    """

    def _summarize_one(text):
        # Encode a single article with the summarization task prefix.
        token_ids = t5_tokenizer.encode(
            f"summarize: {text}",
            return_tensors="pt",
            max_length=512,
            truncation=True,
        )
        generated = t5_model.generate(
            token_ids,
            max_length=100,
            min_length=50,
            length_penalty=2.0,
            num_beams=4,
            early_stopping=True,
        )
        # Only the first returned sequence is decoded.
        return t5_tokenizer.decode(generated[0], skip_special_tokens=True)

    return [_summarize_one(text) for text in articles]
# Step 3: Content Generation
def generate_content(prompt, summarized_content):
    """Generate text from the user prompt plus the article summaries.

    The model input is ``"generate: "`` followed by the prompt, a
    ``"Summarized Insights:"`` heading and the summaries joined by newlines.
    It is tokenized (truncated to at most 512 tokens) and passed to
    ``t5_model.generate`` using beam search with 4 beams and a maximum output
    length of 300 tokens.

    Args:
        prompt: The user's request text.
        summarized_content: Iterable of summary strings to append as context.

    Returns:
        The decoded first generated sequence, with special tokens removed.
    """
    header = f"{prompt}\n\nSummarized Insights:\n"
    combined_prompt = header + "\n".join(summarized_content)

    token_ids = t5_tokenizer.encode(
        f"generate: {combined_prompt}",
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )
    generated = t5_model.generate(
        token_ids,
        max_length=300,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    return t5_tokenizer.decode(generated[0], skip_special_tokens=True)
# Step 4: Logging with Chagu (Dummy Implementation)
def log_with_chagu(stage, content):
    """Print a labelled log entry for one pipeline stage.

    This is a dummy stand-in for Chagu logging: it only writes to standard
    output and returns nothing.

    Args:
        stage: Label placed in the ``[CHAGU LOG - ...]`` header.
        content: Value printed on the line after the header.
    """
    entry = f"\n[CHAGU LOG - {stage}]:\n{content}\n"
    print(entry)
# Step 5: Output
def display_output(content):
    """Print the generated content followed by a fixed transparency notice.

    Args:
        content: The generated text to show; printed as-is between the
            "Generated Content" and "Transparency Report" headings.
    """
    report_lines = (
        "\nGenerated Content:",
        content,
        "\nTransparency Report:",
        "All transformations logged in Chagu for auditability.",
    )
    # One print call per entry, so each ends with its own newline.
    for line in report_lines:
        print(line)
# Main Workflow
def main():
    """Run the full workflow: prompt, summarize, generate, log, display.

    Each stage's result is passed to log_with_chagu() before the next stage
    runs, and the final generated text is shown with display_output().
    """
    # Stage 1: read the prompt from the user.
    prompt = get_user_prompt()
    log_with_chagu("Input Prompt", prompt)

    # Stage 2: summarize the bundled sample articles.
    summaries = summarize_articles(news_articles)
    log_with_chagu("Summarized Articles", "\n".join(summaries))

    # Stage 3: generate the final content from the prompt and summaries.
    generated = generate_content(prompt, summaries)
    log_with_chagu("Generated Content", generated)

    display_output(generated)
# Run the workflow only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()