Upload news_content_generator.py

#3
by psv901 - opened
Files changed (1) hide show
  1. rag_sec/news_content_generator.py +81 -0
rag_sec/news_content_generator.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import pipeline, T5Tokenizer, T5ForConditionalGeneration
3
+
4
# Dummy data: detailed news articles used as the fixed corpus that
# summarize_articles() is run over in main().
# NOTE(review): the indentation of the continuation lines inside each string
# was lost in the diff view this block was recovered from - confirm the exact
# whitespace against the original file.
news_articles = [
    """Artificial Intelligence (AI) is revolutionizing industries by enhancing automation and boosting operational efficiency.
    Companies are leveraging AI to analyze data at scale, optimize logistics, and improve customer experiences.
    One notable development is the integration of AI in healthcare, where it aids in diagnosing diseases and personalizing treatment plans.
    Experts believe that these advancements will continue to transform how businesses operate in the coming years.""",

    """The field of AI has seen remarkable breakthroughs in natural language understanding, making it possible for machines to comprehend and generate human-like text.
    Researchers are pushing boundaries with transformer-based architectures, enabling applications like conversational agents, language translation, and content creation.
    These advancements are not only enhancing user interactions but also opening doors for innovative applications across various domains.""",

    """AI trends are shaping the future of technology and business by enabling smarter decision-making and predictive analytics.
    Industries such as finance, manufacturing, and retail are adopting AI-driven solutions to optimize processes and gain a competitive edge.
    As AI tools become more accessible, even small businesses are leveraging these technologies to scale operations and deliver better services to customers.""",
]
19
+
20
# Load the T5 model and tokenizer once at import time; both are shared by
# summarize_articles() and generate_content().
t5_tokenizer = T5Tokenizer.from_pretrained("t5-small")
t5_model = T5ForConditionalGeneration.from_pretrained("t5-small")
23
+
24
+
25
# Step 1: Input
def get_user_prompt():
    """Ask the user for a content prompt on stdin and return it.

    Returns:
        The line typed by the user, exactly as returned by ``input()``.
    """
    return input("Enter your prompt (e.g., 'Create a LinkedIn post about AI trends'): ")
28
+
29
+
30
# Step 2: Summarization (Document Retrieval + Summarization)
def summarize_articles(articles):
    """Summarize each article with the module-level T5 model.

    Args:
        articles: Iterable of article texts.

    Returns:
        A list holding one decoded summary string per article, in the
        same order as ``articles``.
    """
    summaries = []
    for article in articles:
        # Prepend the "summarize: " task prefix before tokenizing.
        input_text = f"summarize: {article}"
        # Anything beyond 512 tokens is truncated by the tokenizer.
        inputs = t5_tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)
        # Beam search (4 beams) producing between 50 and 100 tokens.
        outputs = t5_model.generate(
            inputs,
            max_length=100,
            min_length=50,
            length_penalty=2.0,
            num_beams=4,
            early_stopping=True,
        )
        # Only the first returned sequence is decoded.
        summary = t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
        summaries.append(summary)
    return summaries
41
+
42
+
43
# Step 3: Content Generation
def generate_content(prompt, summarized_content):
    """Generate text from the user prompt plus the article summaries.

    Args:
        prompt: The user's request text.
        summarized_content: Iterable of summary strings; they are joined
            with newlines under a "Summarized Insights:" heading.

    Returns:
        The decoded text of the first sequence returned by the model.
    """
    combined_prompt = f"{prompt}\n\nSummarized Insights:\n" + "\n".join(summarized_content)
    # NOTE(review): "generate: " may not be a task prefix t5-small was trained
    # on - confirm against the T5 documentation before relying on the output.
    input_text = f"generate: {combined_prompt}"
    # The combined prompt is truncated to 512 tokens, so trailing summaries
    # can be cut off when the input is long.
    inputs = t5_tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)
    outputs = t5_model.generate(
        inputs,
        max_length=300,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    generated_text = t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated_text
51
+
52
+
53
# Step 4: Logging with Chagu (Dummy Implementation)
def log_with_chagu(stage, content):
    """Print a labelled log entry for one pipeline stage.

    This is a placeholder that only writes to stdout.

    Args:
        stage: Label shown in the log header.
        content: Text printed beneath the header.
    """
    print(f"\n[CHAGU LOG - {stage}]:\n{content}\n")
56
+
57
+
58
# Step 5: Output
def display_output(content):
    """Print the generated content followed by a fixed transparency note.

    Args:
        content: The generated text to show to the user.
    """
    print("\nGenerated Content:")
    print(content)
    print("\nTransparency Report:")
    print("All transformations logged in Chagu for auditability.")
64
+
65
+
66
# Main Workflow
def main():
    """Run the pipeline: prompt -> summarize -> generate -> display.

    Each stage's result is passed to log_with_chagu() before moving on.
    """
    user_prompt = get_user_prompt()  # Properly take user input
    log_with_chagu("Input Prompt", user_prompt)

    summarized_content = summarize_articles(news_articles)
    log_with_chagu("Summarized Articles", "\n".join(summarized_content))

    final_output = generate_content(user_prompt, summarized_content)
    log_with_chagu("Generated Content", final_output)

    display_output(final_output)
78
+
79
+
80
# Run the workflow only when executed as a script, not when imported.
if __name__ == "__main__":
    main()