Update app.py
Browse files
app.py
CHANGED
@@ -5,9 +5,11 @@ from newsfetch.news import newspaper
|
|
5 |
from transformers import pipeline
|
6 |
from transformers import T5Tokenizer, T5ForConditionalGeneration
|
7 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
|
|
8 |
from newspaper import Article
|
9 |
from sklearn.preprocessing import LabelEncoder
|
10 |
import joblib
|
|
|
11 |
|
12 |
|
13 |
# Example usage:
|
@@ -23,6 +25,11 @@ def main():
|
|
23 |
try:
|
24 |
news_article = newspaper(url)
|
25 |
print("scraped: ",news_article)
|
|
|
|
|
|
|
|
|
|
|
26 |
return news_article.article
|
27 |
except Exception as e:
|
28 |
return "Error: " + str(e)
|
@@ -57,15 +64,35 @@ def main():
|
|
57 |
return None,None
|
58 |
else:
|
59 |
st.write("This article is not classified as related to the supply chain.")
|
60 |
-
|
61 |
def classify_and_summarize(input_text, cls_model, tokenizer_cls, label_encoder, model_summ, tokenizer_summ, device):
|
62 |
if input_text.startswith("http"):
|
63 |
# If the input starts with "http", assume it's a URL and extract content
|
64 |
article_content = scrape_news_content(input_text)
|
|
|
65 |
else:
|
66 |
# If the input is not a URL, assume it's the content
|
67 |
article_content = input_text
|
68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
# Perform sentiment classification
|
70 |
inputs_cls = tokenizer_cls(article_content, return_tensors="pt", max_length=512, truncation=True)
|
71 |
inputs_cls = {key: value.to(device) for key, value in inputs_cls.items()}
|
@@ -90,11 +117,13 @@ def main():
|
|
90 |
print("No opportunity summary generated.")
|
91 |
summary_opportunity = "No opportunity summary available" # Provide a default value or handle accordingly
|
92 |
|
93 |
-
return classification, summary_risk, summary_opportunity
|
94 |
|
95 |
|
96 |
print(url_input)
|
97 |
cls_model =AutoModelForSequenceClassification.from_pretrained("riskclassification_finetuned_xlnet_model_ld")
|
|
|
|
|
98 |
tokenizer_cls = AutoTokenizer.from_pretrained("xlnet-base-cased")
|
99 |
label_encoder = LabelEncoder()
|
100 |
|
@@ -118,7 +147,7 @@ def main():
|
|
118 |
|
119 |
|
120 |
|
121 |
-
classification, summary_risk, summary_opportunity = classify_and_summarize(url_input, cls_model, tokenizer_cls, label_encoder, model_summ, tokenizer_summ, device)
|
122 |
|
123 |
print("Classification:", classification)
|
124 |
print("Risk Summary:", summary_risk)
|
@@ -126,10 +155,45 @@ def main():
|
|
126 |
|
127 |
|
128 |
# Display the entered URL
|
129 |
-
st.write("Entered URL:", url_input)
|
130 |
st.write("Classification:",classification)
|
131 |
st.write("Risk Summary:",summary_risk)
|
132 |
st.write("Opportunity Summary:",summary_opportunity)
|
133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
if __name__ == "__main__":
|
135 |
main()
|
|
|
5 |
from transformers import pipeline
|
6 |
from transformers import T5Tokenizer, T5ForConditionalGeneration
|
7 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
8 |
+
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
|
9 |
from newspaper import Article
|
10 |
from sklearn.preprocessing import LabelEncoder
|
11 |
import joblib
|
12 |
+
from datetime import datetime
|
13 |
|
14 |
|
15 |
# Example usage:
|
|
|
25 |
try:
|
26 |
news_article = newspaper(url)
|
27 |
print("scraped: ",news_article)
|
28 |
+
print("Attributes of the newspaper object:", dir(news_article))
|
29 |
+
# Print the methods of the newspaper object
|
30 |
+
print("Methods of the newspaper object:", [method for method in dir(news_article) if callable(getattr(news_article, method))])
|
31 |
+
# Try to print some specific attributes
|
32 |
+
print("Authors:", news_article.authors)
|
33 |
return news_article.article
|
34 |
except Exception as e:
|
35 |
return "Error: " + str(e)
|
|
|
64 |
return None,None
|
65 |
else:
|
66 |
st.write("This article is not classified as related to the supply chain.")
|
67 |
+
|
68 |
def classify_and_summarize(input_text, cls_model, tokenizer_cls, label_encoder, model_summ, tokenizer_summ, device):
|
69 |
if input_text.startswith("http"):
|
70 |
# If the input starts with "http", assume it's a URL and extract content
|
71 |
article_content = scrape_news_content(input_text)
|
72 |
+
st.write("Entered URL:", url_input)
|
73 |
else:
|
74 |
# If the input is not a URL, assume it's the content
|
75 |
article_content = input_text
|
76 |
|
77 |
+
|
78 |
+
# Get the number of lines in the text.
|
79 |
+
truncated_content = " ".join(article_content.split()[:150])
|
80 |
+
st.markdown(f"Truncated Content:\n{truncated_content}", unsafe_allow_html=True)
|
81 |
+
|
82 |
+
# Add a button to toggle between truncated and full content
|
83 |
+
if st.button("Read More"):
|
84 |
+
# Display the full content when the button is clicked
|
85 |
+
full_content = " ".join(article_content.split())
|
86 |
+
st.markdown(f"Full Content:\n{full_content}", unsafe_allow_html=True)
|
87 |
+
# Remove the truncated content when the full content is displayed
|
88 |
+
st.markdown(
|
89 |
+
"""
|
90 |
+
<script>
|
91 |
+
document.getElementById("truncated-content").style.display = "none";
|
92 |
+
</script>
|
93 |
+
""",
|
94 |
+
unsafe_allow_html=True
|
95 |
+
)
|
96 |
# Perform sentiment classification
|
97 |
inputs_cls = tokenizer_cls(article_content, return_tensors="pt", max_length=512, truncation=True)
|
98 |
inputs_cls = {key: value.to(device) for key, value in inputs_cls.items()}
|
|
|
117 |
print("No opportunity summary generated.")
|
118 |
summary_opportunity = "No opportunity summary available" # Provide a default value or handle accordingly
|
119 |
|
120 |
+
return classification, summary_risk, summary_opportunity,article_content
|
121 |
|
122 |
|
123 |
print(url_input)
|
124 |
cls_model =AutoModelForSequenceClassification.from_pretrained("riskclassification_finetuned_xlnet_model_ld")
|
125 |
+
print(type(cls_model))
|
126 |
+
|
127 |
tokenizer_cls = AutoTokenizer.from_pretrained("xlnet-base-cased")
|
128 |
label_encoder = LabelEncoder()
|
129 |
|
|
|
147 |
|
148 |
|
149 |
|
150 |
+
classification, summary_risk, summary_opportunity,article_content = classify_and_summarize(url_input, cls_model, tokenizer_cls, label_encoder, model_summ, tokenizer_summ, device)
|
151 |
|
152 |
print("Classification:", classification)
|
153 |
print("Risk Summary:", summary_risk)
|
|
|
155 |
|
156 |
|
157 |
# Display the entered URL
|
|
|
158 |
st.write("Classification:",classification)
|
159 |
st.write("Risk Summary:",summary_risk)
|
160 |
st.write("Opportunity Summary:",summary_opportunity)
|
161 |
|
162 |
+
|
163 |
+
def process_question():
    """Interactive Q&A over the scraped article content.

    Renders a text input plus a "Send" button; on submit, runs an
    extractive question-answering model over ``article_content`` (read
    from the enclosing scope) and appends the (question, answer) pair to
    a history persisted in ``st.session_state`` across Streamlit reruns.
    """
    # Persist the Q&A history across Streamlit reruns.
    if 'qa_history' not in st.session_state:
        st.session_state.qa_history = []

    # Input box for the user's question.
    # NOTE(review): 'question_counter' is never incremented anywhere
    # visible, so this key is effectively always 0 — confirm intent.
    user_question_key = st.session_state.get('question_counter', 0)
    user_question = st.text_input("Ask a question about the article content:", key=user_question_key)

    # Check if "Send" button is clicked.
    send_button_key = f"send_button_{user_question_key}"
    if st.button("Send", key=send_button_key) and user_question:
        # Build the QA pipeline once and cache it in session_state:
        # the original re-instantiated the model on every click, which
        # reloads weights from disk each time and is very slow.
        if 'qa_pipeline' not in st.session_state:
            model_name = "deepset/tinyroberta-squad2"
            st.session_state.qa_pipeline = pipeline('question-answering', model=model_name, tokenizer=model_name)
        res = st.session_state.qa_pipeline({'question': user_question, 'context': article_content})

        # Display the user's question and the model's answer.
        st.write(f"You asked: {user_question}")
        st.write("Model's Answer:", res["answer"])

        # Record the exchange in the persistent history.
        st.session_state.qa_history.append((user_question, res["answer"]))

        # TODO(review): the original author intended to clear the input
        # box here ("# Clear the input box") but never wrote the code.

    # Display the history so far.
    st.write("Question-Answer History:")
    for q, a in st.session_state.qa_history:
        st.write(f"Q: {q}")
        st.write(f"A: {a}")


# Run the function to process questions.
process_question()
|
198 |
if __name__ == "__main__":
|
199 |
main()
|