Spaces:

RamAI123
/

sss

Runtime error

App Files Files Community

RamAI123 committed on Mar 8

Commit

1630a37

•

1 Parent(s): d003d76

Upload 6 files

Browse files

Files changed (4) hide show

Dockerfile.txt +14 -0
app.py +321 -0
db.sql +0 -0
requirements.txt +12 -0

Dockerfile.txt ADDED Viewed

	@@ -0,0 +1,14 @@

+# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+# you will also find guides on how best to write your Dockerfile
+FROM python:3.9
+WORKDIR /code
+COPY ./requirements.txt /code/requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+COPY . .
+# app.py (copied to /code) defines a Flask WSGI object named `app`; there is no
+# `app/main.py` package and uvicorn is not listed in requirements.txt, so the
+# container is started through the Flask CLI instead of `uvicorn app.main:app`.
+# Bind to 0.0.0.0:7860 so the port is reachable from outside the container.
+CMD ["flask", "--app", "app", "run", "--host", "0.0.0.0", "--port", "7860"]

app.py ADDED Viewed

	@@ -0,0 +1,321 @@

+from flask import Flask, render_template, request, jsonify, render_template_string
+from flask_cors import CORS
+from newspaper import Article
+from transformers import pipeline
+import torch
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, T5ForConditionalGeneration, T5Tokenizer
+from transformers import AutoTokenizer, AutoModelForQuestionAnswering
+from sklearn.preprocessing import LabelEncoder
+import joblib
+import mysql.connector
+from flask import send_file
+from reportlab.pdfgen import canvas
+import io
+from reportlab.lib.pagesizes import letter
+from reportlab.lib import colors
+from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, PageBreak, Paragraph
+from nltk.tokenize import sent_tokenize
+from reportlab.platypus import Spacer
+from reportlab.platypus.flowables import KeepTogether
+from reportlab.lib.styles import getSampleStyleSheet
+import datetime
+app = Flask(__name__, template_folder='templates')
+CORS(app)
+chat_history = []
+mysql_config = {
+    'host': 'localhost',
+    'user': 'root',
+    'password': '9553641651',
+    'database': 'articles'
+}
+def insert_question_and_answer(question, answer,timestamp):
+    try:
+        # Connect to the MySQL database
+        connection = mysql.connector.connect(**mysql_config)
+        cursor = connection.cursor()
+        # SQL query to insert a new record into the 'supplychain' table
+        query = "INSERT INTO supplychain143 (question, answer, timestamp) VALUES (%s, %s, %s);"
+        values = (question, answer,timestamp)
+        # Execute the query
+        cursor.execute(query, values)
+        # Commit the changes
+        connection.commit()
+        # Close the cursor and connection
+        cursor.close()
+        connection.close()
+        print("Record inserted successfully!")
+    except Exception as e:
+        print("Error inserting record:", str(e))
+def retrieve_article_content(timestamp):
+    try:
+        # Connect to the MySQL database
+        connection = mysql.connector.connect(**mysql_config)
+        cursor = connection.cursor()
+        # SQL query to retrieve article content based on the question
+        query = "SELECT question, answer FROM supplychain143 WHERE timestamp = %s;"
+        values = (timestamp,)
+        # Execute the query
+        cursor.execute(query, values)
+        # Fetch the results
+        results = cursor.fetchall()
+        # Close the cursor and connection
+        cursor.close()
+        connection.close()
+        return results
+    except Exception as e:
+        print("Error retrieving article content:", str(e))
+        return None
+def scrape_news_content(url):
+    # ... (Your existing implementation)
+    try:
+      article = Article(url)
+      article.download()
+      article.parse()
+      title = article.title
+      content = article.text
+      return content
+ # Remove leading/trailing whitespaces
+    except Exception as e:
+      return "Error: " + str(e)
+def summarize_with_t5(article_content, classification, model, tokenizer, device):
+    # ... (Your existing implementation)
+    article_content = str(article_content)
+    prompt = "Classification: " + str(classification) + "\n"
+    if not article_content or article_content == "nan":
+        return "", ""
+    if classification == "risks":
+        prompt = "summarize the key supply chain risks: "
+    elif classification == "opportunities":
+        prompt = "summarize the key supply chain opportunities: "
+    elif classification == "neither":
+        print("Nooo")
+        return "None", "None"
+    input_text = prompt + article_content
+    input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)
+    model = model.to(device)  #/ Move the model to the correct device
+    summary_ids = model.generate(input_ids.to(device), max_length=150, num_beams=4, length_penalty=2.0, early_stopping=True)
+    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
+    print(summary)
+    if classification in ["risks", "opportunities"]:
+        if classification == "risks":
+            return summary, "None"
+        elif classification == "opportunities":
+            return "None", summary
+        else:
+          return None,None
+    else:
+        return ("This article is not classified as related to the supply chain.")
+def classify_and_summarize(input_text, cls_model, tokenizer_cls, label_encoder, model_summ, tokenizer_summ, device):
+    # ... (Your existing implementation)
+    results = []
+    request_timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+    input_text=input_text.split(",")
+    for url in input_text:
+        if url.startswith("http"):
+            # If the input starts with "http", assume it's a URL and extract content
+            article_content = scrape_news_content(url)
+        else:
+            # If the input is not a URL, assume it's the content
+            article_content = url
+        # Perform sentiment classification
+        inputs_cls = tokenizer_cls(article_content, return_tensors="pt", max_length=512, truncation=True, padding=True)
+        inputs_cls = {key: value.to(device) for key, value in inputs_cls.items()}
+        # Move cls_model to the specified device
+        cls_model = cls_model.to(device)
+        outputs_cls = cls_model(**inputs_cls)
+        logits_cls = outputs_cls.logits
+        predicted_class = torch.argmax(logits_cls, dim=1).item()
+        classification = label_encoder.inverse_transform([predicted_class])[0]
+        # Perform summarization based on the classification
+        summary_risk, summary_opportunity = summarize_with_t5(article_content, classification, model_summ, tokenizer_summ, device)
+        if summary_risk is None:
+            summary_risk = "No risk summary available"
+        if summary_opportunity is None:
+            summary_opportunity = "No opportunity summary available"
+        answer=article_content
+        article_content_words = article_content.split()[:200]
+        short_article_content = ' '.join(article_content_words)
+        insert_question_and_answer(url,answer, request_timestamp)
+        current_request_timestamp=request_timestamp
+        results.append({"Question": url, "Article content":article_content,"Short Article content":short_article_content,"Classification": classification, "Summary risk": summary_risk, "Opportunity Summary": summary_opportunity})
+        print("Result",results)
+    return results
+def generate_sentence_from_keywords(keywords):
+    # Concatenate keywords into a single string
+    keyword_sentence = ' '.join(keywords)
+    # Tokenize the concatenated keywords into sentences
+    sentences = sent_tokenize(keyword_sentence)
+    # If there are sentences, return the first one; otherwise, return a default message
+    return sentences[0] if sentences else "Unable to generate a sentence."
+def is_question(input_text):
+    questioning_words = ["who", "what", "when", "where", "why", "how"]
+    return any(input_text.lower().startswith(q) for q in questioning_words)
+def process_question(user_question,articlecontent):
+    answers = [item[1] for item in articlecontent]
+    context_string = ' '.join(map(str, answers))
+    model_name = "deepset/tinyroberta-squad2"
+    nlp = pipeline('question-answering', model=model_name, tokenizer=model_name)
+    QA_input = {'question': user_question, 'context': context_string}
+    print("Debug - QA_input:", QA_input)
+    res = nlp(QA_input)
+    print("Debug - res:", res)
+    print(res['answer'])
+    return res["answer"]
+def generate_pdf(chat_history):
+    # Create a PDF document using ReportLab
+    buffer = io.BytesIO()
+    # Adjust the page size and margins as needed
+    pdf = SimpleDocTemplate(buffer, pagesize=letter)
+    # List to store the content for the PDF
+    pdf_content = []
+    # Get sample styles for formatting
+    styles = getSampleStyleSheet()
+    # Maximum characters per line
+    max_chars_per_line = 100
+    # Write chat history to the PDF
+    for message in chat_history:
+        if isinstance(message, dict):
+            for key, value in message.items():
+                formatted_value = value[:max_chars_per_line] + ('...' if len(value) > max_chars_per_line else '')
+                pdf_content.append(Paragraph(f"<strong>{key}:</strong> {formatted_value}", styles['Normal']))
+        elif isinstance(message, str):
+            formatted_message = message[:max_chars_per_line] + ('...' if len(message) > max_chars_per_line else '')
+            pdf_content.append(Paragraph(formatted_message, styles['Normal']))
+        else:
+            formatted_message = str(message)[:max_chars_per_line] + ('...' if len(str(message)) > max_chars_per_line else '')
+            pdf_content.append(Paragraph(formatted_message, styles['Normal']))
+        pdf_content.append(Spacer(1, 10))  # Add space between messages
+    # Build PDF document
+    pdf.build(pdf_content)
+    buffer.seek(0)
+    return buffer.getvalue()
+@app.route('/download_pdf', methods=['GET'])
+def download_pdf():
+    # Generate a PDF document based on chat history
+    pdf_buffer = generate_pdf(chat_history)
+    # Provide the PDF as a download
+    return send_file(
+        io.BytesIO(pdf_buffer),
+        as_attachment=True,
+        download_name='chat_history.pdf',
+        mimetype='application/pdf'
+    )
+current_request_timestamp = None
+@app.route('/', methods=['GET', 'POST'])
+def home():
+    global current_request_timestamp
+    classification = None
+    summary_risk = None
+    summary_opportunity = None
+    article_content = None
+    input_submitted = False
+    if request.method == 'POST':
+        url_input = request.form['userInput']
+        print("Form Data:", request.form)
+        input_submitted = True
+        print(url_input)
+        cls_model = AutoModelForSequenceClassification.from_pretrained("riskclassification_finetuned_xlnet_model_ld")
+        tokenizer_cls = AutoTokenizer.from_pretrained("xlnet-base-cased")
+        label_encoder_path = "riskclassification_finetuned_xlnet_model_ld/encoder_labels.pkl"
+        label_encoder = LabelEncoder()
+        # Assuming 'label_column values' is the column you want to encode
+        label_column_values = ["risks","opportunities","neither"]
+        label_encoder.fit_transform(label_column_values)
+        joblib.dump(label_encoder, label_encoder_path)
+        model_summ = T5ForConditionalGeneration.from_pretrained("t5-small")
+        tokenizer_summ = T5Tokenizer.from_pretrained("t5-small")
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        if url_input.startswith("http"):
+            current_request_timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+            # If the input starts with "http", assume it's a URL and extract content
+            totalresult = classify_and_summarize(
+                url_input, cls_model, tokenizer_cls, label_encoder, model_summ, tokenizer_summ, device
+            )
+            chat_history.extend(totalresult)
+            '''first={"Classification":classification}
+            second={"Summary risk":summary_risk}
+            opp={"Opportunity Summary":summary_opportunity}
+            third={"Article content":article_content}
+            chat_history.extend([{"Question":url_input}])
+            chat_history.extend([first])
+            chat_history.extend([second])
+            chat_history.extend([opp])
+            chat_history.extend([third])
+            chat_history.extend([{"Short Article content":short_article_content}]) ''' # Display only the first 200 words
+            '''return render_template('index.html', classification=classification, summary_risk=summary_risk,
+                               summary_opportunity=summary_opportunity, article_content=article_content,
+                               input_submitted=input_submitted, chat_history=chat_history)'''
+        elif is_question(url_input):
+            # If the input starts with questioning words, process the question
+            timestamp= datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+            if current_request_timestamp and current_request_timestamp is not None:
+                articlecontent = retrieve_article_content(current_request_timestamp)
+                #articlecontent=retrieve_article_content()
+                answer = process_question(url_input,articlecontent)
+             # You need to implement process_question function
+                insert_question_and_answer(url_input,answer,timestamp)
+                uq={"User Question": url_input}
+                chat_history.extend([uq])
+                ma={"Model Answer": answer}
+                chat_history.extend([ma])
+           # return render_template('index.html', question=url_input,answer=answer,chat_history=chat_history)
+    print("chat history",chat_history)
+    return render_template('index.html', chat_history=chat_history,classification=classification, summary_risk=summary_risk, summary_opportunity=summary_opportunity, article_content=article_content, input_submitted=input_submitted)
+if __name__ == '__main__':
+    app.run(debug=True)

db.sql ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+Flask==3.0.2
+flask_cors==4.0.0
+torch==2.2.1
+news-fetch==0.2.8
+transformers==4.37.2
+newspaper3k==0.2.8
+nltk==3.8.1
+reportlab==4.1.0
+scikit-learn==1.4.1
+joblib==1.3.2
+sentencepiece==0.2.0
+mysql-connector-python==8.3.0