|
import gradio as gr |
|
import numpy as np |
|
import random |
|
import re |
|
import os |
|
from io import BytesIO |
|
import shutil |
|
import mechanicalsoup |
|
import pandas as pd |
|
import requests |
|
from reportlab.lib import colors |
|
from reportlab.lib.enums import TA_CENTER |
|
from reportlab.lib.pagesizes import A4 |
|
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet |
|
from reportlab.lib.units import cm |
|
from reportlab.platypus import ( |
|
Image, |
|
Paragraph, |
|
SimpleDocTemplate, |
|
Spacer, |
|
Table, |
|
TableStyle, |
|
) |
|
from unidecode import unidecode |
|
import base64 |
|
from pdf2image import convert_from_bytes |
|
|
|
|
|
|
|
|
|
class PDFPoster:
    """Build a PDF poster summarising the votes of a French deputy.

    The deputy's vote page is scraped from https://datan.fr, then a selection
    of votes is laid out with ReportLab (title, party, portrait, vote table,
    source link and two free-text messages).

    Typical usage::

        poster = PDFPoster("Jean Dupont")
        votes = poster.retrieve_deputy_data()
        pdf_buffer = poster.generate_poster(votes["vote_topic"].tolist())
    """

    # Default texts printed under the vote table.
    DEFAULT_MESSAGE_1 = "Les votes de vos députés sont souvent différents de ce que les responsables de partis annoncent dans les médias. Les données de votes sont ouvertes!"
    DEFAULT_MESSAGE_2 = "Les 30 juin, et 7 juillet, renseignez vous, et votez en connaissance de cause !"

    # Seconds to wait for the portrait download before giving up.
    REQUEST_TIMEOUT = 30

    def __init__(self, deputy_name: str):
        """Store the deputy's name; no network access happens here."""
        self.deputy_name = deputy_name

    @staticmethod
    def _clean_text(text: str) -> str:
        """Remove newlines and surrounding whitespace from scraped text."""
        return text.replace("\n", "").strip()

    @staticmethod
    def _slugify(name: str) -> str:
        """Lower-case, strip accents and hyphenate a name for URL/alt matching."""
        return unidecode(name.strip().lower()).replace(" ", "-")

    @staticmethod
    def _vote_background(vote_text: str):
        """Return the cell colour for a vote label, or None when unrecognised."""
        for keyword, color in (
            ("POUR", colors.green),
            ("CONTRE", colors.red),
            ("ABSTENTION", colors.beige),
        ):
            if keyword in vote_text:
                return color
        return None

    def retrieve_deputy_data(self):
        """Scrape the deputy's page and cache the results on the instance.

        Sets ``deputy_data``, ``votes``, ``img_url`` and ``party``.

        Returns:
            pd.DataFrame: the deputy's votes (see get_votes_from_politic_page).

        Raises:
            ValueError: if the deputy, their portrait or their party cannot be
                found on the scraped pages.
        """
        self.deputy_data = self.get_deputy_votes_page()
        self.votes = self.get_votes_from_politic_page()
        self.img_url = self.get_politic_image()
        self.party = self.get_politic_party()
        return self.votes

    def generate_poster(
        self,
        vote_list,
        message_1: str = DEFAULT_MESSAGE_1,
        message_2: str = DEFAULT_MESSAGE_2,
    ) -> BytesIO:
        """Render the poster as a PDF.

        ``retrieve_deputy_data`` must have been called first, since this
        method reads ``votes``, ``party``, ``img_url`` and ``deputy_data``.

        Args:
            vote_list: vote topics (values of the ``vote_topic`` column) to
                show in the table. ``None`` is treated as an empty selection.
            message_1: text printed below the table in the subtitle style.
            message_2: call-to-action printed last, in large red type.

        Returns:
            BytesIO: buffer holding the PDF, positioned at offset 0.

        Raises:
            requests.RequestException: if the portrait cannot be downloaded.
        """
        selected_topics = list(vote_list or [])
        df_subset = self.votes[self.votes["vote_topic"].isin(selected_topics)]

        buffer = BytesIO()
        document = SimpleDocTemplate(buffer, pagesize=A4)

        # Paragraph styles (the sample style sheet is a fresh object per call,
        # so mutating its entries does not leak between posters).
        styles = getSampleStyleSheet()
        title_style = styles["Title"]
        title_style.alignment = TA_CENTER
        subtitle_style = styles["Heading2"]
        subtitle_style.alignment = TA_CENTER
        subtitle_style.fontName = "Helvetica-Bold"

        normal_style = styles["Normal"]
        normal_style.alignment = TA_CENTER

        red_style = ParagraphStyle(
            "red", parent=subtitle_style, textColor=colors.red, fontSize=20
        )

        # Text blocks.
        title = Paragraph(
            f"Les votes de votre député sortant : {self.deputy_name}", title_style
        )
        subtitle = Paragraph(f"Parti : {self.party} ", subtitle_style)
        source = Paragraph(f"Source : {self.deputy_data['url']}", normal_style)
        after_text = Paragraph(message_1, subtitle_style)
        vote_text = Paragraph(message_2, red_style)

        # Portrait: fail fast on a hung or failed download instead of handing
        # an error page to ReportLab.
        image_response = requests.get(self.img_url, timeout=self.REQUEST_TIMEOUT)
        image_response.raise_for_status()
        image = Image(BytesIO(image_response.content))
        image.drawHeight = 6 * cm
        image.drawWidth = 5 * cm

        # Vote table: one header row, then one (topic, vote) row per selection.
        topics = df_subset["vote_topic"].tolist()
        votes = df_subset["for_or_against"].tolist()

        table_data = [["Sujet", "Vote"]]
        for topic, vote in zip(topics, votes):
            table_data.append(
                [Paragraph(topic, normal_style), Paragraph(vote, normal_style)]
            )

        style_commands = [
            ("BACKGROUND", (0, 0), (-1, 0), colors.white),
            ("TEXTCOLOR", (0, 0), (-1, 0), colors.black),
            ("ALIGN", (0, 0), (-1, -1), "CENTER"),
            # Bold applies to the header row only.
            ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
            ("FONTSIZE", (0, 0), (-1, 0), 14),
            ("BOTTOMPADDING", (0, 0), (-1, 0), 12),
            ("BACKGROUND", (0, 1), (-1, -1), colors.white),
            ("GRID", (0, 0), (-1, -1), 1, colors.black),
        ]
        # Colour only the vote column, so a topic that happens to contain one
        # of the keywords is not highlighted. Data rows start at index 1.
        for row_idx, vote in enumerate(votes, start=1):
            background = self._vote_background(vote)
            if background is not None:
                style_commands.append(
                    ("BACKGROUND", (1, row_idx), (1, row_idx), background)
                )

        table = Table(table_data)
        table.setStyle(TableStyle(style_commands))

        elements = [
            title,
            Spacer(1, 6),
            subtitle,
            Spacer(1, 12),
            image,
            Spacer(1, 12),
            table,
            source,
            Spacer(1, 8),
            after_text,
            Spacer(1, 8),
            vote_text,
        ]
        document.build(elements)

        # Rewind the buffer that actually received the PDF so callers can
        # either read() it or call getvalue().
        buffer.seek(0)
        return buffer

    def get_deputy_votes_page(self) -> dict:
        """Fetch the datan.fr votes page of the deputy.

        The deputy is located by searching the deputies index for the first
        link whose ``href`` contains the slugified name.

        Returns:
            dict: ``html_page`` (parsed votes page), ``url`` (the deputy's
            page URL) and ``name`` (the slugified name).

        Raises:
            ValueError: if the name is empty or no matching link is found.
        """
        politic_name = self._slugify(self.deputy_name)
        if not politic_name:
            raise ValueError(f"Politic {politic_name} not found")

        browser = mechanicalsoup.StatefulBrowser()
        try:
            research_html = browser.open("https://datan.fr/deputes").soup

            # Substring match on href, done in Python so that quotes or other
            # special characters in user input cannot break a CSS selector.
            politic_card = research_html.find(
                "a", href=lambda href: href is not None and politic_name in href
            )
            if politic_card is None:
                raise ValueError(f"Politic {politic_name} not found")

            url_politic = politic_card["href"]
            politic_html = browser.open(url_politic + "/votes").soup
        finally:
            # Release the underlying HTTP session.
            browser.close()

        return {
            "html_page": politic_html,
            "url": url_politic,
            "name": politic_name,
        }

    def get_votes_from_politic_page(self) -> "pd.DataFrame":
        """Extract the voting records from the cached votes page.

        Reads ``self.deputy_data`` (set by ``retrieve_deputy_data``).

        Returns:
            pd.DataFrame: one row per vote card with columns ``vote_id``,
            ``for_or_against``, ``vote_topic``, ``vote_date``,
            ``politic_name`` and ``vote_category``.
        """
        politic_html = self.deputy_data["html_page"]
        politic_name = self.deputy_data["name"]
        vote_elements = politic_html.find_all("div", class_="card card-vote")
        # NOTE(review): categories are paired with cards by position, which
        # assumes one "sorting-item" wrapper per vote card, in the same order.
        vote_categories = politic_html.find_all(
            class_=re.compile("col-md-6 sorting-item*")
        )

        votes = []
        for i, vote_element in enumerate(vote_elements):
            for_or_against = self._clean_text(
                vote_element.find("div", class_="d-flex align-items-center").text
            )
            # The same link carries both the topic (text) and the id (href).
            link = vote_element.find(
                "a", class_="stretched-link underline no-decoration"
            )
            vote_topic = self._clean_text(link.text)
            vote_id = self._clean_text(link["href"].split("/")[-1])
            vote_date = self._clean_text(
                vote_element.find("span", class_="date").text
            )
            # The last CSS class of the wrapper is used as the category.
            vote_category = vote_categories[i]["class"][-1]
            votes.append(
                [
                    vote_id,
                    for_or_against,
                    vote_topic,
                    vote_date,
                    politic_name,
                    vote_category,
                ]
            )

        return pd.DataFrame(
            votes,
            columns=[
                "vote_id",
                "for_or_against",
                "vote_topic",
                "vote_date",
                "politic_name",
                "vote_category",
            ],
        )

    def get_politic_image(self):
        """Return the URL of the deputy's portrait from the cached page.

        The ``<img>`` is matched on its ``alt`` text: first exactly against
        the name as typed, then ignoring case and accents so that
        "jean dupont" still finds "Jean Dupont".

        Returns:
            The ``src`` attribute of the matching image.

        Raises:
            ValueError: if no matching image is found.
        """
        html_page = self.deputy_data["html_page"]
        image = html_page.find("img", alt=self.deputy_name)
        if image is None:
            wanted = self._slugify(self.deputy_name)
            image = html_page.find(
                "img",
                alt=lambda alt: alt is not None and self._slugify(alt) == wanted,
            )
        if image is None:
            raise ValueError(f"Image of {self.deputy_name} not found")
        return image.get("src")

    def get_politic_party(self):
        """Return the deputy's party name as shown on the cached page.

        Raises:
            ValueError: if the party element is missing from the page.
        """
        party_element = self.deputy_data["html_page"].find(
            "div", class_="link-group text-center mt-1"
        )
        if party_element is None:
            raise ValueError(f"Party of {self.deputy_name} not found")
        return self._clean_text(party_element.text)
|
|
|
|
|
|
|
|
|
|
|
# Custom CSS passed to gr.Blocks: centres the element with id "col-container"
# and caps its width at 800px.
css = """
#col-container {
    margin: 0 auto;
    max-width: 800px;
}
"""
|
|
|
def fetch_votes(deputy_name):
    """Gradio callback: load a deputy's votes and offer them as choices.

    Args:
        deputy_name: name of the deputy as typed by the user.

    Returns:
        A ``gr.update`` that sets the choices of the vote selector to the
        deputy's vote topics.

    Raises:
        gr.Error: if the lookup fails with a ValueError (e.g. deputy not
            found), so the message is shown in the UI.
    """
    pdfposter = PDFPoster(deputy_name)
    try:
        votes = pdfposter.retrieve_deputy_data()
    except ValueError as err:
        raise gr.Error(str(err)) from err

    # Topics (not ids) are used as choices because PDFPoster.generate_poster
    # filters on the "vote_topic" column.
    return gr.update(choices=votes["vote_topic"].tolist())
|
|
|
def generate_poster(deputy_name, message_1, message_2, vote_list):
    """Gradio callback: build the poster and return its previews and PDF.

    Args:
        deputy_name: name of the deputy as typed by the user.
        message_1: text shown under the table; a default is used when empty.
        message_2: call-to-action text; a default is used when empty.
        vote_list: vote topics selected by the user (may be empty or None).

    Returns:
        tuple: ``(image_paths, pdf_path)`` where ``image_paths`` lists one PNG
        per PDF page and ``pdf_path`` is the saved PDF. Both are file paths
        under ``./static``, as expected by the gallery and file outputs.

    Raises:
        gr.Error: if the deputy lookup fails with a ValueError.
    """
    if not message_1:
        message_1 = "Les votes de vos députés sont souvent différents de ce que les responsables de partis annoncent dans les médias. Les données de votes sont ouvertes!"
    if not message_2:
        message_2 = "Les 30 juin, et 7 juillet, renseignez vous, et votez en connaissance de cause !"

    pdfposter = PDFPoster(deputy_name)
    try:
        pdfposter.retrieve_deputy_data()
    except ValueError as err:
        raise gr.Error(str(err)) from err

    pdf_buffer = pdfposter.generate_poster(vote_list or [], message_1, message_2)
    pdf_bytes = pdf_buffer.getvalue()
    images = convert_from_bytes(pdf_bytes)

    # The output directory may not exist on a fresh deployment.
    output_dir = "./static"
    os.makedirs(output_dir, exist_ok=True)

    # The name comes straight from a text box: keep only word characters and
    # hyphens so it cannot escape the output directory or form an odd path.
    safe_name = re.sub(r"[^\w-]+", "_", deputy_name).strip("_") or "deputy"

    image_paths = []
    for i, image in enumerate(images):
        image_path = f"{output_dir}/{safe_name}_page_{i+1}.png"
        image.save(image_path, "PNG")
        image_paths.append(image_path)

    # The file output needs a path on disk rather than raw bytes.
    pdf_path = f"{output_dir}/{safe_name}.pdf"
    with open(pdf_path, "wb") as pdf_file:
        pdf_file.write(pdf_bytes)

    return image_paths, pdf_path
|
|
|
# Build the Gradio interface. Components are declared in display order; the
# column's elem_id matches the "#col-container" rule in `css`.
with gr.Blocks(css=css) as demo:

    with gr.Column(elem_id="col-container"):
        gr.Markdown("""
        # Réalise une affiche des votes de ton député !
        """)

        # Deputy name input next to the button that loads that deputy's votes.
        with gr.Row():
            deputy_name = gr.Text(
                label="deputy_name",
                show_label=False,
                max_lines=1,
                placeholder="Nom du député, si tu ne le connais pas RDV sur www.datan.fr ou www.nosdeputes.fr",
                container=False,
            )
            fetch_button = gr.Button("Récupère ses votes importants", scale=0)

        # Starts with no choices; fetch_votes fills them in via gr.update.
        vote_list = gr.CheckboxGroup(label="Select Votes", choices=[])

        # Optional custom messages; generate_poster substitutes defaults when
        # these are left empty.
        with gr.Row():
            message_1 = gr.Text(
                label="message_1",
                max_lines=1,
                placeholder="Les votes de vos députés sont souvent différents de ce que les responsables de partis annoncent dans les médias. Les données de votes sont ouvertes!",
                visible=True,
            )
            message_2 = gr.Text(
                label="message_2",
                max_lines=1,
                placeholder="Les 30 juin, et 7 juillet, renseignez vous, et votez en connaissance de cause !",
                visible=True,
            )

        generate_button = gr.Button("Générer l'affiche ! ", scale=0)
        images_output = gr.Gallery(label="Image")
        pdf_output = gr.File(label="Télécharger le PDF")

    # Step 1: look up the deputy and populate the vote checkboxes.
    fetch_button.click(
        fn=fetch_votes,
        inputs=deputy_name,
        outputs=vote_list
    )

    # Step 2: render the poster from the selected votes and messages.
    generate_button.click(
        fn=generate_poster,
        inputs=[deputy_name, message_1, message_2, vote_list],
        outputs=[images_output, pdf_output]
    )

# Enable request queuing, then start the web server.
demo.queue().launch()
|
|