MMOON committed on
Commit
c291e47
·
verified ·
1 Parent(s): 1efb0d1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -74
app.py CHANGED
@@ -2,92 +2,75 @@ import streamlit as st
2
  import pandas as pd
3
  import re
4
  from datetime import datetime
5
- import pdfplumber
6
  import plotly.express as px
7
  import io
8
 
9
def extract_data_from_pdf(pdf_content):
    """Parse the text of sterilization reports into one record per report.

    The text is scanned line by line. A line containing ``Entreprise::``
    starts a new record; ``Date:``, ``Produit:`` and ``Utilisateur:`` lines
    fill in its metadata; lines containing ``°C`` together with one of the
    markers ``Début``, ``+ `` or ``Fin`` are read as temperature rows.

    Args:
        pdf_content: Full text of the PDF, pages separated by newlines.

    Returns:
        A list of dicts. Each dict has a ``temperature_data`` list and,
        when the matching lines were present, ``date`` (``YYYY-MM-DD`` or
        ``None`` if the date could not be parsed), ``produit`` and
        ``utilisateur``. Each temperature row is a dict with the keys
        ``temps``, ``temp_sterilisateur``, ``temp_coeur`` and ``valeur_f``.
    """
    data_list = []
    current_record = {}

    for line in pdf_content.split('\n'):
        # A header line closes the record in progress and opens a new one.
        if 'Entreprise::' in line:
            if current_record and 'temperature_data' in current_record:
                data_list.append(current_record)
            current_record = {'temperature_data': []}

        # Extract metadata
        if 'Date:' in line:
            date_match = re.search(r'Date:\s*(\d{2}[./]\d{2}[./]\d{4})', line)
            if date_match is None:
                current_record['date'] = None
            else:
                date_str = date_match.group(1).replace('/', '.')
                try:
                    current_record['date'] = datetime.strptime(
                        date_str, '%d.%m.%Y'
                    ).strftime('%Y-%m-%d')
                except ValueError:
                    # Well-formed but impossible date (e.g. 31.02.2024).
                    current_record['date'] = None

        if 'Produit:' in line:
            current_record['produit'] = line.split('Produit:')[-1].strip()

        if 'Utilisateur:' in line:
            current_record['utilisateur'] = line.split('Utilisateur:')[-1].strip()

        # Extract temperature data
        is_temperature_row = '°C' in line and any(
            marker in line for marker in ('Début', '+ ', 'Fin')
        )
        if not is_temperature_row:
            continue

        # Rows seen before any 'Entreprise::' header belong to no record.
        if 'temperature_data' not in current_record:
            continue

        parts = line.strip().split()
        try:
            # 'Début'/'Fin' rows are labelled by their first token;
            # '+ <n> Min.' rows by the token after the '+'.
            step = parts[0] if 'Début' in line or 'Fin' in line else parts[1]
            temp_sterilisateur = float(parts[-3].replace('°C', ''))
            temp_coeur = float(parts[-2].replace('°C', ''))
            # The F value may be written with a decimal comma ("01,25"),
            # which float() rejects; normalise it before converting.
            valeur_f = float(parts[-1].replace(',', '.'))
        except (IndexError, ValueError):
            # Malformed row: skip it and keep parsing the rest.
            continue

        current_record['temperature_data'].append({
            'temps': step,
            'temp_sterilisateur': temp_sterilisateur,
            'temp_coeur': temp_coeur,
            'valeur_f': valeur_f,
        })

    # Add last record
    if current_record and 'temperature_data' in current_record:
        data_list.append(current_record)

    return data_list
61
 
62
  def analyze_sterilization(data):
63
  results = []
64
 
65
- for record in data:
66
- temp_data = pd.DataFrame(record['temperature_data'])
67
-
68
- # Skip if temperature data is empty
69
- if temp_data.empty:
70
- st.warning(f"Données de température manquantes pour l'enregistrement du {record['date']}")
71
- continue
72
-
73
  # Determine product type and required temperature
74
- is_nutabreizh = 'NutaBreizh' in record['produit']
75
  required_temp = 108 if is_nutabreizh else 103
76
 
77
  # Count minutes at required temperature
78
- minutes_at_temp = len(temp_data[temp_data['temp_coeur'] >= required_temp])
79
 
80
  # Calculate max temperatures
81
- max_temp_sterilisateur = temp_data['temp_sterilisateur'].max()
82
- max_temp_coeur = temp_data['temp_coeur'].max()
83
 
84
  # Determine if criteria met
85
  criteria_met = minutes_at_temp >= 30
86
 
87
  results.append({
88
- 'Date': record['date'],
89
- 'Produit': record['produit'],
90
- 'Utilisateur': record['utilisateur'],
91
  'Temperature_Requise': required_temp,
92
  'Minutes_Temperature_Requise': minutes_at_temp,
93
  'Temperature_Max_Sterilisateur': max_temp_sterilisateur,
@@ -103,14 +86,8 @@ def main():
103
  uploaded_file = st.file_uploader("Choisir un fichier PDF", type="pdf")
104
 
105
  if uploaded_file is not None:
106
- # Read PDF content
107
- pdf_text = ""
108
- with pdfplumber.open(uploaded_file) as pdf:
109
- for page in pdf.pages:
110
- pdf_text += page.extract_text() + "\n"
111
-
112
  # Process data
113
- data = extract_data_from_pdf(pdf_text)
114
  results_df = analyze_sterilization(data)
115
 
116
  # Display results
 
2
  import pandas as pd
3
  import re
4
  from datetime import datetime
5
+ import fitz # PyMuPDF
6
  import plotly.express as px
7
  import io
8
 
9
# Column order of the DataFrame returned by extract_data_from_pdf. Passing it
# explicitly keeps the columns present even when no row was extracted.
_STERILIZATION_COLUMNS = [
    'Date',
    'Entreprise',
    'Utilisateur',
    'Produit',
    'Déroulement',
    'Temp. du stérilisateur',
    'Temp. à coeur',
    'Valeur F',
]

# One temperature row: step label ("Début" or "+ <n> Min."), sterilizer
# temperature, core temperature, and an F value written with a decimal comma.
_TEMPERATURE_ROW_RE = re.compile(
    r'(\+\s\d+\sMin\.|Début)\s*(\d+)°C\s*(\d+)°C\s*(\d{2},\d{2})'
)


def _read_pdf_page_texts(pdf_source):
    """Return the text of every page of the PDF, closing the document after."""
    if isinstance(pdf_source, (bytes, bytearray)):
        pdf_document = fitz.open(stream=bytes(pdf_source), filetype='pdf')
    elif hasattr(pdf_source, 'read'):
        # File-like object (the caller in this file passes the Streamlit
        # upload object): hand its bytes to PyMuPDF as an in-memory stream.
        if getattr(pdf_source, 'seekable', lambda: False)():
            pdf_source.seek(0)
        pdf_document = fitz.open(stream=pdf_source.read(), filetype='pdf')
    else:
        pdf_document = fitz.open(pdf_source)

    try:
        return [page.get_text() for page in pdf_document]
    finally:
        pdf_document.close()


def _parse_page_text(page_text):
    """Return the temperature rows of one page as a list of row dicts."""
    # Extract metadata
    company_match = re.search(r'Entreprise::\s*(.+)', page_text)
    date_match = re.search(r'Date:\s*(\d{2}[./]\d{2}[./]\d{4})', page_text)
    user_match = re.search(r'Utilisateur:\s*(.+)', page_text)
    product_match = re.search(r'Produit:\s*(.+)', page_text)

    if not all([company_match, date_match, user_match, product_match]):
        return []  # Skip pages without metadata

    # Parse the date once per page rather than once per row.
    date_text = date_match.group(1).replace('/', '.').strip()
    try:
        iso_date = datetime.strptime(date_text, '%d.%m.%Y').strftime('%Y-%m-%d')
    except ValueError:
        # Well-formed but impossible date (e.g. 31.02.2024): keep the rows.
        iso_date = None

    company = company_match.group(1).strip()
    user = user_match.group(1).strip()
    product = product_match.group(1).strip()

    return [
        {
            'Date': iso_date,
            'Entreprise': company,
            'Utilisateur': user,
            'Produit': product,
            'Déroulement': step.strip(),
            'Temp. du stérilisateur': float(sterilizer_temp),
            'Temp. à coeur': float(core_temp),
            'Valeur F': float(f_value.replace(',', '.')),
        }
        for step, sterilizer_temp, core_temp, f_value
        in _TEMPERATURE_ROW_RE.findall(page_text)
    ]


def extract_data_from_pdf(pdf_path):
    """Extract sterilization temperature rows from a PDF into a DataFrame.

    Each page is searched for ``Entreprise::``, ``Date:``, ``Utilisateur:``
    and ``Produit:``; pages missing any of them are skipped. Every
    temperature row found on a page is emitted with that page's metadata.

    Args:
        pdf_path: A filesystem path, raw PDF bytes, or a file-like object
            with a ``read`` method.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Date`` (``YYYY-MM-DD``, or
        ``None`` when the date is not a valid calendar date),
        ``Entreprise``, ``Utilisateur``, ``Produit``, ``Déroulement``,
        ``Temp. du stérilisateur``, ``Temp. à coeur`` and ``Valeur F``.
        The columns are present even when no row was extracted.
    """
    structured_data = []
    for page_text in _read_pdf_page_texts(pdf_path):
        structured_data.extend(_parse_page_text(page_text))

    return pd.DataFrame(structured_data, columns=_STERILIZATION_COLUMNS)
 
 
 
 
 
 
 
 
 
 
 
51
 
52
  def analyze_sterilization(data):
53
  results = []
54
 
55
+ for product, group in data.groupby('Produit'):
 
 
 
 
 
 
 
56
  # Determine product type and required temperature
57
+ is_nutabreizh = 'NutaBreizh' in product
58
  required_temp = 108 if is_nutabreizh else 103
59
 
60
  # Count minutes at required temperature
61
+ minutes_at_temp = len(group[group['Temp. à coeur'] >= required_temp])
62
 
63
  # Calculate max temperatures
64
+ max_temp_sterilisateur = group['Temp. du stérilisateur'].max()
65
+ max_temp_coeur = group['Temp. à coeur'].max()
66
 
67
  # Determine if criteria met
68
  criteria_met = minutes_at_temp >= 30
69
 
70
  results.append({
71
+ 'Date': group['Date'].iloc[0],
72
+ 'Produit': product,
73
+ 'Utilisateur': group['Utilisateur'].iloc[0],
74
  'Temperature_Requise': required_temp,
75
  'Minutes_Temperature_Requise': minutes_at_temp,
76
  'Temperature_Max_Sterilisateur': max_temp_sterilisateur,
 
86
  uploaded_file = st.file_uploader("Choisir un fichier PDF", type="pdf")
87
 
88
  if uploaded_file is not None:
 
 
 
 
 
 
89
  # Process data
90
+ data = extract_data_from_pdf(uploaded_file)
91
  results_df = analyze_sterilization(data)
92
 
93
  # Display results