jordyvl committed on
Commit
13cbc38
1 Parent(s): 872bd1f

Issue with tempfile.NamedTemporaryFile resolved

Browse files
Files changed (1) hide show
  1. app.py +11 -5
app.py CHANGED
@@ -98,7 +98,10 @@ def main(dataset, label):
98
  timestamp = time.time()
99
  seed = int(timestamp * 1000) % 1000000
100
 
101
- shuffled_dataset = DATASETS[dataset].shuffle(buffer_size=10, seed=seed)
 
 
 
102
 
103
  # first get PDF file
104
  for sample in shuffled_dataset:
@@ -110,9 +113,11 @@ def main(dataset, label):
110
  grid = pdf_to_grid(BytesIO(pdf_path))
111
  if grid is None:
112
  continue
113
- PDF = tempfile.NamedTemporaryFile(suffix=".pdf")
114
- PDF.write(pdf_path)
115
- return filelabel, grid, pdf_path
 
 
116
 
117
 
118
  _CLASSES = [
@@ -139,7 +144,7 @@ _CLASSES = [
139
  DATASETS = OrderedDict(
140
  {
141
  # "rvl_cdip": load_dataset("bdpc/rvl_cdip_mp", split="test", streaming=True),
142
- "rvl_cdip_N": load_dataset("bdpc/rvl_cdip_n_mp", split="test", streaming=True),
143
  }
144
  )
145
 
@@ -166,6 +171,7 @@ The first time that the app is launched, it will download the datasets, which ca
166
  For fastest response, choose the rvl_cdip_N dataset, which is considerably smaller to iterate over.
167
  """
168
 
 
169
  iface = gr.Interface(
170
  fn=main,
171
  inputs=sliders,
 
98
  timestamp = time.time()
99
  seed = int(timestamp * 1000) % 1000000
100
 
101
+ try:
102
+ shuffled_dataset = DATASETS[dataset].shuffle(buffer_size=10, seed=seed)
103
+ except: # lazy
104
+ shuffled_dataset = DATASETS[dataset].shuffle(seed=seed)
105
 
106
  # first get PDF file
107
  for sample in shuffled_dataset:
 
113
  grid = pdf_to_grid(BytesIO(pdf_path))
114
  if grid is None:
115
  continue
116
+ PDF = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
117
+ with PDF as tmp_file:
118
+ # pdf_path.to_file(tmp_file.name)
119
+ tmp_file.write(pdf_path)
120
+ return filelabel, grid, tmp_file.name
121
 
122
 
123
  _CLASSES = [
 
144
  DATASETS = OrderedDict(
145
  {
146
  # "rvl_cdip": load_dataset("bdpc/rvl_cdip_mp", split="test", streaming=True),
147
+ "rvl_cdip_N": load_dataset("bdpc/rvl_cdip_n_mp", split="test"),
148
  }
149
  )
150
 
 
171
  For fastest response, choose the rvl_cdip_N dataset, which is considerably smaller to iterate over.
172
  """
173
 
174
+ # main("rvl_cdip_N", "letter")
175
  iface = gr.Interface(
176
  fn=main,
177
  inputs=sliders,