davanstrien HF Staff committed on
Commit
caebeb2
·
1 Parent(s): bbe7feb

Refactor app.py for improved readability and consistency; streamline model loading and update extraction prompt.

Browse files
Files changed (1) hide show
  1. app.py +16 -30
app.py CHANGED
@@ -12,9 +12,7 @@ os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
12
  # Load model and processor
13
  print("Loading Qwen3-VL-30B-A3B-Instruct model...")
14
  model = AutoModelForImageTextToText.from_pretrained(
15
- "Qwen/Qwen3-VL-30B-A3B-Instruct",
16
- torch_dtype=torch.bfloat16,
17
- device_map="auto"
18
  )
19
  processor = AutoProcessor.from_pretrained("Qwen/Qwen3-VL-30B-A3B-Instruct")
20
  print("Model loaded successfully!")
@@ -25,10 +23,10 @@ EXTRACTION_PROMPT = """Extract all metadata from this library catalog card and r
25
  - date: Any dates mentioned (publication, creation, or coverage dates)
26
  - call_number: Library classification or call number
27
  - physical_description: Details about the physical item (size, extent, format)
28
- - subjects: Subject headings or topics
29
  - notes: Any additional notes or information
30
 
31
- Return ONLY the JSON object, nothing else. If a field is not present on the card, use null for that field."""
 
32
 
33
  @spaces.GPU
34
  def extract_metadata(image):
@@ -47,8 +45,8 @@ def extract_metadata(image):
47
  "role": "user",
48
  "content": [
49
  {"type": "image", "image": image},
50
- {"type": "text", "text": EXTRACTION_PROMPT}
51
- ]
52
  }
53
  ]
54
 
@@ -63,29 +61,27 @@ def extract_metadata(image):
63
  images=image_inputs,
64
  videos=video_inputs,
65
  padding=True,
66
- return_tensors="pt"
67
  )
68
  inputs = inputs.to(model.device)
69
 
70
  # Generate
71
  with torch.inference_mode():
72
  generated_ids = model.generate(
73
- **inputs,
74
- max_new_tokens=512,
75
- temperature=0.1,
76
- do_sample=False
77
  )
78
 
79
  # Trim input tokens from output
80
  generated_ids_trimmed = [
81
- out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
 
82
  ]
83
 
84
  # Decode output
85
  output_text = processor.batch_decode(
86
  generated_ids_trimmed,
87
  skip_special_tokens=True,
88
- clean_up_tokenization_spaces=False
89
  )[0]
90
 
91
  # Try to parse as JSON for pretty formatting
@@ -99,11 +95,12 @@ def extract_metadata(image):
99
  except Exception as e:
100
  return f"Error during extraction: {str(e)}"
101
 
 
102
  # Create Gradio interface
103
  with gr.Blocks(title="Library Card Metadata Extractor") as demo:
104
  gr.Markdown("# 📇 Library Card Metadata Extractor")
105
  gr.Markdown(
106
- "Extract structured metadata from library catalog cards using **Qwen3-VL-30B**. "
107
  "Upload an image of a catalog card and get JSON-formatted metadata including title, author, dates, "
108
  "call numbers, and more.\n\n"
109
  "This demo works with catalog cards from libraries and archives, such as the "
@@ -116,25 +113,14 @@ with gr.Blocks(title="Library Card Metadata Extractor") as demo:
116
  with gr.Row():
117
  with gr.Column(scale=1):
118
  gr.Markdown("### 📤 Upload Catalog Card")
119
- image_input = gr.Image(
120
- label="Library Catalog Card",
121
- type="pil"
122
- )
123
  submit_btn = gr.Button("🔍 Extract Metadata", variant="primary", size="lg")
124
 
125
  with gr.Column(scale=1):
126
  gr.Markdown("### 📋 Extracted Metadata (JSON)")
127
- output = gr.Code(
128
- label="Metadata",
129
- language="json",
130
- lines=15
131
- )
132
 
133
- submit_btn.click(
134
- fn=extract_metadata,
135
- inputs=image_input,
136
- outputs=output
137
- )
138
 
139
  gr.Markdown("---")
140
 
@@ -152,7 +138,7 @@ with gr.Blocks(title="Library Card Metadata Extractor") as demo:
152
  inputs=image_input,
153
  outputs=output,
154
  fn=extract_metadata,
155
- cache_examples=False
156
  )
157
 
158
  gr.Markdown("---")
 
12
  # Load model and processor
13
  print("Loading Qwen3-VL-30B-A3B-Instruct model...")
14
  model = AutoModelForImageTextToText.from_pretrained(
15
+ "Qwen/Qwen3-VL-30B-A3B-Instruct", torch_dtype=torch.bfloat16, device_map="auto"
 
 
16
  )
17
  processor = AutoProcessor.from_pretrained("Qwen/Qwen3-VL-30B-A3B-Instruct")
18
  print("Model loaded successfully!")
 
23
  - date: Any dates mentioned (publication, creation, or coverage dates)
24
  - call_number: Library classification or call number
25
  - physical_description: Details about the physical item (size, extent, format)
 
26
  - notes: Any additional notes or information
27
 
28
+ Return ONLY the JSON object, nothing else. If a field is not present on the card, use null for that field."""
29
+
30
 
31
  @spaces.GPU
32
  def extract_metadata(image):
 
45
  "role": "user",
46
  "content": [
47
  {"type": "image", "image": image},
48
+ {"type": "text", "text": EXTRACTION_PROMPT},
49
+ ],
50
  }
51
  ]
52
 
 
61
  images=image_inputs,
62
  videos=video_inputs,
63
  padding=True,
64
+ return_tensors="pt",
65
  )
66
  inputs = inputs.to(model.device)
67
 
68
  # Generate
69
  with torch.inference_mode():
70
  generated_ids = model.generate(
71
+ **inputs, max_new_tokens=512, temperature=0.1, do_sample=False
 
 
 
72
  )
73
 
74
  # Trim input tokens from output
75
  generated_ids_trimmed = [
76
+ out_ids[len(in_ids) :]
77
+ for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
78
  ]
79
 
80
  # Decode output
81
  output_text = processor.batch_decode(
82
  generated_ids_trimmed,
83
  skip_special_tokens=True,
84
+ clean_up_tokenization_spaces=False,
85
  )[0]
86
 
87
  # Try to parse as JSON for pretty formatting
 
95
  except Exception as e:
96
  return f"Error during extraction: {str(e)}"
97
 
98
+
99
  # Create Gradio interface
100
  with gr.Blocks(title="Library Card Metadata Extractor") as demo:
101
  gr.Markdown("# 📇 Library Card Metadata Extractor")
102
  gr.Markdown(
103
+ "Extract structured metadata from library catalog cards using **Qwen/Qwen3-VL-30B-A3B-Instruct**. "
104
  "Upload an image of a catalog card and get JSON-formatted metadata including title, author, dates, "
105
  "call numbers, and more.\n\n"
106
  "This demo works with catalog cards from libraries and archives, such as the "
 
113
  with gr.Row():
114
  with gr.Column(scale=1):
115
  gr.Markdown("### 📤 Upload Catalog Card")
116
+ image_input = gr.Image(label="Library Catalog Card", type="pil")
 
 
 
117
  submit_btn = gr.Button("🔍 Extract Metadata", variant="primary", size="lg")
118
 
119
  with gr.Column(scale=1):
120
  gr.Markdown("### 📋 Extracted Metadata (JSON)")
121
+ output = gr.Code(label="Metadata", language="json", lines=15)
 
 
 
 
122
 
123
+ submit_btn.click(fn=extract_metadata, inputs=image_input, outputs=output)
 
 
 
 
124
 
125
  gr.Markdown("---")
126
 
 
138
  inputs=image_input,
139
  outputs=output,
140
  fn=extract_metadata,
141
+ cache_examples=False,
142
  )
143
 
144
  gr.Markdown("---")