davanstrien HF Staff committed on
Commit
a85cd29
·
verified ·
1 Parent(s): 9843c36

Upload 9 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ examples/rubenstein_1.jpg filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,12 +1,49 @@
1
  ---
2
- title: Vllm Index Card Extractor
3
- emoji: 👀
4
- colorFrom: gray
5
- colorTo: blue
6
  sdk: gradio
7
- sdk_version: 5.49.0
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Library Card Metadata Extractor
3
+ emoji: 📇
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 5.30.0
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
+ # Library Card Metadata Extractor
13
+
14
+ Extract structured metadata from library catalog cards using Qwen3-VL-30B-A3B-Instruct.
15
+
16
+ ## Features
17
+
18
+ - Upload an image of a library catalog card
19
+ - Automatically extract metadata fields:
20
+ - Title
21
+ - Author/Creator
22
+ - Dates
23
+ - Call Number
24
+ - Physical Description
25
+ - Subjects
26
+ - Notes
27
+ - Returns structured JSON output
28
+
29
+ ## Datasets
30
+
31
+ This demo works with catalog cards from:
32
+ - [Rubenstein Manuscript Catalog](https://huggingface.co/datasets/biglam/rubenstein-manuscript-catalog) - Duke University's manuscript catalog cards
33
+ - [Boston Public Library Card Catalog](https://huggingface.co/datasets/biglam/bpl-card-catalog) - BPL's rare books catalog cards
34
+
35
+ ## Model
36
+
37
+ Uses [Qwen3-VL-30B-A3B-Instruct](https://huggingface.co/Qwen/Qwen3-VL-30B-A3B-Instruct), a powerful vision-language model with:
38
+ - 30B total parameters (mixture-of-experts, about 3B active per token)
39
+ - 256K context length
40
+ - Advanced OCR capabilities in 32 languages
41
+ - Strong visual understanding and reasoning
42
+
43
+ ## Usage
44
+
45
+ 1. Upload an image of a library catalog card
46
+ 2. Click "Extract Metadata"
47
+ 3. View the extracted metadata as formatted JSON
48
+
49
+ Built for the GLAM (Galleries, Libraries, Archives, Museums) community.
app.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from PIL import Image
3
+ import os
4
+ import torch
5
+ import json
6
+ import spaces
7
+ from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
8
+ from qwen_vl_utils import process_vision_info
9
+
10
# Request the hf_transfer download backend for Hugging Face Hub downloads.
# NOTE(review): this is set after gradio/transformers were imported above; if
# huggingface_hub reads the flag at import time it may have no effect here, and
# hf_transfer is not listed in requirements.txt. Confirm before relying on it.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

# The checkpoint is a Qwen3-VL mixture-of-experts model, so it must not be
# instantiated through the Qwen2-VL class. The Auto class resolves the
# architecture declared in the checkpoint's own config (this needs a
# transformers release that ships Qwen3-VL support).
from transformers import AutoModelForImageTextToText  # noqa: E402

MODEL_ID = "Qwen/Qwen3-VL-30B-A3B-Instruct"

# Load model and processor once at import time so every request reuses them.
print("Loading Qwen3-VL-30B-A3B-Instruct model...")
model = AutoModelForImageTextToText.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
processor = AutoProcessor.from_pretrained(MODEL_ID)
print("Model loaded successfully!")
21
+
22
# Instruction sent to the model together with the card image. It lists the
# JSON fields to extract and asks for a bare JSON object with null for any
# field that is missing from the card.
EXTRACTION_PROMPT = "\n".join(
    (
        "Extract all metadata from this library catalog card and return it as valid JSON with the following fields:",
        "- title: The main title or name on the card",
        "- author: Author, creator, or associated person/organization",
        "- date: Any dates mentioned (publication, creation, or coverage dates)",
        "- call_number: Library classification or call number",
        "- physical_description: Details about the physical item (size, extent, format)",
        "- subjects: Subject headings or topics",
        "- notes: Any additional notes or information",
        "",
        "Return ONLY the JSON object, nothing else. If a field is not present on the card, use null for that field.",
    )
)
32
+
33
def _format_model_output(raw_text):
    """Pretty-print the model reply as JSON when it can be parsed.

    The prompt asks for a bare JSON object, but the reply may still arrive
    wrapped in a Markdown code fence. The fence is removed before parsing.
    A reply that does not parse as JSON is returned unchanged.
    """
    candidate = raw_text.strip()
    if candidate.startswith("```"):
        # Drop the opening fence line (it may carry a language tag such as
        # "json") and the closing fence, keeping only the payload.
        first_newline = candidate.find("\n")
        candidate = candidate[first_newline + 1:] if first_newline != -1 else ""
        candidate = candidate.rstrip()
        if candidate.endswith("```"):
            candidate = candidate[:-3]
        candidate = candidate.strip()

    try:
        parsed = json.loads(candidate)
    except json.JSONDecodeError:
        return raw_text
    # ensure_ascii=False keeps accented and non-Latin characters readable
    # instead of turning them into \uXXXX escapes.
    return json.dumps(parsed, indent=2, ensure_ascii=False)


@spaces.GPU
def extract_metadata(image):
    """Extract structured metadata from a catalog card image.

    Args:
        image: A PIL image, or anything ``Image.open`` accepts (for example
            a file path). ``None`` means nothing was uploaded.

    Returns:
        A string for the output widget: pretty-printed JSON when the model
        reply parses as JSON, otherwise the raw reply. If no image was given
        or any step fails, a human-readable message is returned instead of
        raising.
    """
    if image is None:
        return "Please upload an image."

    try:
        # Ensure image is a PIL Image
        if not isinstance(image, Image.Image):
            image = Image.open(image).convert("RGB")

        # Single-turn chat message carrying the image and the instruction
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image", "image": image},
                    {"type": "text", "text": EXTRACTION_PROMPT},
                ],
            }
        ]

        # Prepare inputs
        prompt_text = processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        image_inputs, video_inputs = process_vision_info(messages)
        inputs = processor(
            text=[prompt_text],
            images=image_inputs,
            videos=video_inputs,
            padding=True,
            return_tensors="pt",
        )
        inputs = inputs.to(model.device)

        # Greedy decoding. No temperature is passed because sampling
        # parameters are ignored when do_sample=False.
        with torch.inference_mode():
            generated_ids = model.generate(
                **inputs,
                max_new_tokens=512,
                do_sample=False,
            )

        # generate() returns prompt + completion; keep only the completion
        new_token_ids = [
            out_ids[len(in_ids):]
            for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]

        output_text = processor.batch_decode(
            new_token_ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )[0]

        return _format_model_output(output_text)

    except Exception as exc:
        # UI boundary: report the failure in the output panel rather than
        # crashing the request.
        return f"Error during extraction: {exc}"
101
+
102
+ # Create Gradio interface
103
# Gradio interface: an upload column and a JSON output column side by side,
# followed by clickable example cards and a footer with source links.
with gr.Blocks(title="Library Card Metadata Extractor") as demo:
    gr.Markdown("# 📇 Library Card Metadata Extractor")
    gr.Markdown(
        "Extract structured metadata from library catalog cards using **Qwen3-VL-30B**. "
        "Upload an image of a catalog card and get JSON-formatted metadata including title, author, dates, "
        "call numbers, and more.\n\n"
        "This demo works with catalog cards from libraries and archives, such as the "
        "[Rubenstein Manuscript Catalog](https://huggingface.co/datasets/biglam/rubenstein-manuscript-catalog) "
        "and [Boston Public Library Card Catalog](https://huggingface.co/datasets/biglam/bpl-card-catalog)."
    )

    gr.Markdown("---")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Upload Catalog Card")
            # type="pil" makes Gradio hand extract_metadata a PIL image
            image_input = gr.Image(
                label="Library Catalog Card",
                type="pil",
            )
            submit_btn = gr.Button("🔍 Extract Metadata", variant="primary", size="lg")

        with gr.Column(scale=1):
            gr.Markdown("### 📋 Extracted Metadata (JSON)")
            output = gr.Code(
                label="Metadata",
                language="json",
                lines=15,
            )

    submit_btn.click(
        fn=extract_metadata,
        inputs=image_input,
        outputs=output,
    )

    gr.Markdown("---")

    # Examples
    gr.Markdown("## 🎯 Try Examples")
    gr.Examples(
        examples=[
            ["examples/rubenstein_0.jpg"],
            ["examples/rubenstein_1.jpg"],
            ["examples/rubenstein_2.jpg"],
            ["examples/bpl_0.jpg"],
            ["examples/bpl_1.jpg"],
            ["examples/bpl_2.jpg"],
        ],
        inputs=image_input,
        outputs=output,
        fn=extract_metadata,
        cache_examples=False,
    )

    gr.Markdown("---")

    # Footer
    gr.Markdown(
        "<center>\n\n"
        "Built for the GLAM community using [Qwen3-VL-30B-A3B-Instruct](https://huggingface.co/Qwen/Qwen3-VL-30B-A3B-Instruct) | "
        "Example cards from [Rubenstein](https://huggingface.co/datasets/biglam/rubenstein-manuscript-catalog) "
        "and [BPL](https://huggingface.co/datasets/biglam/bpl-card-catalog) collections\n\n"
        "</center>"
    )

if __name__ == "__main__":
    print("Launching demo...")
    demo.launch()
examples/bpl_0.jpg ADDED
examples/bpl_1.jpg ADDED
examples/bpl_2.jpg ADDED
examples/rubenstein_0.jpg ADDED
examples/rubenstein_1.jpg ADDED

Git LFS Details

  • SHA256: edc64010c4c2dbabe505cd8d17843628ce2f26b4eaee93e41d0cd2330f67345e
  • Pointer size: 131 Bytes
  • Size of remote file: 170 kB
examples/rubenstein_2.jpg ADDED
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ torch
2
+ gradio
3
+ Pillow
4
+ transformers>=4.57.0
5
+ qwen-vl-utils
6
+ spaces
7
+ accelerate