Spaces:

davanstrien
/

vllm-index-card-extractor

Running on Zero

App Files Files Community

davanstrien HF Staff committed on Oct 6

Commit

a85cd29

verified ·

1 Parent(s): 9843c36

Upload 9 files

Browse files

Files changed (10) hide show

.gitattributes +1 -0
README.md +43 -6
app.py +171 -0
examples/bpl_0.jpg +0 -0
examples/bpl_1.jpg +0 -0
examples/bpl_2.jpg +0 -0
examples/rubenstein_0.jpg +0 -0
examples/rubenstein_1.jpg +3 -0
examples/rubenstein_2.jpg +0 -0
requirements.txt +7 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+examples/rubenstein_1.jpg filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,12 +1,49 @@
 ---
-title: Vllm Index Card Extractor
-emoji: 👀
-colorFrom: gray
-colorTo: blue
 sdk: gradio
-sdk_version: 5.49.0
 app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Library Card Metadata Extractor
+emoji: 📇
+colorFrom: blue
+colorTo: purple
 sdk: gradio
+sdk_version: 5.30.0
 app_file: app.py
 pinned: false
 ---
+# Library Card Metadata Extractor
+Extract structured metadata from library catalog cards using Qwen3-VL-30B-A3B-Instruct.
+## Features
+- Upload an image of a library catalog card
+- Automatically extract metadata fields:
+  - Title
+  - Author/Creator
+  - Dates
+  - Call Number
+  - Physical Description
+  - Subjects
+  - Notes
+- Returns structured JSON output
+## Datasets
+This demo works with catalog cards from:
+- [Rubenstein Manuscript Catalog](https://huggingface.co/datasets/biglam/rubenstein-manuscript-catalog) - Duke University's manuscript catalog cards
+- [Boston Public Library Card Catalog](https://huggingface.co/datasets/biglam/bpl-card-catalog) - BPL's rare books catalog cards
+## Model
+Uses [Qwen3-VL-30B-A3B-Instruct](https://huggingface.co/Qwen/Qwen3-VL-30B-A3B-Instruct), a powerful vision-language model with:
+- 30B total parameters (Mixture-of-Experts, roughly 3B active per token)
+- 256K context length
+- Advanced OCR capabilities in 32 languages
+- Strong visual understanding and reasoning
+## Usage
+1. Upload an image of a library catalog card
+2. Click "Extract Metadata"
+3. View the extracted metadata as formatted JSON
+Built for the GLAM (Galleries, Libraries, Archives, Museums) community.

app.py ADDED Viewed

	@@ -0,0 +1,171 @@

+import json
+import os
+import gradio as gr
+from PIL import Image
+import torch
+import spaces
+from transformers import (
+    AutoProcessor,
+    Qwen2VLForConditionalGeneration,
+    Qwen3VLMoeForConditionalGeneration,
+)
+from qwen_vl_utils import process_vision_info
+os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
+# Load model and processor
+print("Loading Qwen3-VL-30B-A3B-Instruct model...")
+model = Qwen2VLForConditionalGeneration.from_pretrained(
+    "Qwen/Qwen3-VL-30B-A3B-Instruct",
+    torch_dtype=torch.bfloat16,
+    device_map="auto"
+)
+processor = AutoProcessor.from_pretrained("Qwen/Qwen3-VL-30B-A3B-Instruct")
+print("Model loaded successfully!")
+# Instruction sent to the model alongside the card image. It lists the JSON fields
+# to extract and asks for null when a field is not present on the card.
+EXTRACTION_PROMPT = """Extract all metadata from this library catalog card and return it as valid JSON with the following fields:
+- title: The main title or name on the card
+- author: Author, creator, or associated person/organization
+- date: Any dates mentioned (publication, creation, or coverage dates)
+- call_number: Library classification or call number
+- physical_description: Details about the physical item (size, extent, format)
+- subjects: Subject headings or topics
+- notes: Any additional notes or information
+Return ONLY the JSON object, nothing else. If a field is not present on the card, use null for that field."""
+@spaces.GPU
+def extract_metadata(image):
+    """Extract structured metadata from catalog card image."""
+    if image is None:
+        return "Please upload an image."
+    try:
+        # Ensure image is PIL Image
+        if not isinstance(image, Image.Image):
+            image = Image.open(image).convert("RGB")
+        # Format messages for Qwen3-VL
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "image", "image": image},
+                    {"type": "text", "text": EXTRACTION_PROMPT}
+                ]
+            }
+        ]
+        # Prepare inputs
+        text = processor.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True
+        )
+        image_inputs, video_inputs = process_vision_info(messages)
+        inputs = processor(
+            text=[text],
+            images=image_inputs,
+            videos=video_inputs,
+            padding=True,
+            return_tensors="pt"
+        )
+        inputs = inputs.to(model.device)
+        # Generate
+        with torch.inference_mode():
+            generated_ids = model.generate(
+                **inputs,
+                max_new_tokens=512,
+                temperature=0.1,
+                do_sample=False
+            )
+        # Trim input tokens from output
+        generated_ids_trimmed = [
+            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+        ]
+        # Decode output
+        output_text = processor.batch_decode(
+            generated_ids_trimmed,
+            skip_special_tokens=True,
+            clean_up_tokenization_spaces=False
+        )[0]
+        # Try to parse as JSON for pretty formatting
+        try:
+            json_data = json.loads(output_text)
+            return json.dumps(json_data, indent=2)
+        except json.JSONDecodeError:
+            # If not valid JSON, return as-is
+            return output_text
+    except Exception as e:
+        return f"Error during extraction: {str(e)}"
+# Create Gradio interface
+with gr.Blocks(title="Library Card Metadata Extractor") as demo:
+    gr.Markdown("# 📇 Library Card Metadata Extractor")
+    gr.Markdown(
+        "Extract structured metadata from library catalog cards using **Qwen3-VL-30B**. "
+        "Upload an image of a catalog card and get JSON-formatted metadata including title, author, dates, "
+        "call numbers, and more.\n\n"
+        "This demo works with catalog cards from libraries and archives, such as the "
+        "[Rubenstein Manuscript Catalog](https://huggingface.co/datasets/biglam/rubenstein-manuscript-catalog) "
+        "and [Boston Public Library Card Catalog](https://huggingface.co/datasets/biglam/bpl-card-catalog)."
+    )
+    gr.Markdown("---")
+    with gr.Row():
+        with gr.Column(scale=1):
+            gr.Markdown("### 📤 Upload Catalog Card")
+            image_input = gr.Image(
+                label="Library Catalog Card",
+                type="pil"
+            )
+            submit_btn = gr.Button("🔍 Extract Metadata", variant="primary", size="lg")
+        with gr.Column(scale=1):
+            gr.Markdown("### 📋 Extracted Metadata (JSON)")
+            output = gr.Code(
+                label="Metadata",
+                language="json",
+                lines=15
+            )
+    submit_btn.click(
+        fn=extract_metadata,
+        inputs=image_input,
+        outputs=output
+    )
+    gr.Markdown("---")
+    # Examples
+    gr.Markdown("## 🎯 Try Examples")
+    gr.Examples(
+        examples=[
+            ["examples/rubenstein_0.jpg"],
+            ["examples/rubenstein_1.jpg"],
+            ["examples/rubenstein_2.jpg"],
+            ["examples/bpl_0.jpg"],
+            ["examples/bpl_1.jpg"],
+            ["examples/bpl_2.jpg"],
+        ],
+        inputs=image_input,
+        outputs=output,
+        fn=extract_metadata,
+        cache_examples=False
+    )
+    gr.Markdown("---")
+    # Footer
+    gr.Markdown(
+        "<center>\n\n"
+        "Built for the GLAM community using [Qwen3-VL-30B-A3B-Instruct](https://huggingface.co/Qwen/Qwen3-VL-30B-A3B-Instruct) | "
+        "Example cards from [Rubenstein](https://huggingface.co/datasets/biglam/rubenstein-manuscript-catalog) "
+        "and [BPL](https://huggingface.co/datasets/biglam/bpl-card-catalog) collections\n\n"
+        "</center>"
+    )
+if __name__ == "__main__":
+    print("Launching demo...")
+    demo.launch()

examples/bpl_0.jpg ADDED Viewed

examples/bpl_1.jpg ADDED Viewed

examples/bpl_2.jpg ADDED Viewed

examples/rubenstein_0.jpg ADDED Viewed

examples/rubenstein_1.jpg ADDED Viewed

Git LFS Details

SHA256: edc64010c4c2dbabe505cd8d17843628ce2f26b4eaee93e41d0cd2330f67345e
Pointer size: 131 Bytes
Size of remote file: 170 kB

examples/rubenstein_2.jpg ADDED Viewed

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+torch
+gradio
+Pillow
+# Qwen3-VL model classes are only available from transformers 4.57.0 onwards.
+transformers>=4.57.0
+qwen-vl-utils
+spaces
+accelerate