Spaces:

Seth0330
/

AIEXTRACT1

Sleeping

App Files Files Community

Seth0330 committed 15 days ago

Commit

ef35ecf

verified ·

1 Parent(s): d091adc

Update backend/app/openrouter_client.py

Browse files

Files changed (1) hide show

backend/app/openrouter_client.py +40 -10

backend/app/openrouter_client.py CHANGED Viewed

@@ -40,10 +40,10 @@ def _pdf_to_images(pdf_bytes: bytes) -> List[bytes]:
         mat = fitz.Matrix(2.0, 2.0)  # 2x zoom for better quality
         pix = page.get_pixmap(matrix=mat)
-        # Convert to PIL Image then to PNG bytes
         img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
         img_bytes = BytesIO()
-        img.save(img_bytes, format="PNG")
         images.append(img_bytes.getvalue())
         print(f"[INFO] Converted page {page_num + 1} to image ({pix.width}x{pix.height})")
@@ -53,9 +53,9 @@ def _pdf_to_images(pdf_bytes: bytes) -> List[bytes]:
 def _image_bytes_to_base64(image_bytes: bytes) -> str:
-    """Convert image bytes to base64 data URL."""
     b64 = base64.b64encode(image_bytes).decode("utf-8")
-    return f"data:image/png;base64,{b64}"
 def _file_to_image_blocks(file_bytes: bytes, content_type: str) -> List[Dict[str, Any]]:
@@ -73,12 +73,13 @@ def _file_to_image_blocks(file_bytes: bytes, content_type: str) -> List[Dict[str
         pdf_images = _pdf_to_images(file_bytes)
         # Create image blocks for each page
         image_blocks = []
         for i, img_bytes in enumerate(pdf_images):
-            image_url = _image_bytes_to_base64(img_bytes)
             image_blocks.append({
-                "type": "input_image",
-                "image_url": image_url,
             })
             print(f"[INFO] Created image block for page {i + 1} ({len(img_bytes)} bytes)")
@@ -86,12 +87,41 @@ def _file_to_image_blocks(file_bytes: bytes, content_type: str) -> List[Dict[str
     # Handle regular image files
     else:
-        b64 = base64.b64encode(file_bytes).decode("utf-8")
         print(f"[DEBUG] Encoding image file. Content type: {content_type}, Size: {len(file_bytes)} bytes")
         return [{
-            "type": "input_image",
-            "image_url": f"data:{content_type};base64,{b64}",
         }]

         mat = fitz.Matrix(2.0, 2.0)  # 2x zoom for better quality
         pix = page.get_pixmap(matrix=mat)
+        # Convert to PIL Image then to JPEG bytes (better compression, matches working code)
         img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
         img_bytes = BytesIO()
+        img.save(img_bytes, format="JPEG", quality=95)
         images.append(img_bytes.getvalue())
         print(f"[INFO] Converted page {page_num + 1} to image ({pix.width}x{pix.height})")
 def _image_bytes_to_base64(image_bytes: bytes) -> str:
+    """Convert image bytes to base64 data URL (JPEG format)."""
     b64 = base64.b64encode(image_bytes).decode("utf-8")
+    return f"data:image/jpeg;base64,{b64}"
 def _file_to_image_blocks(file_bytes: bytes, content_type: str) -> List[Dict[str, Any]]:
         pdf_images = _pdf_to_images(file_bytes)
         # Create image blocks for each page
+        # OpenRouter format: {"type": "image_url", "image_url": {"url": "data:..."}}
         image_blocks = []
         for i, img_bytes in enumerate(pdf_images):
+            data_url = _image_bytes_to_base64(img_bytes)
             image_blocks.append({
+                "type": "image_url",
+                "image_url": {"url": data_url}
             })
             print(f"[INFO] Created image block for page {i + 1} ({len(img_bytes)} bytes)")
     # Handle regular image files
     else:
+        # Convert to JPEG for consistency (better compression)
+        try:
+            img = Image.open(BytesIO(file_bytes))
+            if img.mode != "RGB":
+                img = img.convert("RGB")
+            # Resize if too large (max 1920px on longest side) - matches your working code
+            max_size = 1920
+            w, h = img.size
+            if w > max_size or h > max_size:
+                if w > h:
+                    new_w = max_size
+                    new_h = int(h * (max_size / w))
+                else:
+                    new_h = max_size
+                    new_w = int(w * (max_size / h))
+                img = img.resize((new_w, new_h), Image.LANCZOS)
+                print(f"[INFO] Resized image from {w}x{h} to {new_w}x{new_h}")
+            # Convert to JPEG bytes
+            img_bytes = BytesIO()
+            img.save(img_bytes, format="JPEG", quality=95)
+            img_bytes = img_bytes.getvalue()
+            data_url = _image_bytes_to_base64(img_bytes)
+        except Exception as e:
+            # Fallback: use original file bytes
+            print(f"[WARNING] Could not process image with PIL: {e}. Using original bytes.")
+            b64 = base64.b64encode(file_bytes).decode("utf-8")
+            data_url = f"data:{content_type};base64,{b64}"
         print(f"[DEBUG] Encoding image file. Content type: {content_type}, Size: {len(file_bytes)} bytes")
         return [{
+            "type": "image_url",
+            "image_url": {"url": data_url}
         }]