Spaces:
Sleeping
Sleeping
Update backend/app/openrouter_client.py
Browse files
- backend/app/openrouter_client.py +40 -10
backend/app/openrouter_client.py
CHANGED
|
@@ -40,10 +40,10 @@ def _pdf_to_images(pdf_bytes: bytes) -> List[bytes]:
|
|
| 40 |
mat = fitz.Matrix(2.0, 2.0) # 2x zoom for better quality
|
| 41 |
pix = page.get_pixmap(matrix=mat)
|
| 42 |
|
| 43 |
-
# Convert to PIL Image then to
|
| 44 |
img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
| 45 |
img_bytes = BytesIO()
|
| 46 |
-
img.save(img_bytes, format="
|
| 47 |
images.append(img_bytes.getvalue())
|
| 48 |
|
| 49 |
print(f"[INFO] Converted page {page_num + 1} to image ({pix.width}x{pix.height})")
|
|
@@ -53,9 +53,9 @@ def _pdf_to_images(pdf_bytes: bytes) -> List[bytes]:
|
|
| 53 |
|
| 54 |
|
| 55 |
def _image_bytes_to_base64(image_bytes: bytes) -> str:
|
| 56 |
-
"""Convert image bytes to base64 data URL."""
|
| 57 |
b64 = base64.b64encode(image_bytes).decode("utf-8")
|
| 58 |
-
return f"data:image/
|
| 59 |
|
| 60 |
|
| 61 |
def _file_to_image_blocks(file_bytes: bytes, content_type: str) -> List[Dict[str, Any]]:
|
|
@@ -73,12 +73,13 @@ def _file_to_image_blocks(file_bytes: bytes, content_type: str) -> List[Dict[str
|
|
| 73 |
pdf_images = _pdf_to_images(file_bytes)
|
| 74 |
|
| 75 |
# Create image blocks for each page
|
|
|
|
| 76 |
image_blocks = []
|
| 77 |
for i, img_bytes in enumerate(pdf_images):
|
| 78 |
-
|
| 79 |
image_blocks.append({
|
| 80 |
-
"type": "
|
| 81 |
-
"image_url":
|
| 82 |
})
|
| 83 |
print(f"[INFO] Created image block for page {i + 1} ({len(img_bytes)} bytes)")
|
| 84 |
|
|
@@ -86,12 +87,41 @@ def _file_to_image_blocks(file_bytes: bytes, content_type: str) -> List[Dict[str
|
|
| 86 |
|
| 87 |
# Handle regular image files
|
| 88 |
else:
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
print(f"[DEBUG] Encoding image file. Content type: {content_type}, Size: {len(file_bytes)} bytes")
|
| 91 |
|
| 92 |
return [{
|
| 93 |
-
"type": "
|
| 94 |
-
"image_url":
|
| 95 |
}]
|
| 96 |
|
| 97 |
|
|
|
|
| 40 |
mat = fitz.Matrix(2.0, 2.0) # 2x zoom for better quality
|
| 41 |
pix = page.get_pixmap(matrix=mat)
|
| 42 |
|
| 43 |
+
# Convert to PIL Image then to JPEG bytes (better compression, matches working code)
|
| 44 |
img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
| 45 |
img_bytes = BytesIO()
|
| 46 |
+
img.save(img_bytes, format="JPEG", quality=95)
|
| 47 |
images.append(img_bytes.getvalue())
|
| 48 |
|
| 49 |
print(f"[INFO] Converted page {page_num + 1} to image ({pix.width}x{pix.height})")
|
|
|
|
| 53 |
|
| 54 |
|
| 55 |
def _image_bytes_to_base64(image_bytes: bytes) -> str:
|
| 56 |
+
"""Convert image bytes to base64 data URL (JPEG format)."""
|
| 57 |
b64 = base64.b64encode(image_bytes).decode("utf-8")
|
| 58 |
+
return f"data:image/jpeg;base64,{b64}"
|
| 59 |
|
| 60 |
|
| 61 |
def _file_to_image_blocks(file_bytes: bytes, content_type: str) -> List[Dict[str, Any]]:
|
|
|
|
| 73 |
pdf_images = _pdf_to_images(file_bytes)
|
| 74 |
|
| 75 |
# Create image blocks for each page
|
| 76 |
+
# OpenRouter format: {"type": "image_url", "image_url": {"url": "data:..."}}
|
| 77 |
image_blocks = []
|
| 78 |
for i, img_bytes in enumerate(pdf_images):
|
| 79 |
+
data_url = _image_bytes_to_base64(img_bytes)
|
| 80 |
image_blocks.append({
|
| 81 |
+
"type": "image_url",
|
| 82 |
+
"image_url": {"url": data_url}
|
| 83 |
})
|
| 84 |
print(f"[INFO] Created image block for page {i + 1} ({len(img_bytes)} bytes)")
|
| 85 |
|
|
|
|
| 87 |
|
| 88 |
# Handle regular image files
|
| 89 |
else:
|
| 90 |
+
# Convert to JPEG for consistency (better compression)
|
| 91 |
+
try:
|
| 92 |
+
img = Image.open(BytesIO(file_bytes))
|
| 93 |
+
if img.mode != "RGB":
|
| 94 |
+
img = img.convert("RGB")
|
| 95 |
+
|
| 96 |
+
# Resize if too large (max 1920px on longest side) - matches your working code
|
| 97 |
+
max_size = 1920
|
| 98 |
+
w, h = img.size
|
| 99 |
+
if w > max_size or h > max_size:
|
| 100 |
+
if w > h:
|
| 101 |
+
new_w = max_size
|
| 102 |
+
new_h = int(h * (max_size / w))
|
| 103 |
+
else:
|
| 104 |
+
new_h = max_size
|
| 105 |
+
new_w = int(w * (max_size / h))
|
| 106 |
+
img = img.resize((new_w, new_h), Image.LANCZOS)
|
| 107 |
+
print(f"[INFO] Resized image from {w}x{h} to {new_w}x{new_h}")
|
| 108 |
+
|
| 109 |
+
# Convert to JPEG bytes
|
| 110 |
+
img_bytes = BytesIO()
|
| 111 |
+
img.save(img_bytes, format="JPEG", quality=95)
|
| 112 |
+
img_bytes = img_bytes.getvalue()
|
| 113 |
+
data_url = _image_bytes_to_base64(img_bytes)
|
| 114 |
+
except Exception as e:
|
| 115 |
+
# Fallback: use original file bytes
|
| 116 |
+
print(f"[WARNING] Could not process image with PIL: {e}. Using original bytes.")
|
| 117 |
+
b64 = base64.b64encode(file_bytes).decode("utf-8")
|
| 118 |
+
data_url = f"data:{content_type};base64,{b64}"
|
| 119 |
+
|
| 120 |
print(f"[DEBUG] Encoding image file. Content type: {content_type}, Size: {len(file_bytes)} bytes")
|
| 121 |
|
| 122 |
return [{
|
| 123 |
+
"type": "image_url",
|
| 124 |
+
"image_url": {"url": data_url}
|
| 125 |
}]
|
| 126 |
|
| 127 |
|