Spaces:
Sleeping
Sleeping
Update backend/app/openrouter_client.py
Browse files
- backend/app/openrouter_client.py +40 -18
backend/app/openrouter_client.py
CHANGED
|
@@ -146,20 +146,23 @@ async def extract_fields_from_document(
|
|
| 146 |
print(f"[INFO] Generated {len(image_blocks)} image block(s) for processing")
|
| 147 |
|
| 148 |
system_prompt = (
|
| 149 |
-
"You are a document extraction engine. "
|
| 150 |
-
"You
|
| 151 |
-
"
|
| 152 |
)
|
| 153 |
|
| 154 |
-
# Update prompt for multi-page documents
|
| 155 |
if len(image_blocks) > 1:
|
| 156 |
user_prompt = (
|
| 157 |
-
f"
|
| 158 |
-
"
|
| 159 |
-
"
|
|
|
|
|
|
|
| 160 |
"{\n"
|
| 161 |
-
' \"doc_type\": \"invoice | receipt | contract | report | other\",\n'
|
| 162 |
' \"confidence\": number between 0 and 100,\n'
|
|
|
|
| 163 |
' \"fields\": {\n'
|
| 164 |
' \"invoice_number\": \"...\",\n'
|
| 165 |
' \"date\": \"...\",\n'
|
|
@@ -167,22 +170,35 @@ async def extract_fields_from_document(
|
|
| 167 |
' \"total_amount\": \"...\",\n'
|
| 168 |
' \"currency\": \"...\",\n'
|
| 169 |
' \"vendor_name\": \"...\",\n'
|
|
|
|
|
|
|
| 170 |
' \"line_items\": [\n'
|
| 171 |
' {\"description\": \"...\", \"quantity\": \"...\", \"unit_price\": \"...\", \"line_total\": \"...\"}\n'
|
| 172 |
' ],\n'
|
| 173 |
' \"other_field\": \"...\"\n'
|
| 174 |
-
" }
|
| 175 |
-
"
|
| 176 |
-
"
|
| 177 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
)
|
| 179 |
else:
|
| 180 |
user_prompt = (
|
| 181 |
-
"
|
| 182 |
-
"
|
|
|
|
|
|
|
| 183 |
"{\n"
|
| 184 |
-
' \"doc_type\": \"invoice | receipt | contract | report | other\",\n'
|
| 185 |
' \"confidence\": number between 0 and 100,\n'
|
|
|
|
| 186 |
' \"fields\": {\n'
|
| 187 |
' \"invoice_number\": \"...\",\n'
|
| 188 |
' \"date\": \"...\",\n'
|
|
@@ -190,13 +206,19 @@ async def extract_fields_from_document(
|
|
| 190 |
' \"total_amount\": \"...\",\n'
|
| 191 |
' \"currency\": \"...\",\n'
|
| 192 |
' \"vendor_name\": \"...\",\n'
|
|
|
|
|
|
|
| 193 |
' \"line_items\": [\n'
|
| 194 |
' {\"description\": \"...\", \"quantity\": \"...\", \"unit_price\": \"...\", \"line_total\": \"...\"}\n'
|
| 195 |
' ],\n'
|
| 196 |
' \"other_field\": \"...\"\n'
|
| 197 |
" }\n"
|
| 198 |
-
"}\n"
|
| 199 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
)
|
| 201 |
|
| 202 |
# Build content array with text prompt and all image blocks
|
|
@@ -215,7 +237,7 @@ async def extract_fields_from_document(
|
|
| 215 |
"content": user_content,
|
| 216 |
},
|
| 217 |
],
|
| 218 |
-
"max_tokens":
|
| 219 |
}
|
| 220 |
|
| 221 |
headers = {
|
|
|
|
| 146 |
print(f"[INFO] Generated {len(image_blocks)} image block(s) for processing")
|
| 147 |
|
| 148 |
system_prompt = (
|
| 149 |
+
"You are a document extraction engine with vision capabilities. "
|
| 150 |
+
"You read and extract text from documents in any language, preserving structure, formatting, and all content. "
|
| 151 |
+
"You output structured JSON with both the full extracted text and key-value pairs."
|
| 152 |
)
|
| 153 |
|
| 154 |
+
# Update prompt for multi-page documents - ask for full text extraction first
|
| 155 |
if len(image_blocks) > 1:
|
| 156 |
user_prompt = (
|
| 157 |
+
f"Read this {len(image_blocks)}-page document using your vision capability and extract ALL text content. "
|
| 158 |
+
"I want the complete end-to-end text from all pages, preserving structure, headings, formatting, and content in all languages.\n\n"
|
| 159 |
+
"Analyze ALL pages thoroughly, including any non-English text (Punjabi, Hindi, or other languages). "
|
| 160 |
+
"Extract every word, number, and piece of information from every page.\n\n"
|
| 161 |
+
"Respond with JSON in this format:\n"
|
| 162 |
"{\n"
|
| 163 |
+
' \"doc_type\": \"invoice | receipt | contract | report | notice | other\",\n'
|
| 164 |
' \"confidence\": number between 0 and 100,\n'
|
| 165 |
+
' \"full_text\": \"Complete extracted text from all pages, preserving structure and formatting. Include all languages.\",\n'
|
| 166 |
' \"fields\": {\n'
|
| 167 |
' \"invoice_number\": \"...\",\n'
|
| 168 |
' \"date\": \"...\",\n'
|
|
|
|
| 170 |
' \"total_amount\": \"...\",\n'
|
| 171 |
' \"currency\": \"...\",\n'
|
| 172 |
' \"vendor_name\": \"...\",\n'
|
| 173 |
+
' \"company_name\": \"...\",\n'
|
| 174 |
+
' \"address\": \"...\",\n'
|
| 175 |
' \"line_items\": [\n'
|
| 176 |
' {\"description\": \"...\", \"quantity\": \"...\", \"unit_price\": \"...\", \"line_total\": \"...\"}\n'
|
| 177 |
' ],\n'
|
| 178 |
' \"other_field\": \"...\"\n'
|
| 179 |
+
" },\n"
|
| 180 |
+
' \"pages\": [\n'
|
| 181 |
+
' {\"page_number\": 1, \"text\": \"Full text from page 1\"},\n'
|
| 182 |
+
' {\"page_number\": 2, \"text\": \"Full text from page 2\"}\n'
|
| 183 |
+
' ]\n'
|
| 184 |
+
"}\n\n"
|
| 185 |
+
"IMPORTANT:\n"
|
| 186 |
+
"- Extract ALL text from ALL pages, including non-English languages\n"
|
| 187 |
+
"- Preserve structure, headings, and formatting in the full_text field\n"
|
| 188 |
+
"- Fill in fields with relevant extracted information\n"
|
| 189 |
+
"- If a field is not found, use empty string or omit it\n"
|
| 190 |
+
"- The full_text should contain everything readable from the document"
|
| 191 |
)
|
| 192 |
else:
|
| 193 |
user_prompt = (
|
| 194 |
+
"Read this document using your vision capability and extract ALL text content. "
|
| 195 |
+
"I want the complete end-to-end text, preserving structure, headings, formatting, and content in all languages.\n\n"
|
| 196 |
+
"Extract every word, number, and piece of information, including any non-English text.\n\n"
|
| 197 |
+
"Respond with JSON in this format:\n"
|
| 198 |
"{\n"
|
| 199 |
+
' \"doc_type\": \"invoice | receipt | contract | report | notice | other\",\n'
|
| 200 |
' \"confidence\": number between 0 and 100,\n'
|
| 201 |
+
' \"full_text\": \"Complete extracted text, preserving structure and formatting. Include all languages.\",\n'
|
| 202 |
' \"fields\": {\n'
|
| 203 |
' \"invoice_number\": \"...\",\n'
|
| 204 |
' \"date\": \"...\",\n'
|
|
|
|
| 206 |
' \"total_amount\": \"...\",\n'
|
| 207 |
' \"currency\": \"...\",\n'
|
| 208 |
' \"vendor_name\": \"...\",\n'
|
| 209 |
+
' \"company_name\": \"...\",\n'
|
| 210 |
+
' \"address\": \"...\",\n'
|
| 211 |
' \"line_items\": [\n'
|
| 212 |
' {\"description\": \"...\", \"quantity\": \"...\", \"unit_price\": \"...\", \"line_total\": \"...\"}\n'
|
| 213 |
' ],\n'
|
| 214 |
' \"other_field\": \"...\"\n'
|
| 215 |
" }\n"
|
| 216 |
+
"}\n\n"
|
| 217 |
+
"IMPORTANT:\n"
|
| 218 |
+
"- Extract ALL text, including non-English languages\n"
|
| 219 |
+
"- Preserve structure, headings, and formatting in the full_text field\n"
|
| 220 |
+
"- Fill in fields with relevant extracted information\n"
|
| 221 |
+
"- If a field is not found, use empty string or omit it"
|
| 222 |
)
|
| 223 |
|
| 224 |
# Build content array with text prompt and all image blocks
|
|
|
|
| 237 |
"content": user_content,
|
| 238 |
},
|
| 239 |
],
|
| 240 |
+
"max_tokens": 8192, # Increased for full text extraction from multi-page documents
|
| 241 |
}
|
| 242 |
|
| 243 |
headers = {
|