Seth0330 committed on
Commit
b5224a9
·
verified ·
1 Parent(s): ef35ecf

Update backend/app/openrouter_client.py

Browse files
Files changed (1) hide show
  1. backend/app/openrouter_client.py +40 -18
backend/app/openrouter_client.py CHANGED
@@ -146,20 +146,23 @@ async def extract_fields_from_document(
146
  print(f"[INFO] Generated {len(image_blocks)} image block(s) for processing")
147
 
148
  system_prompt = (
149
- "You are a document extraction engine. "
150
- "You analyze invoices, receipts, contracts, reports and similar documents, "
151
- "and output structured JSON only (no explanations or comments)."
152
  )
153
 
154
- # Update prompt for multi-page documents
155
  if len(image_blocks) > 1:
156
  user_prompt = (
157
- f"Extract important key-value pairs from this {len(image_blocks)}-page document. "
158
- "Analyze all pages and combine the information into a single JSON response.\n"
159
- "Use this shape:\n"
 
 
160
  "{\n"
161
- ' \"doc_type\": \"invoice | receipt | contract | report | other\",\n'
162
  ' \"confidence\": number between 0 and 100,\n'
 
163
  ' \"fields\": {\n'
164
  ' \"invoice_number\": \"...\",\n'
165
  ' \"date\": \"...\",\n'
@@ -167,22 +170,35 @@ async def extract_fields_from_document(
167
  ' \"total_amount\": \"...\",\n'
168
  ' \"currency\": \"...\",\n'
169
  ' \"vendor_name\": \"...\",\n'
 
 
170
  ' \"line_items\": [\n'
171
  ' {\"description\": \"...\", \"quantity\": \"...\", \"unit_price\": \"...\", \"line_total\": \"...\"}\n'
172
  ' ],\n'
173
  ' \"other_field\": \"...\"\n'
174
- " }\n"
175
- "}\n"
176
- "If fields are missing or not applicable, simply omit them. "
177
- "Combine information from all pages into a single response."
 
 
 
 
 
 
 
 
178
  )
179
  else:
180
  user_prompt = (
181
- "Extract important key-value pairs from the document and respond with JSON only.\n"
182
- "Use this shape:\n"
 
 
183
  "{\n"
184
- ' \"doc_type\": \"invoice | receipt | contract | report | other\",\n'
185
  ' \"confidence\": number between 0 and 100,\n'
 
186
  ' \"fields\": {\n'
187
  ' \"invoice_number\": \"...\",\n'
188
  ' \"date\": \"...\",\n'
@@ -190,13 +206,19 @@ async def extract_fields_from_document(
190
  ' \"total_amount\": \"...\",\n'
191
  ' \"currency\": \"...\",\n'
192
  ' \"vendor_name\": \"...\",\n'
 
 
193
  ' \"line_items\": [\n'
194
  ' {\"description\": \"...\", \"quantity\": \"...\", \"unit_price\": \"...\", \"line_total\": \"...\"}\n'
195
  ' ],\n'
196
  ' \"other_field\": \"...\"\n'
197
  " }\n"
198
- "}\n"
199
- "If fields are missing or not applicable, simply omit them."
 
 
 
 
200
  )
201
 
202
  # Build content array with text prompt and all image blocks
@@ -215,7 +237,7 @@ async def extract_fields_from_document(
215
  "content": user_content,
216
  },
217
  ],
218
- "max_tokens": 4096, # Increased for multi-page documents
219
  }
220
 
221
  headers = {
 
146
  print(f"[INFO] Generated {len(image_blocks)} image block(s) for processing")
147
 
148
  system_prompt = (
149
+ "You are a document extraction engine with vision capabilities. "
150
+ "You read and extract text from documents in any language, preserving structure, formatting, and all content. "
151
+ "You output structured JSON with both the full extracted text and key-value pairs."
152
  )
153
 
154
+ # Update prompt for multi-page documents - ask for full text extraction first
155
  if len(image_blocks) > 1:
156
  user_prompt = (
157
+ f"Read this {len(image_blocks)}-page document using your vision capability and extract ALL text content. "
158
+ "I want the complete end-to-end text from all pages, preserving structure, headings, formatting, and content in all languages.\n\n"
159
+ "Analyze ALL pages thoroughly, including any non-English text (Punjabi, Hindi, or other languages). "
160
+ "Extract every word, number, and piece of information from every page.\n\n"
161
+ "Respond with JSON in this format:\n"
162
  "{\n"
163
+ ' \"doc_type\": \"invoice | receipt | contract | report | notice | other\",\n'
164
  ' \"confidence\": number between 0 and 100,\n'
165
+ ' \"full_text\": \"Complete extracted text from all pages, preserving structure and formatting. Include all languages.\",\n'
166
  ' \"fields\": {\n'
167
  ' \"invoice_number\": \"...\",\n'
168
  ' \"date\": \"...\",\n'
 
170
  ' \"total_amount\": \"...\",\n'
171
  ' \"currency\": \"...\",\n'
172
  ' \"vendor_name\": \"...\",\n'
173
+ ' \"company_name\": \"...\",\n'
174
+ ' \"address\": \"...\",\n'
175
  ' \"line_items\": [\n'
176
  ' {\"description\": \"...\", \"quantity\": \"...\", \"unit_price\": \"...\", \"line_total\": \"...\"}\n'
177
  ' ],\n'
178
  ' \"other_field\": \"...\"\n'
179
+ " },\n"
180
+ ' \"pages\": [\n'
181
+ ' {\"page_number\": 1, \"text\": \"Full text from page 1\"},\n'
182
+ ' {\"page_number\": 2, \"text\": \"Full text from page 2\"}\n'
183
+ ' ]\n'
184
+ "}\n\n"
185
+ "IMPORTANT:\n"
186
+ "- Extract ALL text from ALL pages, including non-English languages\n"
187
+ "- Preserve structure, headings, and formatting in the full_text field\n"
188
+ "- Fill in fields with relevant extracted information\n"
189
+ "- If a field is not found, use empty string or omit it\n"
190
+ "- The full_text should contain everything readable from the document"
191
  )
192
  else:
193
  user_prompt = (
194
+ "Read this document using your vision capability and extract ALL text content. "
195
+ "I want the complete end-to-end text, preserving structure, headings, formatting, and content in all languages.\n\n"
196
+ "Extract every word, number, and piece of information, including any non-English text.\n\n"
197
+ "Respond with JSON in this format:\n"
198
  "{\n"
199
+ ' \"doc_type\": \"invoice | receipt | contract | report | notice | other\",\n'
200
  ' \"confidence\": number between 0 and 100,\n'
201
+ ' \"full_text\": \"Complete extracted text, preserving structure and formatting. Include all languages.\",\n'
202
  ' \"fields\": {\n'
203
  ' \"invoice_number\": \"...\",\n'
204
  ' \"date\": \"...\",\n'
 
206
  ' \"total_amount\": \"...\",\n'
207
  ' \"currency\": \"...\",\n'
208
  ' \"vendor_name\": \"...\",\n'
209
+ ' \"company_name\": \"...\",\n'
210
+ ' \"address\": \"...\",\n'
211
  ' \"line_items\": [\n'
212
  ' {\"description\": \"...\", \"quantity\": \"...\", \"unit_price\": \"...\", \"line_total\": \"...\"}\n'
213
  ' ],\n'
214
  ' \"other_field\": \"...\"\n'
215
  " }\n"
216
+ "}\n\n"
217
+ "IMPORTANT:\n"
218
+ "- Extract ALL text, including non-English languages\n"
219
+ "- Preserve structure, headings, and formatting in the full_text field\n"
220
+ "- Fill in fields with relevant extracted information\n"
221
+ "- If a field is not found, use empty string or omit it"
222
  )
223
 
224
  # Build content array with text prompt and all image blocks
 
237
  "content": user_content,
238
  },
239
  ],
240
+ "max_tokens": 8192, # Increased for full text extraction from multi-page documents
241
  }
242
 
243
  headers = {