Spaces:

Seth0330
/

AIEXTRACT1

Sleeping

App Files Community

Seth0330 committed 14 days ago

Commit

b5224a9

verified ·

1 Parent(s): ef35ecf

Update backend/app/openrouter_client.py

Browse files

Files changed (1) hide show

backend/app/openrouter_client.py +40 -18

backend/app/openrouter_client.py CHANGED Viewed

@@ -146,20 +146,23 @@ async def extract_fields_from_document(
     print(f"[INFO] Generated {len(image_blocks)} image block(s) for processing")
     system_prompt = (
-        "You are a document extraction engine. "
-        "You analyze invoices, receipts, contracts, reports and similar documents, "
-        "and output structured JSON only (no explanations or comments)."
     )
-    # Update prompt for multi-page documents
     if len(image_blocks) > 1:
         user_prompt = (
-            f"Extract important key-value pairs from this {len(image_blocks)}-page document. "
-            "Analyze all pages and combine the information into a single JSON response.\n"
-            "Use this shape:\n"
             "{\n"
-            '  \"doc_type\": \"invoice | receipt | contract | report | other\",\n'
             '  \"confidence\": number between 0 and 100,\n'
             '  \"fields\": {\n'
             '    \"invoice_number\": \"...\",\n'
             '    \"date\": \"...\",\n'
@@ -167,22 +170,35 @@ async def extract_fields_from_document(
             '    \"total_amount\": \"...\",\n'
             '    \"currency\": \"...\",\n'
             '    \"vendor_name\": \"...\",\n'
             '    \"line_items\": [\n'
             '       {\"description\": \"...\", \"quantity\": \"...\", \"unit_price\": \"...\", \"line_total\": \"...\"}\n'
             '    ],\n'
             '    \"other_field\": \"...\"\n'
-            "  }\n"
-            "}\n"
-            "If fields are missing or not applicable, simply omit them. "
-            "Combine information from all pages into a single response."
         )
     else:
         user_prompt = (
-            "Extract important key-value pairs from the document and respond with JSON only.\n"
-            "Use this shape:\n"
             "{\n"
-            '  \"doc_type\": \"invoice | receipt | contract | report | other\",\n'
             '  \"confidence\": number between 0 and 100,\n'
             '  \"fields\": {\n'
             '    \"invoice_number\": \"...\",\n'
             '    \"date\": \"...\",\n'
@@ -190,13 +206,19 @@ async def extract_fields_from_document(
             '    \"total_amount\": \"...\",\n'
             '    \"currency\": \"...\",\n'
             '    \"vendor_name\": \"...\",\n'
             '    \"line_items\": [\n'
             '       {\"description\": \"...\", \"quantity\": \"...\", \"unit_price\": \"...\", \"line_total\": \"...\"}\n'
             '    ],\n'
             '    \"other_field\": \"...\"\n'
             "  }\n"
-            "}\n"
-            "If fields are missing or not applicable, simply omit them."
         )
     # Build content array with text prompt and all image blocks
@@ -215,7 +237,7 @@ async def extract_fields_from_document(
                 "content": user_content,
             },
         ],
-        "max_tokens": 4096,  # Increased for multi-page documents
     }
     headers = {

     print(f"[INFO] Generated {len(image_blocks)} image block(s) for processing")
     system_prompt = (
+        "You are a document extraction engine with vision capabilities. "
+        "You read and extract text from documents in any language, preserving structure, formatting, and all content. "
+        "You output structured JSON with both the full extracted text and key-value pairs."
     )
+    # Update prompt for multi-page documents - ask for full text extraction first
     if len(image_blocks) > 1:
         user_prompt = (
+            f"Read this {len(image_blocks)}-page document using your vision capability and extract ALL text content. "
+            "I want the complete end-to-end text from all pages, preserving structure, headings, formatting, and content in all languages.\n\n"
+            "Analyze ALL pages thoroughly, including any non-English text (Punjabi, Hindi, or other languages). "
+            "Extract every word, number, and piece of information from every page.\n\n"
+            "Respond with JSON in this format:\n"
             "{\n"
+            '  \"doc_type\": \"invoice | receipt | contract | report | notice | other\",\n'
             '  \"confidence\": number between 0 and 100,\n'
+            '  \"full_text\": \"Complete extracted text from all pages, preserving structure and formatting. Include all languages.\",\n'
             '  \"fields\": {\n'
             '    \"invoice_number\": \"...\",\n'
             '    \"date\": \"...\",\n'
             '    \"total_amount\": \"...\",\n'
             '    \"currency\": \"...\",\n'
             '    \"vendor_name\": \"...\",\n'
+            '    \"company_name\": \"...\",\n'
+            '    \"address\": \"...\",\n'
             '    \"line_items\": [\n'
             '       {\"description\": \"...\", \"quantity\": \"...\", \"unit_price\": \"...\", \"line_total\": \"...\"}\n'
             '    ],\n'
             '    \"other_field\": \"...\"\n'
+            "  },\n"
+            '  \"pages\": [\n'
+            '    {\"page_number\": 1, \"text\": \"Full text from page 1\"},\n'
+            '    {\"page_number\": 2, \"text\": \"Full text from page 2\"}\n'
+            '  ]\n'
+            "}\n\n"
+            "IMPORTANT:\n"
+            "- Extract ALL text from ALL pages, including non-English languages\n"
+            "- Preserve structure, headings, and formatting in the full_text field\n"
+            "- Fill in fields with relevant extracted information\n"
+            "- If a field is not found, use empty string or omit it\n"
+            "- The full_text should contain everything readable from the document"
         )
     else:
         user_prompt = (
+            "Read this document using your vision capability and extract ALL text content. "
+            "I want the complete end-to-end text, preserving structure, headings, formatting, and content in all languages.\n\n"
+            "Extract every word, number, and piece of information, including any non-English text.\n\n"
+            "Respond with JSON in this format:\n"
             "{\n"
+            '  \"doc_type\": \"invoice | receipt | contract | report | notice | other\",\n'
             '  \"confidence\": number between 0 and 100,\n'
+            '  \"full_text\": \"Complete extracted text, preserving structure and formatting. Include all languages.\",\n'
             '  \"fields\": {\n'
             '    \"invoice_number\": \"...\",\n'
             '    \"date\": \"...\",\n'
             '    \"total_amount\": \"...\",\n'
             '    \"currency\": \"...\",\n'
             '    \"vendor_name\": \"...\",\n'
+            '    \"company_name\": \"...\",\n'
+            '    \"address\": \"...\",\n'
             '    \"line_items\": [\n'
             '       {\"description\": \"...\", \"quantity\": \"...\", \"unit_price\": \"...\", \"line_total\": \"...\"}\n'
             '    ],\n'
             '    \"other_field\": \"...\"\n'
             "  }\n"
+            "}\n\n"
+            "IMPORTANT:\n"
+            "- Extract ALL text, including non-English languages\n"
+            "- Preserve structure, headings, and formatting in the full_text field\n"
+            "- Fill in fields with relevant extracted information\n"
+            "- If a field is not found, use empty string or omit it"
         )
     # Build content array with text prompt and all image blocks
                 "content": user_content,
             },
         ],
+        "max_tokens": 8192,  # Increased for full text extraction from multi-page documents
     }
     headers = {