|
291 | 291 | "metadata": {}, |
292 | 292 | "source": [ |
293 | 293 | "## Text layer URL is required for uploading entity annotations\n", |
294 | | - "global_key = \"0801.3483.pd\"\n", |
| 294 | + "global_key = \"0801.3483.pdf\"\n", |
295 | 295 | "img_url = {\n", |
296 | 296 | " \"row_data\": {\n", |
297 | 297 | " \"pdf_url\": \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\",\n", |
|
308 | 308 | "print(\"Failed data rows:\", task.failed_data_rows)" |
309 | 309 | ], |
310 | 310 | "cell_type": "code", |
311 | | - "outputs": [], |
| 311 | + "outputs": [ |
| 312 | + { |
| 313 | + "name": "stderr", |
| 314 | + "output_type": "stream", |
| 315 | + "text": [ |
| 316 | + "There are errors present. Please look at `task.errors` for more details\n" |
| 317 | + ] |
| 318 | + }, |
| 319 | + { |
| 320 | + "name": "stdout", |
| 321 | + "output_type": "stream", |
| 322 | + "text": [ |
| 323 | + "Errors: Duplicate global keys found: 0801.3483.pdf\n", |
| 324 | + "Failed data rows: [{'message': 'Duplicate global keys found: 0801.3483.pdf', 'failedDataRows': [{'globalKey': '0801.3483.pdf', 'rowData': 'https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf', 'attachmentInputs': [], 'mediaAttributes': {'textLayerUrl': 'https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483-lb-textlayer.json'}}]}]\n" |
| 325 | + ] |
| 326 | + } |
| 327 | + ], |
312 | 328 | "execution_count": null |
313 | 329 | }, |
314 | 330 | { |
|
431 | 447 | "source": [ |
432 | 448 | "project.create_batch(\n", |
433 | 449 | " \"PDF_annotation_batch\", # Each batch in a project must have a unique name\n", |
434 | | - " global_keys=global_key, # A list of data rows or data row ids\n", |
| 450 | + " global_keys=[global_key], # A list of global keys identifying the data rows to add to the batch\n", |
435 | 451 | " priority=5 # priority between 1 (highest) and 5 (lowest)\n", |
436 | 452 | ")" |
437 | 453 | ], |
|
440 | 456 | { |
441 | 457 | "data": { |
442 | 458 | "text/plain": [ |
443 | | - "<Batch ID: e2152a50-c402-11ed-9933-959d2b6c8c3c>" |
| 459 | + "<Batch ID: eb670250-c749-11ed-a43b-3542d8b96bf7>" |
444 | 460 | ] |
445 | 461 | }, |
446 | | - "execution_count": 13, |
| 462 | + "execution_count": 33, |
447 | 463 | "metadata": {}, |
448 | 464 | "output_type": "execute_result" |
449 | 465 | } |
|
545 | 561 | { |
546 | 562 | "metadata": {}, |
547 | 563 | "source": [ |
548 | | - "# create a Label\n", |
| 564 | + "\n", |
549 | 565 | "\n", |
550 | 566 | "labels = []\n", |
551 | 567 | "\n", |
552 | | - "labels.append(lb_types.Label(\n", |
553 | | - " data=lb_types.DocumentData(\n", |
554 | | - " global_key=global_key),\n", |
555 | | - " annotations = [\n", |
556 | | - " entities_annotation,\n", |
557 | | - " checklist_annotation, \n", |
558 | | - " text_annotation,\n", |
559 | | - " radio_annotation\n", |
560 | | - " ]\n", |
| 568 | + "labels.append(\n", |
| 569 | + " lb_types.Label(\n", |
| 570 | + " data=lb_types.DocumentData(\n", |
| 571 | + " global_key=global_key),\n", |
| 572 | + " annotations = [\n", |
| 573 | + " entities_annotation,\n", |
| 574 | + " checklist_annotation, \n", |
| 575 | + " text_annotation,\n", |
| 576 | + " radio_annotation\n", |
| 577 | + " ]\n", |
561 | 578 | " )\n", |
562 | 579 | ")" |
563 | 580 | ], |
|
577 | 594 | "metadata": {}, |
578 | 595 | "source": [ |
579 | 596 | "\n", |
580 | | - "ndjson_annotation = []\n", |
| 597 | + "label_ndjson = []\n", |
581 | 598 | "for annot in [\n", |
582 | 599 | " entities_annotations_ndjson,\n", |
583 | 600 | " bbox_annotation_ndjson,\n", |
|
590 | 607 | " annot.update({\n", |
591 | 608 | " 'dataRow': {'globalKey': global_key},\n", |
592 | 609 | " })\n", |
593 | | - " ndjson_annotation.append(annot)\n", |
| 610 | + " label_ndjson.append(annot)\n", |
594 | 611 | "\n" |
595 | 612 | ], |
596 | 613 | "cell_type": "code", |
|
618 | 635 | " client = client,\n", |
619 | 636 | " project_id = project.uid,\n", |
620 | 637 | " name=\"pdf_annotation_upload\" + str(uuid.uuid4()),\n", |
621 | | - " predictions=ndjson_annotation)\n", |
| 638 | + " predictions=label_ndjson)\n", |
622 | 639 | "\n", |
623 | 640 | "upload_job.wait_until_done()\n", |
624 | 641 | "# Errors will appear for annotation uploads that failed.\n", |
625 | 642 | "print(\"Errors:\", upload_job.errors)\n", |
626 | 643 | "print(\"Status of uploads: \", upload_job.statuses)" |
627 | 644 | ], |
628 | 645 | "cell_type": "code", |
629 | | - "outputs": [], |
| 646 | + "outputs": [ |
| 647 | + { |
| 648 | + "name": "stdout", |
| 649 | + "output_type": "stream", |
| 650 | + "text": [ |
| 651 | + "Errors: []\n", |
| 652 | + "Status of uploads: [{'uuid': '262296fe-6877-432a-b12d-9c688f996f2c', 'dataRow': {'id': 'clfh1n29e1mh2078d3jt3b5cx', 'globalKey': '0801.3483.pdf'}, 'status': 'SUCCESS'}, {'uuid': '1018a2d5-9e16-4a71-9bf4-58ab7f575a38', 'dataRow': {'id': 'clfh1n29e1mh2078d3jt3b5cx', 'globalKey': '0801.3483.pdf'}, 'status': 'SUCCESS'}, {'uuid': '2755d3cd-5cfa-4022-be37-cf754734b8b0', 'dataRow': {'id': 'clfh1n29e1mh2078d3jt3b5cx', 'globalKey': '0801.3483.pdf'}, 'status': 'SUCCESS'}, {'uuid': 'ead202c6-d9d3-4f8a-9e2f-e42bb71573e2', 'dataRow': {'id': 'clfh1n29e1mh2078d3jt3b5cx', 'globalKey': '0801.3483.pdf'}, 'status': 'SUCCESS'}, {'uuid': '5a586b1a-11f1-4f49-8aca-25cb2a39956c', 'dataRow': {'id': 'clfh1n29e1mh2078d3jt3b5cx', 'globalKey': '0801.3483.pdf'}, 'status': 'SUCCESS'}, {'uuid': '17211b2a-abab-4d65-8ed6-585bbbca3483', 'dataRow': {'id': 'clfh1n29e1mh2078d3jt3b5cx', 'globalKey': '0801.3483.pdf'}, 'status': 'SUCCESS'}, {'uuid': 'c1a18383-df7c-4a72-9f45-917c8404af72', 'dataRow': {'id': 'clfh1n29e1mh2078d3jt3b5cx', 'globalKey': '0801.3483.pdf'}, 'status': 'SUCCESS'}]\n" |
| 653 | + ] |
| 654 | + } |
| 655 | + ], |
630 | 656 | "execution_count": null |
631 | 657 | }, |
632 | 658 | { |
|
644 | 670 | " client = client, \n", |
645 | 671 | " project_id = project.uid, \n", |
646 | 672 | " name=\"label_import_job\"+str(uuid.uuid4()), \n", |
647 | | - " labels=ndjson_annotation)\n", |
| 673 | + " labels=label_ndjson)\n", |
648 | 674 | "\n", |
649 | 675 | "print(\"Errors:\", upload_job.errors)\n", |
650 | 676 | "print(\"Status of uploads: \", upload_job.statuses)" |
651 | 677 | ], |
652 | 678 | "cell_type": "code", |
653 | | - "outputs": [], |
| 679 | + "outputs": [ |
| 680 | + { |
| 681 | + "name": "stdout", |
| 682 | + "output_type": "stream", |
| 683 | + "text": [ |
| 684 | + "Errors: []\n", |
| 685 | + "Status of uploads: [{'uuid': 'bb0dbd33-4f7a-49b3-808a-a5d57563cc1a', 'dataRow': {'id': 'clfh1n29e1mh2078d3jt3b5cx', 'globalKey': '0801.3483.pdf'}, 'status': 'SUCCESS'}, {'uuid': '32021451-e988-4e23-b526-46afd769bef3', 'dataRow': {'id': 'clfh1n29e1mh2078d3jt3b5cx', 'globalKey': '0801.3483.pdf'}, 'status': 'SUCCESS'}, {'uuid': '2aaf89be-c02c-4d09-ae1a-097ff69de28a', 'dataRow': {'id': 'clfh1n29e1mh2078d3jt3b5cx', 'globalKey': '0801.3483.pdf'}, 'status': 'SUCCESS'}, {'uuid': '80e57133-3bf5-4479-9301-be37dc634283', 'dataRow': {'id': 'clfh1n29e1mh2078d3jt3b5cx', 'globalKey': '0801.3483.pdf'}, 'status': 'SUCCESS'}, {'uuid': 'a4842956-f857-4c91-ba83-9ab5969159cb', 'dataRow': {'id': 'clfh1n29e1mh2078d3jt3b5cx', 'globalKey': '0801.3483.pdf'}, 'status': 'SUCCESS'}, {'uuid': '38ee3c85-4762-4630-94d3-cf5fc10f9ce5', 'dataRow': {'id': 'clfh1n29e1mh2078d3jt3b5cx', 'globalKey': '0801.3483.pdf'}, 'status': 'SUCCESS'}, {'uuid': '6ec97e39-b971-4a46-a591-efd0705e0817', 'dataRow': {'id': 'clfh1n29e1mh2078d3jt3b5cx', 'globalKey': '0801.3483.pdf'}, 'status': 'SUCCESS'}]\n" |
| 686 | + ] |
| 687 | + } |
| 688 | + ], |
654 | 689 | "execution_count": null |
655 | 690 | } |
656 | 691 | ] |
|
0 commit comments