#------------------------------------------------------------------------------
# Copyright (c) 2022, Oracle and/or its affiliates.
#
# This software is dual-licensed to you under the Universal Permissive License
# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License
# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose
# either license.
#
# If you elect to accept the software under the Apache License, Version 2.0,
# the following applies:
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#------------------------------------------------------------------------------

#------------------------------------------------------------------------------
# load_csv.py
#
# A sample showing how to load CSV data.
#------------------------------------------------------------------------------

import csv
import os

import oracledb
import sample_env

# determine whether to use python-oracledb thin mode or thick mode
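# (thick mode uses the Oracle Client libraries loaded by init_oracle_client();
# without that call the driver runs in the default thin mode)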
if not sample_env.get_is_thin():
    oracledb.init_oracle_client(lib_dir=sample_env.get_oracle_client())

# CSV file. This sample file has both valid rows and some rows with data too
# large to insert.
FILE_NAME = os.path.join('data', 'load_csv.csv')

# Adjust the number of rows to be inserted in each iteration to meet your
# memory and performance requirements. Typically this is a large-ish value to
# reduce the number of calls to executemany() to a reasonable size. For this
# demo with a small CSV file a smaller number is used to show the looping
# behavior of the code.
BATCH_SIZE = 19

connection = oracledb.connect(user=sample_env.get_main_user(),
                              password=sample_env.get_main_password(),
                              dsn=sample_env.get_connect_string())

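# Insert one batch of rows. With batcherrors=True, executemany() inserts the
# valid rows and collects errors (for example, values too large for a column)
# for the failing rows; getbatcherrors() then reports each error along with
# its row offset within the batch.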
def process_batch(batch_number, cursor, data):
    print("processing batch", batch_number + 1)
    cursor.executemany(sql, data, batcherrors=True)
    for error in cursor.getbatcherrors():
        line_num = (batch_number * BATCH_SIZE) + error.offset + 1
        print("Error", error.message, "at line", line_num)

with connection.cursor() as cursor:

    # Clean up the table for demonstration purposes
    cursor.execute('truncate table LoadCsvTab')

    # Predefine the memory areas to match the table definition.
    # This can improve performance by avoiding memory reallocations.
    # Here, one parameter is passed for each of the columns.
    # "None" is used for the ID column, since the size of NUMBER isn't
    # variable. The "25" matches the maximum expected data size for the
    # NAME column
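    #
    # For illustration, a table shaped roughly like the following would match
    # these sizes (an assumption based on the comments above; the sample
    # expects LoadCsvTab to already exist):
    #     create table LoadCsvTab (id number, name varchar2(25))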
    cursor.setinputsizes(None, 25)

    # Loop over the data and insert it in batches
    with open(FILE_NAME, 'r') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        sql = "insert into LoadCsvTab (id, name) values (:1, :2)"
        data = []
        batch_number = 0
        for line in csv_reader:
            data.append((line[0], line[1]))
            if len(data) % BATCH_SIZE == 0:
                process_batch(batch_number, cursor, data)
                data = []
                batch_number += 1
        if data:
            process_batch(batch_number, cursor, data)

    # In a production system you might choose to fix any invalid rows,
    # re-insert them, and then commit. Or you could rollback everything.
    # In this sample we simply commit and ignore the invalid rows that
    # couldn't be inserted.
    connection.commit()
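
# A quick check (a sketch, not part of the original sample): count how many
# rows were actually loaded so the batch errors printed above can be
# reconciled with the table contents.
with connection.cursor() as cursor:
    cursor.execute("select count(*) from LoadCsvTab")
    count = cursor.fetchone()[0]
    print("Rows loaded:", count)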