#------------------------------------------------------------------------------
# Copyright (c) 2022, Oracle and/or its affiliates.
#
# This software is dual-licensed to you under the Universal Permissive License
# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License
# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose
# either license.
#
# If you elect to accept the software under the Apache License, Version 2.0,
# the following applies:
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#------------------------------------------------------------------------------

#------------------------------------------------------------------------------
# load_csv.py
#
# A sample showing how to load CSV data.
#------------------------------------------------------------------------------

import csv
import os

import oracledb
import sample_env

# determine whether to use python-oracledb thin mode or thick mode
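# (thick mode uses the Oracle Client libraries loaded by init_oracle_client();
# without that call the driver runs in the default thin mode)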
if not sample_env.get_is_thin():
    oracledb.init_oracle_client(lib_dir=sample_env.get_oracle_client())

# CSV file. This sample file has both valid rows and some rows with data too
# large to insert.
FILE_NAME = os.path.join('data', 'load_csv.csv')

# Adjust the number of rows to be inserted in each iteration to meet your
# memory and performance requirements. Typically this is a large-ish value to
# reduce the number of calls to executemany() to a reasonable size. For this
# demo with a small CSV file a smaller number is used to show the looping
# behavior of the code.
BATCH_SIZE = 19

connection = oracledb.connect(user=sample_env.get_main_user(),
                              password=sample_env.get_main_password(),
                              dsn=sample_env.get_connect_string())

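# Insert one batch of rows. With batcherrors=True, executemany() inserts the
# valid rows and collects errors (for example, values too large for a column)
# for the failing rows; getbatcherrors() then reports each error along with
# its row offset within the batch.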
def process_batch(batch_number, cursor, data):
    print("processing batch", batch_number + 1)
    cursor.executemany(sql, data, batcherrors=True)
    for error in cursor.getbatcherrors():
        line_num = (batch_number * BATCH_SIZE) + error.offset + 1
        print("Error", error.message, "at line", line_num)

with connection.cursor() as cursor:

    # Clean up the table for demonstration purposes
    cursor.execute('truncate table LoadCsvTab')

    # Predefine the memory areas to match the table definition.
    # This can improve performance by avoiding memory reallocations.
    # Here, one parameter is passed for each of the columns.
    # "None" is used for the ID column, since the size of NUMBER isn't
    # variable. The "25" matches the maximum expected data size for the
    # NAME column
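    #
    # For illustration, a table shaped roughly like the following would match
    # these sizes (an assumption based on the comments above; the sample
    # expects LoadCsvTab to already exist):
    #     create table LoadCsvTab (id number, name varchar2(25))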
    cursor.setinputsizes(None, 25)

    # Loop over the data and insert it in batches
    with open(FILE_NAME, 'r') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        sql = "insert into LoadCsvTab (id, name) values (:1, :2)"
        data = []
        batch_number = 0
        for line in csv_reader:
            data.append((line[0], line[1]))
            if len(data) % BATCH_SIZE == 0:
                process_batch(batch_number, cursor, data)
                data = []
                batch_number += 1
        if data:
            process_batch(batch_number, cursor, data)

    # In a production system you might choose to fix any invalid rows,
    # re-insert them, and then commit. Or you could rollback everything.
    # In this sample we simply commit and ignore the invalid rows that
    # couldn't be inserted.
    connection.commit()
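
# A quick check (a sketch, not part of the original sample): count how many
# rows were actually loaded so the batch errors printed above can be
# reconciled with the table contents.
with connection.cursor() as cursor:
    cursor.execute("select count(*) from LoadCsvTab")
    count = cursor.fetchone()[0]
    print("Rows loaded:", count)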