Skip to content

Commit 6a98cdb

Browse files
Added sample for loading CSV data.
1 parent 102b946 commit 6a98cdb

File tree

5 files changed

+206
-0
lines changed

5 files changed

+206
-0
lines changed

doc/src/release_notes.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ Common Changes
5555
#) Added support for Python 3.11.
5656
#) Added attribute :attr:`DbObjectType.package_name` which contains the name
5757
of the package if the type is a PL/SQL type (otherwise, it will be `None`).
58+
#) Added sample for loading data from a CSV file.
5859
#) Improved test suite and documentation.
5960

6061

doc/src/user_guide/batch_statement.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,3 +303,6 @@ prevent all data being inserted at once:
303303
Depending on data sizes and business requirements, database changes such as
304304
temporarily disabling redo logging on the table, or disabling indexes may also
305305
be beneficial.
306+
307+
See `load_csv.py <https://github.com/oracle/python-oracledb/tree/main/
308+
samples/load_csv.py>`__ for a runnable example.

samples/data/load_csv.csv

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
1,Biologist
2+
2,Doctor
3+
3,Call Center Representative
4+
4,Executive Director
5+
5,Laboratory Technician
6+
6,Cashier
7+
7,Global Logistics Supervisor
8+
8,Investment Advisor
9+
9,HR Coordinator
10+
10,HR Specialist
11+
11,Investment Advisor
12+
12,Baker
13+
13,Baker
14+
14,Cashier
15+
15,CNC Operator
16+
16,Software Engineer
17+
17,Call Center Representative
18+
18,Auditor
19+
19,Accountant
20+
20,Auditor
21+
21,Loan Officer
22+
22,Bellman
23+
23,Cashier
24+
24,Baker
25+
25,HR Coordinator
26+
26,Operator
27+
27,Service Supervisor
28+
28,Lecturer
29+
29,IT Support Staff
30+
30,Staffing Consultant
31+
31,Paramedic
32+
32,Associate Professor
33+
33,HR Coordinator
34+
34,HR Coordinator
35+
35,Restaurant Manager
36+
36,Webmaster
37+
37,Global Logistics Supervisor
38+
38,Bellman
39+
39,Design Engineer
40+
40,Cashier
41+
41,Global Logistics Supervisor
42+
42,Steward
43+
43,Fabricator
44+
44,Inspector
45+
45,Pharmacist
46+
46,Loan Officer
47+
47,Staffing Consultant
48+
48,Chef Manager
49+
49,Biologist
50+
50,Food Technologist
51+
51,Biologist
52+
52,Global Logistics Supervisor
53+
53,Laboratory Technician
54+
54,Design Engineer
55+
55,Fabricator
56+
56,Lecturer
57+
57,Loan Officer
58+
58,Investment Advisor
59+
59,Treasurer
60+
60,Cash Manager
61+
61,Audiologist
62+
62,Fabricator
63+
63,Systems Administrator
64+
64,Health Educator
65+
65,Fabricator
66+
66,HR Specialist
67+
67,Physician
68+
68,Staffing Consultant
69+
69,Fabricator
70+
70,Physician
71+
71,Auditor
72+
72,Biologist
73+
73,Budget Analyst
74+
74,Bellman
75+
75,Restaurant Manager
76+
76,Stockbroker
77+
77,Paramedic
78+
78,Front Desk Coordinator
79+
79,Cashier
80+
80,CNC Operator
81+
81,Design Engineer
82+
82,Audiologist
83+
83,CNC Operator
84+
84,IT Support Staff
85+
85,Paramedic
86+
86,Fabricator
87+
87,Laboratory Technician
88+
88,Mobile Developer
89+
89,Global Logistics Supervisor
90+
90,Budget Analyst
91+
91,Lecturer
92+
92,Physician
93+
93,Retail Trainee
94+
94,Pharmacist
95+
95,Service Supervisor
96+
96,Production Painter
97+
97,Cash Manager
98+
98,Global Logistics Supervisor
99+
99,Treasurer
100+
100,Pharmacist

samples/load_csv.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
#------------------------------------------------------------------------------
2+
# Copyright (c) 2022, Oracle and/or its affiliates.
3+
#
4+
# This software is dual-licensed to you under the Universal Permissive License
5+
# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License
6+
# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose
7+
# either license.
8+
#
9+
# If you elect to accept the software under the Apache License, Version 2.0,
10+
# the following applies:
11+
#
12+
# Licensed under the Apache License, Version 2.0 (the "License");
13+
# you may not use this file except in compliance with the License.
14+
# You may obtain a copy of the License at
15+
#
16+
# https://www.apache.org/licenses/LICENSE-2.0
17+
#
18+
# Unless required by applicable law or agreed to in writing, software
19+
# distributed under the License is distributed on an "AS IS" BASIS,
20+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21+
# See the License for the specific language governing permissions and
22+
# limitations under the License.
23+
#------------------------------------------------------------------------------
24+
25+
#------------------------------------------------------------------------------
26+
# load_csv.py
27+
#
28+
# A sample showing how to load CSV data.
29+
#------------------------------------------------------------------------------
30+
31+
import csv
32+
import os
33+
34+
import oracledb
35+
import sample_env
36+
37+
# Determine whether to use python-oracledb thin mode or thick mode. When thick
# mode is requested, the Oracle Client libraries are initialized before any
# connection is created.
if not sample_env.get_is_thin():
    oracledb.init_oracle_client(lib_dir=sample_env.get_oracle_client())

# CSV file. This sample file has both valid rows and some rows with data too
# large to insert. The path is anchored to the directory containing this
# script so the sample can be run from any working directory.
FILE_NAME = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         'data', 'load_csv.csv')

# Adjust the number of rows to be inserted in each iteration to meet your
# memory and performance requirements. Typically this is a large-ish value to
# reduce the number of calls to executemany() to a reasonable size. For this
# demo with a small CSV file a smaller number is used to show the looping
# behavior of the code.
BATCH_SIZE = 19

# Connect using the credentials and connect string supplied by sample_env.
connection = oracledb.connect(user=sample_env.get_main_user(),
                              password=sample_env.get_main_password(),
                              dsn=sample_env.get_connect_string())
55+
56+
def process_batch(batch_number, cursor, data):
    """Insert one batch of rows and print a message for each rejected row.

    batch_number is the zero-based index of the batch within the CSV file,
    cursor is the cursor on which the statement held in the module-level
    name "sql" is executed, and data is the list of row tuples to insert.
    """
    print("processing batch", batch_number + 1)

    # batcherrors=True lets the remaining rows be processed when some rows
    # fail; the failures are collected and retrieved afterwards.
    cursor.executemany(sql, data, batcherrors=True)

    # One-based CSV line number of the first row in this batch. Adding the
    # offset of a failed row within the batch gives its line in the file.
    first_line = batch_number * BATCH_SIZE + 1
    for failure in cursor.getbatcherrors():
        print("Error", failure.message, "at line", first_line + failure.offset)
62+
63+
with connection.cursor() as cursor:

    # Clean up the table for demonstration purposes
    cursor.execute('truncate table LoadCsvTab')

    # Predefine the memory areas to match the table definition.
    # This can improve performance by avoiding memory reallocations.
    # Here, one parameter is passed for each of the columns.
    # "None" is used for the ID column, since the size of NUMBER isn't
    # variable. The "25" matches the maximum expected data size for the
    # NAME column
    cursor.setinputsizes(None, 25)

    # Loop over the data and insert it in batches. The file is opened with
    # newline='' as required by the csv module, which then handles line
    # endings (including newlines embedded in quoted fields) itself.
    with open(FILE_NAME, 'r', newline='') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')

        # "sql" is a module-level name that process_batch() reads, so it
        # must be assigned before the first batch is processed.
        sql = "insert into LoadCsvTab (id, name) values (:1, :2)"
        data = []
        batch_number = 0
        for line in csv_reader:
            data.append((line[0], line[1]))

            # The list is emptied after every flush, so reaching BATCH_SIZE
            # is the only point at which a full batch needs to be sent.
            if len(data) == BATCH_SIZE:
                process_batch(batch_number, cursor, data)
                data = []
                batch_number += 1

        # Send whatever is left over as the final, partially filled batch.
        if data:
            process_batch(batch_number, cursor, data)

    # In a production system you might choose to fix any invalid rows,
    # re-insert them, and then commit. Or you could rollback everything.
    # In this sample we simply commit and ignore the invalid rows that
    # couldn't be inserted.
    connection.commit()

samples/sql/create_schema.sql

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,13 @@ create table &main_user..CustomersAsBlob (
238238
)
239239
/
240240

241+
-- Table populated by the load_csv.py sample: a numeric primary key (id) and
-- a name column declared as varchar2(25)
create table &main_user..LoadCsvTab (
id number not null,
name varchar2(25),
constraint LoadCsvTab_pk primary key (id)
)
/
247+
241248
declare
242249
t_Version number;
243250
begin

0 commit comments

Comments
 (0)