Skip to content

Commit 089c51f

Browse files
p-mongo and p authored
Fix RUBY-2210 GridFS does not write data correctly when given String in UTF-8 encoding (#1858)
Co-authored-by: Oleg Pudeyev <oleg@bsdpower.com>
1 parent 164a336 commit 089c51f

File tree

5 files changed

+79
-16
lines changed

5 files changed

+79
-16
lines changed

lib/mongo/grid/file.rb

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,11 @@ def inspect
104104
# chunk objects and assemble the data. If we have an IO object, then
105105
# it's the original file data and we must split it into chunks and set
106106
# the original data itself.
107+
#
108+
# @param [ IO, String, Array<BSON::Document> ] value The file object,
109+
# file contents or chunk documents.
110+
#
111+
# @return [ Array<Grid::File::Chunk> ] Array of chunks.
107112
def initialize_chunks!(value)
108113
if value.is_a?(Array)
109114
@chunks = value.map{ |doc| Chunk.new(doc) }

lib/mongo/grid/file/chunk.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ class << self
151151
# @return [ String ] The assembled data.
152152
#
153153
# @since 2.0.0
154+
# @api private
154155
def assemble(chunks)
155156
chunks.reduce(''){ |data, chunk| data << chunk.data.data }
156157
end
@@ -167,6 +168,7 @@ def assemble(chunks)
167168
# @return [ Array<Chunk> ] The chunks of the data.
168169
#
169170
# @since 2.0.0
171+
# @api private
170172
def split(io, file_info, offset = 0)
171173
io = StringIO.new(io) if io.is_a?(String)
172174
parts = Enumerator.new { |y| y << io.read(file_info.chunk_size) until io.eof? }

lib/mongo/grid/fs_bucket.rb

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ def initialize(database, options = {})
179179
#
180180
# @since 2.0.0
181181
def prefix
182-
@options[:fs_name] || @options[:bucket_name]|| DEFAULT_ROOT
182+
@options[:fs_name] || @options[:bucket_name] || DEFAULT_ROOT
183183
end
184184

185185
# Remove a single file from the GridFS.
@@ -232,7 +232,8 @@ def delete(id)
232232
#
233233
# @since 2.1.0
234234
def open_download_stream(id, options = nil)
235-
read_stream(id, options).tap do |stream|
235+
options = Utils.shallow_symbolize_keys(options || {})
236+
read_stream(id, **options).tap do |stream|
236237
if block_given?
237238
begin
238239
yield stream
@@ -350,15 +351,15 @@ def download_to_stream_by_name(filename, io, opts = {})
350351
download_to_stream(open_download_stream_by_name(filename, opts).file_id, io)
351352
end
352353

353-
# Opens an upload stream to GridFS to which the contents of a user file came be written.
354+
# Opens an upload stream to GridFS to which the contents of a file or
355+
# blob can be written.
354356
#
355-
# @example Open a stream to which the contents of a file came be written.
356-
# fs.open_upload_stream('a-file.txt')
357-
#
358-
# @param [ String ] filename The filename of the file to upload.
357+
# @param [ String ] filename The name of the file in GridFS.
359358
# @param [ Hash ] opts The options for the write stream.
360359
#
361-
# @option opts [ Object ] :file_id An optional unique file id. An ObjectId is generated otherwise.
360+
# @option opts [ Object ] :file_id An optional unique file id.
361+
# A BSON::ObjectId is automatically generated if a file id is not
362+
# provided.
362363
# @option opts [ Integer ] :chunk_size Override the default chunk size.
363364
# @option opts [ Hash ] :metadata User data for the 'metadata' field of the files
364365
# collection document.
@@ -377,7 +378,8 @@ def download_to_stream_by_name(filename, io, opts = {})
377378
#
378379
# @since 2.1.0
379380
def open_upload_stream(filename, opts = {})
380-
write_stream(filename, opts).tap do |stream|
381+
opts = Utils.shallow_symbolize_keys(opts)
382+
write_stream(filename, **opts).tap do |stream|
381383
if block_given?
382384
begin
383385
yield stream
@@ -467,12 +469,12 @@ def write_concern
467469
#
468470
# @option opts [ BSON::Document ] :file_info_doc For internal
469471
# driver use only. A BSON document to use as file information.
470-
def read_stream(id, opts = nil)
471-
Stream.get(self, Stream::READ_MODE, { file_id: id }.update(options).update(opts || {}))
472+
def read_stream(id, **opts)
473+
Stream.get(self, Stream::READ_MODE, { file_id: id }.update(options).update(opts))
472474
end
473475

474-
def write_stream(filename, opts)
475-
Stream.get(self, Stream::WRITE_MODE, { filename: filename }.merge!(options).merge!(opts))
476+
def write_stream(filename, **opts)
477+
Stream.get(self, Stream::WRITE_MODE, { filename: filename }.update(options).update(opts))
476478
end
477479

478480
def chunks_name

lib/mongo/grid/stream/write.rb

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,20 +82,26 @@ def initialize(fs, options)
8282
@open = true
8383
end
8484

85-
# Write to the GridFS bucket from the source stream.
85+
# Write to the GridFS bucket from the source stream or a string.
8686
#
8787
# @example Write to GridFS.
8888
# stream.write(io)
8989
#
90-
# @param [ IO ] io The source io stream to upload from.
90+
# @param [ String | IO ] io The string or IO object to upload from.
9191
#
9292
# @return [ Stream::Write ] self The write stream itself.
9393
#
9494
# @since 2.1.0
9595
def write(io)
9696
ensure_open!
9797
@indexes ||= ensure_indexes!
98-
@length += io.size
98+
@length += if io.respond_to?(:bytesize)
99+
# String objects
100+
io.bytesize
101+
else
102+
# IO objects
103+
io.size
104+
end
99105
chunks = File::Chunk.split(io, file_info, @n)
100106
@n += chunks.size
101107
chunks_collection.insert_many(chunks) unless chunks.empty?
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
require 'spec_helper'
2+
3+
describe 'GridFS bucket integration' do
4+
let(:fs) do
5+
authorized_client.database.fs
6+
end
7+
8+
describe 'UTF-8 string write' do
9+
let(:data) { "hello\u2210" }
10+
11+
before do
12+
data.length.should_not == data.bytesize
13+
end
14+
15+
shared_examples 'round-trips' do
16+
it 'round-trips' do
17+
stream = fs.open_upload_stream('test') do |stream|
18+
stream.write(data_to_write)
19+
end
20+
21+
actual = nil
22+
fs.open_download_stream(stream.file_id) do |stream|
23+
actual = stream.read
24+
end
25+
26+
actual.encoding.name.should == 'ASCII-8BIT'
27+
actual.should == data.dup.force_encoding('binary')
28+
end
29+
end
30+
31+
context 'in binary encoding' do
32+
let(:data_to_write) do
33+
data.force_encoding('binary').freeze
34+
end
35+
36+
it_behaves_like 'round-trips'
37+
end
38+
39+
context 'in UTF-8 encoding' do
40+
let(:data_to_write) do
41+
data.encoding.name.should == 'UTF-8'
42+
data.freeze
43+
end
44+
45+
it_behaves_like 'round-trips'
46+
end
47+
end
48+
end

0 commit comments

Comments
 (0)