diff --git a/app/controllers/blogs_controller.rb b/app/controllers/blogs_controller.rb
index 9e0888a..c7bf234 100644
--- a/app/controllers/blogs_controller.rb
+++ b/app/controllers/blogs_controller.rb
@@ -63,15 +63,19 @@ def destroy
 
   def import
     file = params[:attachment]
-    data = CSV.parse(file.to_io, headers: true, encoding: 'utf8')
-    # Start code to handle CSV data
-    ActiveRecord::Base.transaction do
-      data.each do |row|
-        current_user.blogs.create!(row.to_h)
-      end
+
+    if file.present?
+      # Persist the upload under a unique, path-safe name ("#{Time.now}" embeds spaces/colons and collides for same-second uploads)
+      temp_path = Rails.root.join('tmp', "import_#{Time.now.to_i}_#{SecureRandom.hex(4)}.csv")
+      File.open(temp_path, 'wb') { |f| f.write(file.read) }
+
+      # Enqueue the background job; pass the path as a String — ActiveJob cannot serialize a Pathname argument
+      BulkImportBlogsJob.perform_later(10_000, temp_path.to_s, current_user.id)
+
+      redirect_to blogs_path, notice: "Blog upload started. You will be notified when it's done."
+    else
+      redirect_to blogs_path, alert: "Please upload a valid CSV file."
     end
-    # End code to handle CSV data
-    redirect_to blogs_path
   end
 
   private
diff --git a/app/jobs/blogs_api_response_job.rb b/app/jobs/blogs_api_response_job.rb
new file mode 100644
index 0000000..ec8c359
--- /dev/null
+++ b/app/jobs/blogs_api_response_job.rb
@@ -0,0 +1,25 @@
+class BlogsApiResponseJob < ApplicationJob
+  queue_as :default
+
+  def perform(blog_ids)
+    api_responses = []
+
+    blog_ids.each do |blog_id|
+      # Simulate some network latency
+      sleep(0.1)
+
+      # Appending Hash for bulk insert response
+      api_responses << {
+        api_response_id: "blog-#{SecureRandom.hex}-#{blog_id}",
+        api_status: ApiResponse.api_statuses.keys.sample,
+        blog_id: blog_id,
+        created_at: Time.now,
+        updated_at: Time.now
+      }
+    end
+
+    # Bulk Insert
+    ApiResponse.insert_all(api_responses) unless api_responses.empty?
+  end
+
+end
diff --git a/app/jobs/bulk_import_blogs_job.rb b/app/jobs/bulk_import_blogs_job.rb
new file mode 100644
index 0000000..16f6110
--- /dev/null
+++ b/app/jobs/bulk_import_blogs_job.rb
@@ -0,0 +1,16 @@
+class BulkImportBlogsJob < ApplicationJob
+  queue_as :default
+
+  def perform(batch_size, file_path, user_id)
+    # Process the uploaded CSV, then always clean up the temp file
+    if File.exist?(file_path)
+      # Call the service to process the file
+      BulkImportBlogsService.new(batch_size, file_path, user_id).process
+    else
+      Rails.logger.error "File not found: #{file_path}"
+    end
+  ensure
+    # Delete in `ensure` so the temp file is removed even if processing raises
+    File.delete(file_path) if File.exist?(file_path)
+  end
+end
diff --git a/app/jobs/memory_leak_job.rb b/app/jobs/memory_leak_job.rb
index b55b51f..fa283f8 100644
--- a/app/jobs/memory_leak_job.rb
+++ b/app/jobs/memory_leak_job.rb
@@ -1,47 +1,30 @@
 class MemoryLeakJob < ApplicationJob
   queue_as :default
 
-  # The purpose of this job to take each blog record and send it to an api and save that api response.
-
-  def perform
-    blogs = Blog.all
+  def perform(batch_size = 1000)
+    # Collects ids of blogs that pass validation, one batch at a time
+    valid_blogs_ids = []
 
-    blogs.each do |blog|
-      validate_and_process(blog)
+    # Load records in batches (honouring the batch_size argument) to avoid memory overflow
+    Blog.find_in_batches(batch_size: batch_size) do |batch|
+      batch.each do |blog|
+        if blog_valid?(blog)
+          valid_blogs_ids << blog.id
+        else
+          Rails.logger.info "Invalid blog: #{blog.id}"
+        end
+      end
+      # Hand this batch of valid ids to the API-response job; skip empty batches
+      BlogsApiResponseJob.perform_later(valid_blogs_ids) unless valid_blogs_ids.empty?
+      valid_blogs_ids = [] # rebind a fresh array: never mutate the array already handed to the job
     end
+  end
 
   private
 
-  def validate_and_process(blog)
-    # Perform some validations
-    if blog_valid?(blog)
-      # Make an API request
-      blog_to_api(blog)
-    else
-      Rails.logger.info "Invalid blog: #{blog.id}"
-    end
-
-    # Memory leak: storing blog in an array, which grows indefinitely
-    @processed_blogs ||= []
-    @processed_blogs << blog
-
-    # This prevents the blog object from being garbage collected
-  end
-
   def blog_valid?(blog)
     blog.title.present? && blog.body.present?
   end
 
-  def blog_to_api(blog)
-    # Mock API call - can be replaced with real HTTP call
-    sleep(0.1) # Simulate some network latency
-    temp_id = 'blog-id'
-    # Save API Response
-    api_response_id = temp_id.gsub("id","#{SecureRandom.hex}-#{blog.id}")
-    blog.api_responses.create!(
-      api_response_id: api_response_id,
-      api_status: ApiResponse.api_statuses.keys.sample
-    )
-  end
 end
\ No newline at end of file
diff --git a/app/models/blog.rb b/app/models/blog.rb
index 70d1a02..e52d951 100644
--- a/app/models/blog.rb
+++ b/app/models/blog.rb
@@ -1,4 +1,6 @@
 class Blog < ApplicationRecord
   belongs_to :user
   has_many :api_responses
+
+  validates :title, :body, presence: true
 end
diff --git a/app/services/bulk_import_blogs_service.rb b/app/services/bulk_import_blogs_service.rb
new file mode 100644
index 0000000..e758724
--- /dev/null
+++ b/app/services/bulk_import_blogs_service.rb
@@ -0,0 +1,68 @@
+require 'csv'
+
+class BulkImportBlogsService
+
+  def initialize(batch_size, file_path, user_id)
+    @batch_size = batch_size
+    @file_path = file_path
+    @user_id = user_id
+  end
+
+  def process
+    blogs_data = []
+    invalid_blogs = []
+    # Read CSV file line by line instead of loading the whole file ('UTF-8' — 'utf8' is not a valid Ruby encoding name)
+    CSV.foreach(@file_path, headers: true, encoding: 'UTF-8') do |row|
+      # Validate the row before adding it to the blogs_data array
+      if check_blog_valid?(row)
+        blogs_data << row.to_h.merge(
+          user_id: @user_id,
+          created_at: Time.now,
+          updated_at: Time.now
+        )
+      else
+        # Store invalid records for logging
+        invalid_blogs << row.to_h
+      end
+      # If batch size is reached, insert and log in bulk
+      handle_bulk_insert_and_logging(blogs_data, invalid_blogs) if blogs_data.size >= @batch_size
+    end
+    # Insert and log remaining records after finishing CSV processing
+    handle_bulk_insert_and_logging(blogs_data, invalid_blogs)
+  end
+
+  private
+
+  # Checks if a blog record is valid
+  def check_blog_valid?(row)
+    blog = Blog.new(row.to_h.merge(user_id: @user_id))
+    blog.valid?
+  end
+
+  # Performs bulk insert of valid blog records
+  def bulk_insert_blogs(blogs_data)
+    ActiveRecord::Base.transaction do
+      Blog.insert_all(blogs_data)
+    end
+    blogs_data.clear
+  end
+
+  # Logs invalid blog records into a CSV file
+  def log_invalid_blogs(invalid_blogs)
+    error_log_file = Rails.root.join('log', "invalid_blogs_#{Time.current.to_i}.csv")
+
+    CSV.open(error_log_file, "a") do |csv|
+      # Write headers only if the file is empty
+      csv << ["Title", "Body", "User ID", "Errors"] if File.zero?(error_log_file)
+      invalid_blogs.each { |record| csv << record.values } # CSV rows must be Arrays, not Hashes
+    end
+    invalid_blogs.clear
+  end
+
+  # Handles both bulk inserting valid blogs and logging invalid ones
+  def handle_bulk_insert_and_logging(blogs_data, invalid_blogs)
+    bulk_insert_blogs(blogs_data) unless blogs_data.empty?
+    log_invalid_blogs(invalid_blogs) unless invalid_blogs.empty?
+  end
+
+end
\ No newline at end of file