From bab7385d3ec0a2ad3a5e79262452845a6bdc52ce Mon Sep 17 00:00:00 2001 From: Caleb Date: Fri, 19 Feb 2016 10:09:01 +1300 Subject: [PATCH 01/11] Automatic editor changes to trailing spaces and indentation --- lib/document_cloud/client.rb | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/lib/document_cloud/client.rb b/lib/document_cloud/client.rb index ec4d425..0b74666 100644 --- a/lib/document_cloud/client.rb +++ b/lib/document_cloud/client.rb @@ -23,38 +23,38 @@ class Client include DocumentCloud::API::CreateProject include DocumentCloud::API::UpdateProject include DocumentCloud::Configurable - + def initialize(options={}) DocumentCloud::Configurable.keys.each do |key| instance_variable_set(:"@#{key}", options[key] || DocumentCloud.instance_variable_get(:"@#{key}")) end end - + # Perform HTTP GET request def get(path, params={}) RestClient.get request_base+path, {params: params} end - + # Perform HTTP POST request def post(path, params={}) RestClient.post request_base+path, params end - + # Perform HTTP PUT request def put(path, params={}) RestClient.put request_base+path, params end - + # Perform HTTP DELETE request def delete(path) RestClient.delete request_base+path end - + private - - def request_base - "#{DocumentCloud::Default.http_mode}://#{@email}:#{@password}@#{DocumentCloud::Default.endpoint}" - end - + + def request_base + "#{DocumentCloud::Default.http_mode}://#{@email}:#{@password}@#{DocumentCloud::Default.endpoint}" + end + end -end \ No newline at end of file +end From 2275adf2df7561a0beeff2fb4652208ee16677b7 Mon Sep 17 00:00:00 2001 From: Caleb Date: Fri, 19 Feb 2016 11:46:17 +1300 Subject: [PATCH 02/11] Update RestClient --- documentcloud.gemspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentcloud.gemspec b/documentcloud.gemspec index 94750fb..9ee63d4 100644 --- a/documentcloud.gemspec +++ b/documentcloud.gemspec @@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) require 'document_cloud/version' Gem::Specification.new do |spec| - spec.add_dependency "rest-client", "~> 1.6.7" + spec.add_dependency "rest-client", "~> 1.8.0" spec.add_dependency 'multi_json', '~> 1.8' spec.name = 'documentcloud' From f8e6165f5752526df1d91d75a5330a1e7549b7d9 Mon Sep 17 00:00:00 2001 From: Caleb Date: Fri, 19 Feb 2016 11:47:54 +1300 Subject: [PATCH 03/11] Refactor to use `RestClient::Resource` This offloads URI escaping of the username and password for HTTP Basic Authentication to RestClient, and sets up the ability to request other authenticated resource URLs directly. --- lib/document_cloud.rb | 3 ++- lib/document_cloud/client.rb | 35 +++++++++++++++++++++-------- lib/document_cloud/configurable.rb | 36 ++++++++++-------------------- 3 files changed, 40 insertions(+), 34 deletions(-) diff --git a/lib/document_cloud.rb b/lib/document_cloud.rb index 5eab31d..4d08950 100644 --- a/lib/document_cloud.rb +++ b/lib/document_cloud.rb @@ -1,6 +1,7 @@ require 'rest_client' require 'multi_json' require 'date' +require 'open-uri' require_relative 'document_cloud/document' require_relative 'document_cloud/project' @@ -41,4 +42,4 @@ def method_missing(method_name, *args, &block) end end -end \ No newline at end of file +end diff --git a/lib/document_cloud/client.rb b/lib/document_cloud/client.rb index 0b74666..f49777d 100644 --- a/lib/document_cloud/client.rb +++ b/lib/document_cloud/client.rb @@ -31,29 +31,46 @@ def initialize(options={}) end # Perform HTTP GET request - def get(path, params={}) - RestClient.get request_base+path, {params: params} + def get(path, params = {}, use_request_base = true) + url = construct_url(path, use_request_base) + resource = construct_resource(url) + resource.get params end # Perform HTTP POST request - def post(path, params={}) - RestClient.post request_base+path, params + def post(path, params = {}, use_request_base = true) + url = construct_url(path, use_request_base) + resource = construct_resource(url) + resource.post params end # Perform HTTP PUT request - def put(path, params={}) - RestClient.put request_base+path, params + def put(path, params = {}, use_request_base = true) + url = construct_url(path, use_request_base) + resource = construct_resource(url) + resource.put params end # Perform HTTP DELETE request - def delete(path) - RestClient.delete request_base+path + def delete(params = {}, use_request_base = true) + url = construct_url(path, use_request_base) + resource = construct_resource(url) + resource.delete end private + def construct_url(path, use_request_base) + use_request_base ? request_base + path : path + end + + def construct_resource(url) + puts url, @email + RestClient::Resource.new(url, user: @email, password: @password) + end + def request_base - "#{DocumentCloud::Default.http_mode}://#{@email}:#{@password}@#{DocumentCloud::Default.endpoint}" + "#{DocumentCloud::Default.http_mode}://#{DocumentCloud::Default.endpoint}" end end diff --git a/lib/document_cloud/configurable.rb b/lib/document_cloud/configurable.rb index 129bb66..bbcc172 100644 --- a/lib/document_cloud/configurable.rb +++ b/lib/document_cloud/configurable.rb @@ -1,39 +1,27 @@ -require 'cgi' module DocumentCloud module Configurable attr_writer :email, :password - + class << self def keys @keys ||= [:email, :password] end end - + # Allow block configuration def configure yield self - format_email! - format_password! self end - + private - - # @return [Hash] - def credentials - { - email: @email, - password: @password - } - end - - # Ensure email is correct format for RestClient posts - def format_email! - @email = CGI.escape @email - end - def format_password! - @password = CGI.escape @password - end - + + # @return [Hash] + def credentials + { + email: @email, + password: @password + } + end end -end \ No newline at end of file +end From 1d1d4d4f07e7c97b4173128771b82119ef6eae3f Mon Sep 17 00:00:00 2001 From: Caleb Date: Fri, 19 Feb 2016 13:08:49 +1300 Subject: [PATCH 04/11] Created a page class --- lib/document_cloud/page.rb | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 lib/document_cloud/page.rb diff --git a/lib/document_cloud/page.rb b/lib/document_cloud/page.rb new file mode 100644 index 0000000..09c5c40 --- /dev/null +++ b/lib/document_cloud/page.rb @@ -0,0 +1,11 @@ +module DocumentCloud + class Page + attr_reader :document_id, :page_number, :text + + def initialize(attrs={}) + @document_id = attrs[:id] + @page_number = attrs[:page_number] + @text = attrs[:text] + end + end +end From 01e86fcdc7936366593b6f9f7400cee2e3ed7a03 Mon Sep 17 00:00:00 2001 From: Caleb Date: Fri, 19 Feb 2016 13:10:08 +1300 Subject: [PATCH 05/11] Page API to build page objects --- lib/document_cloud/api/page.rb | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 lib/document_cloud/api/page.rb diff --git a/lib/document_cloud/api/page.rb b/lib/document_cloud/api/page.rb new file mode 100644 index 0000000..4b500b3 --- /dev/null +++ b/lib/document_cloud/api/page.rb @@ -0,0 +1,26 @@ +module DocumentCloud + module API + module Page + include DocumentCloud::API::Utils + + # Fetch document entities + # + # @param id [String] The document id + # @returns [Hash] The fetched document entities + def page(id, page_number, url) + url = replace_page_number(url, page_number) + text = get(url, {}, false) + DocumentCloud::Page.new(id: id, + page_number: page_number, + text: text) + end + + private + + def replace_page_number(url, page_number) + url.sub(/\{page\}/, page_number.to_s) + end + + end + end +end From ce1ad8aa2a24e22e49a36780c22fc24a2b57ce26 Mon Sep 17 00:00:00 2001 From: Caleb Date: Fri, 19 Feb 2016 13:11:42 +1300 Subject: [PATCH 06/11] Require page --- lib/document_cloud.rb | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/lib/document_cloud.rb b/lib/document_cloud.rb index 4d08950..4bea205 100644 --- a/lib/document_cloud.rb +++ b/lib/document_cloud.rb @@ -5,6 +5,7 @@ require_relative 'document_cloud/document' require_relative 'document_cloud/project' +require_relative 'document_cloud/page' require_relative 'document_cloud/search_results' require_relative 'document_cloud/default' require_relative 'document_cloud/configurable' @@ -13,7 +14,7 @@ module DocumentCloud class << self include DocumentCloud::Configurable - + # Delegate to a DocumentCloud::Client # # @return [DocumentCloud::Client] @@ -21,25 +22,25 @@ def client @client = DocumentCloud::Client.new(credentials) unless defined?(@client) @client end - + # Has a client been initialized on the DocumentCloud module? # # @return [Boolean] def client? !!@client end - - + + def respond_to?(method_name, include_private=false) client.respond_to?(method_name, include_private) || super end - + private - def method_missing(method_name, *args, &block) - return super unless client.respond_to?(method_name) - client.send(method_name, *args, &block) - end - + def method_missing(method_name, *args, &block) + return super unless client.respond_to?(method_name) + client.send(method_name, *args, &block) + end + end end From e354c0240fe54db63c62d903cc3a158d47814260 Mon Sep 17 00:00:00 2001 From: Caleb Date: Fri, 19 Feb 2016 13:12:04 +1300 Subject: [PATCH 07/11] Include page API in client --- lib/document_cloud/client.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/document_cloud/client.rb b/lib/document_cloud/client.rb index f49777d..84110c7 100644 --- a/lib/document_cloud/client.rb +++ b/lib/document_cloud/client.rb @@ -3,6 +3,7 @@ require_relative 'api/search' require_relative 'api/upload' require_relative 'api/document' +require_relative 'api/page' require_relative 'api/update' require_relative 'api/destroy' require_relative 'api/entities' @@ -20,6 +21,7 @@ class Client include DocumentCloud::API::Destroy include DocumentCloud::API::Entities include DocumentCloud::API::Projects + include DocumentCloud::API::Page include DocumentCloud::API::CreateProject include DocumentCloud::API::UpdateProject include DocumentCloud::Configurable From 731854ee64f8d1a5b1917fa8225402555a486dc9 Mon Sep 17 00:00:00 2001 From: Caleb Date: Fri, 19 Feb 2016 13:14:58 +1300 Subject: [PATCH 08/11] Include page methods in DocumentCloud::Document instances --- lib/document_cloud/document.rb | 35 +++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/lib/document_cloud/document.rb b/lib/document_cloud/document.rb index bcfe8fe..818fa1b 100644 --- a/lib/document_cloud/document.rb +++ b/lib/document_cloud/document.rb @@ -2,7 +2,7 @@ module DocumentCloud class Document attr_reader :id, :title, :access, :pages, :description, :source, :canonical_url, :language, :display_language, :created_at, :updated_at - + def initialize(attrs={}) @id = attrs[:id] @title = attrs[:title] @@ -17,35 +17,48 @@ def initialize(attrs={}) @updated_at = DateTime.parse(attrs[:updated_at]) @resources = attrs[:resources] end - + def pdf @resources[:pdf] end - + def print_annotations @resources[:print_annotations] end - + def related_article @resources[:related_article] end - + def text @resources[:text] end - + + def page(page_number) + return unless valid_page_number?(page_number) + @page ||= {} + @page[page_number] ||= DocumentCloud.page(@id, page_number, @resources[:page][:text]) + end + def thumbnail @resources[:thumbnail] end - + def image(page, size=1) @resources[:page][:image].gsub(/\{page\}/, page.to_s).gsub(/\{size\}/,size.to_s) end - + def entities @entities ||= DocumentCloud.entities(@id) - @entities end - + + private + + def valid_page_number?(page_number) + page_number.is_a? Integer and + page_number <= pages and + page_number > 0 + end + end -end \ No newline at end of file +end From a8efdd07c0e1666b2b2d562ad009442890b8a54c Mon Sep 17 00:00:00 2001 From: Caleb Date: Fri, 19 Feb 2016 15:30:53 +1300 Subject: [PATCH 09/11] Remove debugging puts line --- lib/document_cloud/client.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/document_cloud/client.rb b/lib/document_cloud/client.rb index 84110c7..23291d8 100644 --- a/lib/document_cloud/client.rb +++ b/lib/document_cloud/client.rb @@ -67,7 +67,6 @@ def construct_url(path, use_request_base) end def construct_resource(url) - puts url, @email RestClient::Resource.new(url, user: @email, password: @password) end From 95c147b3b5d020fc9bf8f0e81d97bccc93219405 Mon Sep 17 00:00:00 2001 From: Caleb Date: Fri, 19 Feb 2016 16:34:04 +1300 Subject: [PATCH 10/11] Require URI for escaping URLs with the new authenticated page text API call --- lib/document_cloud.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/document_cloud.rb b/lib/document_cloud.rb index 4bea205..c4b33a3 100644 --- a/lib/document_cloud.rb +++ b/lib/document_cloud.rb @@ -1,7 +1,7 @@ require 'rest_client' require 'multi_json' require 'date' -require 'open-uri' +require 'uri' require_relative 'document_cloud/document' require_relative 'document_cloud/project' From 5909d7f8bdd776ef30d19408064852857416e6ce Mon Sep 17 00:00:00 2001 From: Caleb Date: Fri, 19 Feb 2016 16:52:51 +1300 Subject: [PATCH 11/11] URI escape URL --- lib/document_cloud/api/page.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/document_cloud/api/page.rb b/lib/document_cloud/api/page.rb index 4b500b3..e1eaa3e 100644 --- a/lib/document_cloud/api/page.rb +++ b/lib/document_cloud/api/page.rb @@ -8,7 +8,7 @@ module Page # @param id [String] The document id # @returns [Hash] The fetched document entities def page(id, page_number, url) - url = replace_page_number(url, page_number) + url = URI.escape(replace_page_number(url, page_number)) text = get(url, {}, false) DocumentCloud::Page.new(id: id, page_number: page_number,