Use RestClient::Resource for authenticated requests and add per-page text fetching.

Credentials are now passed to RestClient::Resource (user/password) instead of
being CGI-escaped and embedded in the request URL.

Review fixes folded into this patch:
  - Client#get sends `params` as the query string (`params: params`);
    RestClient::Resource#get treats a bare hash as HTTP headers.
  - Client#delete takes `path` as its first argument again; the body
    referenced `path` while the signature no longer declared it.
  - API::Page#page escapes with URI::DEFAULT_PARSER.escape; URI.escape is
    obsolete and removed in Ruby 3.0.
  - API::Page#page YARD comment describes pages, not entities.
  - Document#valid_page_number? uses && instead of `and`.
Blob ids on the `index` lines predate these fixes.

diff --git a/documentcloud.gemspec b/documentcloud.gemspec
index 94750fb..9ee63d4 100644
--- a/documentcloud.gemspec
+++ b/documentcloud.gemspec
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
 require 'document_cloud/version'
 
 Gem::Specification.new do |spec|
-  spec.add_dependency "rest-client", "~> 1.6.7"
+  spec.add_dependency "rest-client", "~> 1.8.0"
   spec.add_dependency 'multi_json', '~> 1.8'
 
   spec.name = 'documentcloud'
diff --git a/lib/document_cloud.rb b/lib/document_cloud.rb
index 5eab31d..c4b33a3 100644
--- a/lib/document_cloud.rb
+++ b/lib/document_cloud.rb
@@ -1,9 +1,11 @@
 require 'rest_client'
 require 'multi_json'
 require 'date'
+require 'uri'
 
 require_relative 'document_cloud/document'
 require_relative 'document_cloud/project'
+require_relative 'document_cloud/page'
 require_relative 'document_cloud/search_results'
 require_relative 'document_cloud/default'
 require_relative 'document_cloud/configurable'
@@ -12,7 +14,7 @@ module DocumentCloud
 
   class << self
     include DocumentCloud::Configurable
-    
+
     # Delegate to a DocumentCloud::Client
     #
     # @return [DocumentCloud::Client]
@@ -20,25 +22,25 @@ def client
       @client = DocumentCloud::Client.new(credentials) unless defined?(@client)
       @client
     end
-    
+
     # Has a client been initialized on the DocumentCloud module?
     #
     # @return [Boolean]
     def client?
       !!@client
     end
-    
-    
+
+
     def respond_to?(method_name, include_private=false)
       client.respond_to?(method_name, include_private) || super
     end
-    
+
     private
 
-    def method_missing(method_name, *args, &block)
-      return super unless client.respond_to?(method_name)
-      client.send(method_name, *args, &block)
-    end
-    
+      def method_missing(method_name, *args, &block)
+        return super unless client.respond_to?(method_name)
+        client.send(method_name, *args, &block)
+      end
+
   end
-end
\ No newline at end of file
+end
diff --git a/lib/document_cloud/api/page.rb b/lib/document_cloud/api/page.rb
new file mode 100644
index 0000000..e1eaa3e
--- /dev/null
+++ b/lib/document_cloud/api/page.rb
@@ -0,0 +1,28 @@
+module DocumentCloud
+  module API
+    module Page
+      include DocumentCloud::API::Utils
+
+      # Fetch the text of a single document page
+      #
+      # @param id [String] The document id
+      # @param page_number [Integer] The page to fetch
+      # @param url [String] Page text URL template containing a {page} placeholder
+      # @return [DocumentCloud::Page] The fetched page
+      def page(id, page_number, url)
+        url = URI::DEFAULT_PARSER.escape(replace_page_number(url, page_number))
+        text = get(url, {}, false)
+        DocumentCloud::Page.new(id: id,
+                                page_number: page_number,
+                                text: text)
+      end
+
+      private
+
+        def replace_page_number(url, page_number)
+          url.sub(/\{page\}/, page_number.to_s)
+        end
+
+    end
+  end
+end
diff --git a/lib/document_cloud/client.rb b/lib/document_cloud/client.rb
index ec4d425..23291d8 100644
--- a/lib/document_cloud/client.rb
+++ b/lib/document_cloud/client.rb
@@ -3,6 +3,7 @@
 require_relative 'api/search'
 require_relative 'api/upload'
 require_relative 'api/document'
+require_relative 'api/page'
 require_relative 'api/update'
 require_relative 'api/destroy'
 require_relative 'api/entities'
@@ -20,41 +21,58 @@ class Client
     include DocumentCloud::API::Destroy
     include DocumentCloud::API::Entities
     include DocumentCloud::API::Projects
+    include DocumentCloud::API::Page
     include DocumentCloud::API::CreateProject
     include DocumentCloud::API::UpdateProject
     include DocumentCloud::Configurable
-    
+
     def initialize(options={})
       DocumentCloud::Configurable.keys.each do |key|
         instance_variable_set(:"@#{key}", options[key] || DocumentCloud.instance_variable_get(:"@#{key}"))
       end
     end
-    
+
     # Perform HTTP GET request
-    def get(path, params={})
-      RestClient.get request_base+path, {params: params}
+    def get(path, params = {}, use_request_base = true)
+      url = construct_url(path, use_request_base)
+      resource = construct_resource(url)
+      resource.get params: params
     end
-    
+
     # Perform HTTP POST request
-    def post(path, params={})
-      RestClient.post request_base+path, params
+    def post(path, params = {}, use_request_base = true)
+      url = construct_url(path, use_request_base)
+      resource = construct_resource(url)
+      resource.post params
     end
-    
+
     # Perform HTTP PUT request
-    def put(path, params={})
-      RestClient.put request_base+path, params
+    def put(path, params = {}, use_request_base = true)
+      url = construct_url(path, use_request_base)
+      resource = construct_resource(url)
+      resource.put params
     end
-    
+
     # Perform HTTP DELETE request
-    def delete(path)
-      RestClient.delete request_base+path
+    def delete(path, use_request_base = true)
+      url = construct_url(path, use_request_base)
+      resource = construct_resource(url)
+      resource.delete
     end
-    
+
     private
-    
-    def request_base
-      "#{DocumentCloud::Default.http_mode}://#{@email}:#{@password}@#{DocumentCloud::Default.endpoint}"
-    end
-    
+
+      def construct_url(path, use_request_base)
+        use_request_base ? request_base + path : path
+      end
+
+      def construct_resource(url)
+        RestClient::Resource.new(url, user: @email, password: @password)
+      end
+
+      def request_base
+        "#{DocumentCloud::Default.http_mode}://#{DocumentCloud::Default.endpoint}"
+      end
+
   end
-end
\ No newline at end of file
+end
diff --git a/lib/document_cloud/configurable.rb b/lib/document_cloud/configurable.rb
index 129bb66..bbcc172 100644
--- a/lib/document_cloud/configurable.rb
+++ b/lib/document_cloud/configurable.rb
@@ -1,39 +1,27 @@
-require 'cgi'
 module DocumentCloud
   module Configurable
     attr_writer :email, :password
-    
+
     class << self
       def keys
         @keys ||= [:email, :password]
       end
     end
-    
+
     # Allow block configuration
     def configure
       yield self
-      format_email!
-      format_password!
       self
     end
-    
+
     private
-    
-    # @return [Hash]
-    def credentials
-      {
-        email: @email,
-        password: @password
-      }
-    end
-    
-    # Ensure email is correct format for RestClient posts
-    def format_email!
-      @email = CGI.escape @email
-    end
-    def format_password!
-      @password = CGI.escape @password
-    end
-    
+
+      # @return [Hash]
+      def credentials
+        {
+          email: @email,
+          password: @password
+        }
+      end
   end
-end
\ No newline at end of file
+end
diff --git a/lib/document_cloud/document.rb b/lib/document_cloud/document.rb
index bcfe8fe..818fa1b 100644
--- a/lib/document_cloud/document.rb
+++ b/lib/document_cloud/document.rb
@@ -2,7 +2,7 @@ module DocumentCloud
   class Document
     attr_reader :id, :title, :access, :pages, :description, :source,
                 :canonical_url, :language, :display_language, :created_at, :updated_at
-    
+
     def initialize(attrs={})
       @id = attrs[:id]
       @title = attrs[:title]
@@ -17,35 +17,52 @@ def initialize(attrs={})
       @updated_at = DateTime.parse(attrs[:updated_at])
       @resources = attrs[:resources]
     end
-    
+
     def pdf
       @resources[:pdf]
     end
-    
+
     def print_annotations
       @resources[:print_annotations]
     end
-    
+
     def related_article
       @resources[:related_article]
     end
-    
+
     def text
       @resources[:text]
     end
-    
+
+    # Fetch the text of one page of this document (memoized per page number).
+    #
+    # @param page_number [Integer] The page to fetch
+    # @return [DocumentCloud::Page, nil] nil unless page_number is an Integer in 1..pages
+    def page(page_number)
+      return unless valid_page_number?(page_number)
+      @page ||= {}
+      @page[page_number] ||= DocumentCloud.page(@id, page_number, @resources[:page][:text])
+    end
+
     def thumbnail
       @resources[:thumbnail]
     end
-    
+
     def image(page, size=1)
       @resources[:page][:image].gsub(/\{page\}/, page.to_s).gsub(/\{size\}/,size.to_s)
     end
-    
+
     def entities
       @entities ||= DocumentCloud.entities(@id)
-      @entities
     end
-    
+
+    private
+
+      def valid_page_number?(page_number)
+        page_number.is_a?(Integer) &&
+          page_number <= pages &&
+          page_number > 0
+      end
+
   end
-end
\ No newline at end of file
+end
diff --git a/lib/document_cloud/page.rb b/lib/document_cloud/page.rb
new file mode 100644
index 0000000..09c5c40
--- /dev/null
+++ b/lib/document_cloud/page.rb
@@ -0,0 +1,12 @@
+module DocumentCloud
+  # Value object holding the text of one page of a document.
+  class Page
+    attr_reader :document_id, :page_number, :text
+
+    def initialize(attrs={})
+      @document_id = attrs[:id]
+      @page_number = attrs[:page_number]
+      @text = attrs[:text]
+    end
+  end
+end