diff --git a/gemmapy/gemmapy_api.py b/gemmapy/gemmapy_api.py
index 4b34546..c39e4f1 100644
--- a/gemmapy/gemmapy_api.py
+++ b/gemmapy/gemmapy_api.py
@@ -2,70 +2,128 @@
 """
 Gemma python API (https://gemma.msl.ubc.ca/rest/v2/)
 """
+import enum
+import json
+import logging
+import os
+import subprocess
+import tarfile
+import tempfile
+import warnings
+from getpass import getpass
+from io import StringIO, BytesIO
+from os.path import join
+from typing import Optional, List, Callable, Any
 
-from gemmapy import sdk
-from gemmapy import _processors as ps
-from gemmapy import _validators as vs
-from gemmapy import _subprocessors as sub
-from typing import Optional, List, Callable
-from pandas import DataFrame
-import pandas as pd
-import numpy as np
 import anndata as ad
+import numpy as np
+import pandas as pd
+import scanpy
 from anndata import AnnData
-from io import StringIO
-import warnings
-import json
+from pandas import DataFrame
 
+from gemmapy import _processors as ps
+from gemmapy import _subprocessors as sub
+from gemmapy import _validators as vs
+from gemmapy import sdk
+
+logger = logging.getLogger(__name__)
+
+class GemmaPath(enum.Enum):
+    PROD = "prod"
+    DEV = "dev"
+    STAGING = "staging"
 
 class GemmaPy(object):
     """
     Main API class
     """
 
-    def __init__(self, auth:list|tuple=None, path="prod"):
+    def __init__(self, auth: Optional[list | tuple] = None,
+                 path: Optional[GemmaPath | str] = None):
         """
         :param list auth: (optional) A list or tuple of credential strings, e.g.
-          (your_username, your_password)
-        :param bool devel: (optional) If True development version of Gemma API will be
-          used. Default is False.
+         (your_username, your_password). Note that you may also define your Gemma
+         credentials using `GEMMA_USERNAME` and `GEMMA_PASSWORD` environment
+         variables. For a more secure approach, you can also provide a
+         `GEMMA_PASSWORD_CMD` variable that produces your password
+         (e.g. `pass gemma` using https://www.passwordstore.org/). If only a
+         username is supplied, a password prompt will be used.
+        :param str path: (optional) Override the path to use for the REST API.
+         You may use one of the enumerated values in GemmaPath or a string.
+         Three special values are recognized: "prod", "staging" and "dev",
+         although only "prod" is publicly accessible. The default is the value
+         from the OpenAPI specification used to generate the SDK, which is
+         usually equivalent to PROD.
         """
 
         configuration = sdk.Configuration()
-        if path == "prod":
-            pass
-            # configuration.host = 'https://gemma.msl.ubc.ca/rest/v2'
-        elif path == 'dev':
+        if path == GemmaPath.PROD or path == 'prod':
+            logger.debug("Using production endpoint.")
+            configuration.host = 'https://gemma.msl.ubc.ca/rest/v2'
+        elif path == GemmaPath.DEV or path == 'dev':
             configuration.host = 'https://dev.gemma.msl.ubc.ca/rest/v2'
-        elif path == 'staging':
+        elif path == GemmaPath.STAGING or path == 'staging':
             configuration.host = "https://staging-gemma.msl.ubc.ca/rest/v2"
-        else:
+        elif path is not None:
             configuration.host = path
-        
+        else:
+            # use the default configuration in the openapi.json file
+            pass
 
         if auth is not None:
+            if len(auth) != 1 and len(auth) != 2:
+                raise ValueError(
+                    'There must be exactly one or two values in the auth parameter.')
             configuration.username = auth[0]
-            configuration.password = auth[1]
+            if len(auth) == 2:
+                configuration.password = auth[1]
+            else:
+                configuration.password = getpass(
+                    f'Supply your password for {configuration.username}@{configuration.host}: ')
+        elif os.environ.get('GEMMA_USERNAME'):
+            logger.debug(
+                'Reading username for %s from $GEMMA_USERNAME.',
+                configuration.host)
+            configuration.username = os.getenv('GEMMA_USERNAME')
+            if os.getenv('GEMMA_PASSWORD'):
+                logger.debug("Reading password for %s@%s from $GEMMA_PASSWORD.",
+                             configuration.username, configuration.host)
+                configuration.password = os.getenv('GEMMA_PASSWORD')
+            elif os.getenv('GEMMA_PASSWORD_CMD'):
+                logger.debug(
+                    "Reading password for %s@%s from $GEMMA_PASSWORD_CMD (%s).",
+                    configuration.username, configuration.host,
+                    os.getenv('GEMMA_PASSWORD_CMD'))
+                password = subprocess.run(os.getenv('GEMMA_PASSWORD_CMD'),
+                                          shell=True, check=True,
+                                          stdout=subprocess.PIPE,
+                                          text=True).stdout
+                configuration.password = password.splitlines()[0]
+            else:
+                logger.debug(
+                    'Could not read GEMMA_PASSWORD nor GEMMA_PASSWORD_CMD from environment, the password will be prompted.')
+                configuration.password = getpass(
+                    f'Supply your password for {configuration.username}@{configuration.host}: ')
 
         # create an instance of the API class
         self.raw = sdk.DefaultApi(sdk.ApiClient(configuration))
 
-
     # /resultSets/count get_number_of_result_sets ------
     # unimplemented
     # we don't need this here, not included
-    
+
     # /resultSets/{resultSet}
-    # this was only used in the past to access result set metadata by 
+    # this was only used in the past to access result set metadata by
     # using a hidden parameter. this information can be accessed using get_result_sets
     # enpoint instead
-    
-    # /resultSets/{resultSet_}, get_result_set_as_tsv ------ 
+
+    # /resultSets/{resultSet_}, get_result_set_as_tsv ------
     # made internal to not cause unneeded confusion
     # use get_differential_expression_values instead
     def __get_result_set(self, result_set:int, **kwargs):
         """
-        
+
         :param result_set: DESCRIPTION
         :type result_set: int
         :param **kwargs: Additional arguments to pass to raw.get_result_set_as_tsv
@@ -73,18 +131,18 @@ def __get_result_set(self, result_set:int, **kwargs):
         :rtype: TYPE
 
         """
-        
+
         response = self.raw.get_result_set(result_set, **kwargs,
                                            _force_table = True)
-        
+
         df = ps.process_de_matrix(response, result_set,self)
-        
+
         return df
-        
 
-    
+
+
     # /resultSets, get_result_sets -----
-    
+
     def get_result_sets(self,
                         datasets:Optional[List[str|int]] = None,
                         result_sets:Optional[List[int]] = None,
@@ -95,29 +153,29 @@ def get_result_sets(self,
                         **kwargs)->DataFrame:
         """Returns queried result set
 
-        Output and usage of this function is mostly identical to 
+        Output and usage of this function is mostly identical to
         get_dataset_differential_expression_analyses. The principal difference
-        being the ability to restrict your result sets, being able to query 
-        across multiple datasets and being able to use the filter argument to 
+        being the ability to restrict your result sets, being able to query
+        across multiple datasets and being able to use the filter argument to
         search based on result set properties.
 
 
-        
+
         :param datasets: A numerical dataset identifier or a dataset short name, defaults to None
         :type datasets: Optional[List[str|int]], optional
         :param result_sets: 	A result set identifier. Note that result set identifiers are not static and can change when Gemma re-runs analyses internally. Whem using these as inputs, try to make sure you access a currently existing result set ID by basing them on result sets returned for a particular dataset or filter used in get_result_sets, defaults to None
         :type result_sets: Optional[List[int]], optional
-        :param filter: Filter results by matching expression. Use 
-          filter_properties function to get a list of all available parameters. 
-          These properties can be combined using "and" "or" clauses and may 
-          contain common operators such as "=", "<" or "in". (e.g. 
-          "taxon.commonName = human", "taxon.commonName in (human,mouse), 
+        :param filter: Filter results by matching expression. Use
+          filter_properties function to get a list of all available parameters.
+          These properties can be combined using "and" "or" clauses and may
+          contain common operators such as "=", "<" or "in". (e.g.
+          "taxon.commonName = human", "taxon.commonName in (human,mouse),
           "id < 1000"), defaults to None
         :type filter: str, optional
         :param offset: The offset of the first retrieved result., defaults to 0
         :type offset: int, optional
         :param limit: Limits the result to specified amount of objects.
-          Has a maximum value of 100. Use together with offset and the 
+          Has a maximum value of 100. Use together with offset and the
           total_elements attribute in the output to compile all data if needed.
           Alternatively get_all_pages function can be used with all functions
           including offset and limit parameters, defaults to 20
@@ -127,10 +185,10 @@ def get_result_sets(self,
           defaults to "+id"
         :type sort: str, optional
         :param **kwargs: Additional arguments to pass to raw.get_result_sets
-        :return: A DataFrame with information about the queried result sets. 
+        :return: A DataFrame with information about the queried result sets.
           Note that this function does not return differential expression values
           themselves
-        
+
           The fields of the DataFrame are:
             - result_ID: Result set ID of the differential expression analysis. May represent multiple factors in a single model.
             - contrast_ID: Id of the specific contrast factor. Together with the result.ID they uniquely represent a given contrast.
@@ -142,14 +200,14 @@ def get_result_sets(self,
             - experimental_factors: Characteristics of the experimental group. This field is a DataFrame
             - is_subset: True if the result set belong to a subset, False if not. Subsets are created when performing differential expression to avoid unhelpful comparisons.
             - subset_factor: Characteristics of the subset. This field is a DataFrame
-        
+
         :rtype: DataFrame
 
         """
-        
+
         filter = vs.add_to_filter(filter, 'id', result_sets)
         filter = vs.compress_arg(filter)
-        
+
         kwargs = vs.remove_nones(
             datasets = datasets,
             filter = filter,
@@ -157,7 +215,7 @@ def get_result_sets(self,
             limit = limit,
             sort = sort,
             **kwargs)
-        
+
         response = self.raw.get_result_sets(**kwargs)
         df = ps.process_DifferentialExpressionAnalysisResultSetValueObject(response.data,self)
         ps.attach_attributes(df, response.to_dict())
@@ -165,16 +223,16 @@ def get_result_sets(self,
         return df
 
     # /annotations/children ------
-    
+
     def get_annotation_children(self, uri:str,**kwargs)->DataFrame:
         """
         Acquires child terms of a given URI based on ontologies loaded into Gemma. Propagated relations are subClassOf and has_part
-        
-        :param annotation: Term URI
+
+        :param uri: Term URI
         :type annotation: str
-        :param **kwargs: DESCRIPTION
-        :param **kwargs: Additional arguments to pass to raw.search_annotations
-        :return: A DataFrame with annotations for the child terms. 
+        :param kwargs: DESCRIPTION
+        :param kwargs: Additional arguments to pass to raw.search_annotations
+        :return: A DataFrame with annotations for the child terms.
           The fields of the DataFrame are:
             - category_name: Category that the annotation belongs to
             - category_URI: URI for the category_name
@@ -183,7 +241,7 @@ def get_annotation_children(self, uri:str,**kwargs)->DataFrame:
         :rtype: DataFrame
 
         """
-        
+
         response = self.raw.get_annotations_children(uri=uri,**kwargs)
         df = ps.process_search_annotations(response.data)
         return df
@@ -191,12 +249,12 @@ def get_annotation_children(self, uri:str,**kwargs)->DataFrame:
     def get_annotation_parents(self,uri:str,**kwargs)->DataFrame:
         """
         Acquires parent terms of a given URI based on ontologies loaded into Gemma. Propagated relations are subClassOf and has_part
-        
+
         :param annotation: Term URI
         :type annotation: str
         :param **kwargs: DESCRIPTION
         :param **kwargs: Additional arguments to pass to raw.search_annotations
-        :return: A DataFrame with annotations for the parent terms. 
+        :return: A DataFrame with annotations for the parent terms.
           The fields of the DataFrame are:
             - category_name: Category that the annotation belongs to
             - category_URI: URI for the category_name
@@ -205,11 +263,11 @@ def get_annotation_parents(self,uri:str,**kwargs)->DataFrame:
         :rtype: DataFrame
 
         """
-        
+
         response = self.raw.get_annotations_parents(uri=uri,**kwargs)
         df = ps.process_search_annotations(response.data)
         return df
-    
+
     # /annotations/search, search_annotations --------
     def search_annotations(self, query:List[str], **kwargs)->DataFrame:
         """
@@ -219,7 +277,7 @@ def search_annotations(self, query:List[str], **kwargs)->DataFrame:
         :param query: The search query
         :type query: List[str]
         :param **kwargs: Additional arguments to pass to raw.search_annotations
-        :return: A DataFrame with annotations matching the given identifiers. 
+        :return: A DataFrame with annotations matching the given identifiers.
           The fields of the DataFrame are:
             - category_name: Category that the annotation belongs to
             - category_URI: URI for the category_name
@@ -228,10 +286,10 @@ def search_annotations(self, query:List[str], **kwargs)->DataFrame:
         :rtype: DataFrame
 
         """
-        
+
         response = self.raw.search_annotations(query=query, **kwargs)
         return ps.process_search_annotations(response.data)
-    
+
 
     # /datasets/{dataset}/annotations, get_dataset_annotations ----------
     def get_dataset_annotations(self, dataset:str|int, **kwargs)->DataFrame:
@@ -242,13 +300,13 @@ def get_dataset_annotations(self, dataset:str|int, **kwargs)->DataFrame:
         :param dataset: 	A numerical dataset identifier or a dataset short name
         :type dataset: str|int
         :param **kwargs: Additional arguments to pass to raw.get_dataset_annotations
-        :return: A DataFrame with information about the annotations of the queried dataset. 
+        :return: A DataFrame with information about the annotations of the queried dataset.
           The fields of the DataFrame are:
             - class_name: Name of the annotation class (e.g. organism part)
             - class_URI: URI for the annotation class
             - term_name: Name of the annotation term (e.g. lung)
             - term_URI: URI for the annotation term
-            - object_class: Class of object that the term originated from.       
+            - object_class: Class of object that the term originated from.
         :rtype: DataFrame
 
         """
@@ -256,31 +314,31 @@ def get_dataset_annotations(self, dataset:str|int, **kwargs)->DataFrame:
         response = self.raw.get_dataset_annotations(dataset, **kwargs)
         df = ps.process_annotations(response.data)
         ps.attach_attributes(df, response.to_dict())
-        
+
         return df
-    
+
     # /datasets/{dataset}/design, get_dataset_design -----
     # removed as not useful
-    
+
     # /datasets/{datasets}/expressions/differential ------
     # unimplemented
     # not sure how the parameters for this endpoint works and doesn't seem essential
-    
+
     # /datasets/{dataset}/analyses/differential, get_dataset_differential_expression_analyses ------
-    def get_dataset_differential_expression_analyses(self, 
+    def get_dataset_differential_expression_analyses(self,
                                                      dataset:str|int,
                                                      **kwargs)->DataFrame:
         """Retrieve annotations and surface level stats for a dataset's differential analyses
-        
+
         :param dataset: A numerical dataset identifier or a dataset short name
         :type dataset: str|int
         :param **kwargs: Additional arguments to pass to raw.get_dataset_differential_expression_analyses
         :return: A data table with information about the differential expression
-          analysis of the queried dataset. Note that this funciton does not return 
+          analysis of the queried dataset. Note that this funciton does not return
           differential expression values themselves. Use
           get_differential_expression_values to get differential expression
           values (see examples).
-          
+
           The fields of the DataFrame are:
             - result_ID: Result set ID of the differential expression analysis. May represent multiple factors in a single model.
             - contrast_ID: Id of the specific contrast factor. Together with the result.ID they uniquely represent a given contrast.
@@ -290,8 +348,8 @@ def get_dataset_differential_expression_analyses(self,
             - factor_ID: ID of the factor
             - baseline_factors: Characteristics of the baseline. This field is a DataFrame
             - experimental_factors: Characteristics of the experimental group. This field is a DataFrame
-            - isSubset: True if the result set belong to a subset, False if not. 
-              Subsets are created when performing differential expression to avoid 
+            - isSubset: True if the result set belong to a subset, False if not.
+              Subsets are created when performing differential expression to avoid
               unhelpful comparisons.
             - subset_factor: Characteristics of the subset. This field is a DataFrame
             - probes_analyzed: Number of probesets represented in the contrast
@@ -302,15 +360,14 @@ def get_dataset_differential_expression_analyses(self,
         response = self.raw.get_dataset_differential_expression_analyses(dataset,
                                                                          **kwargs)
         df = ps.process_dea(response.data)
-        
+
         return df
-    
+
     # /datasets/{dataset}/analyses/differential/resultSets -----
     # unimplemented
-    # unsure about the distinction between this and the get_dataset_differential_expression_analyses. 
+    # unsure about the distinction between this and the get_dataset_differential_expression_analyses.
     # seem to contain the reduntant information
-    
-    
+
     # /datasets/{dataset}/data -----
     # deprecated, remove later
     def get_dataset_expression(self, dataset:str|int, **kwargs)->DataFrame:
@@ -318,35 +375,32 @@ def get_dataset_expression(self, dataset:str|int, **kwargs)->DataFrame:
         Deprecated in favour of get_dataset_expression
         """
         warnings.warn('get_dataset_expression is deprecated, please use get_dataset_processed_expression instead')
-        
+
         return self.get_dataset_processed_expression(dataset,**kwargs)
 
-    
-    
-    
     # /datasets/{datasets}/expressions/genes/{genes}, get_dataset_expression_for_genes ------
     def get_dataset_expression_for_genes(self,
                                          datasets:List[str|int],
                                          genes:List[int],
                                          keep_non_specific:bool = False,
                                          consolidate = None,
-                                         **kwargs)->dict[int:DataFrame]:
+                                         **kwargs) -> dict[int, DataFrame]:
         """Retrieve the expression data matrix of a set of datasets and genes
 
 
         :param datasets: A numerical dataset identifier or a dataset short name
         :type datasets: List[str|int]
-        :param genes: An ensembl gene identifier which typically starts with 
-          ensg or an ncbi gene identifier or an official gene symbol approved by 
+        :param genes: An ensembl gene identifier which typically starts with
+          ensg or an ncbi gene identifier or an official gene symbol approved by
           hgnc
         :type genes: List[int]
         :param keep_non_specific: If True, results from probesets that are not
           specific to the gene will also be returned., defaults to False
         :type keep_non_specific: bool, optional
-        :param consolidate: An option for gene expression level consolidation. 
+        :param consolidate: An option for gene expression level consolidation.
           If empty, will return every probe for the genes. "pickmax" to pick the
-          probe with the highest expression, "pickvar" to pick the prove with the 
-          highest variance and "average" for returning the average expression, 
+          probe with the highest expression, "pickvar" to pick the prove with the
+          highest variance and "average" for returning the average expression,
           defaults to None
         :type consolidate: TYPE, optional
         :param **kwargs: Additional arguments to pass to raw.get_dataset_expression_for_genes
@@ -354,32 +408,32 @@ def get_dataset_expression_for_genes(self,
         :rtype: dict[int:DataFrame]
 
         """
-        
+
         kwargs = vs.remove_nones(
             keep_non_specific = keep_non_specific,
             consolidate = consolidate,
             **kwargs)
-        
-        response = self.raw.get_datasets_expression_levels_for_genes(datasets, genes, 
+
+        response = self.raw.get_datasets_expression_levels_for_genes(datasets,
+                                                                     genes,
                                                              **kwargs)
         df = ps.process_dataset_gene_expression(response.data,self)
-        
+
         return df
-        
+
     # datasets/{datasets}/expressions/pca -----
     # unimplemented
-    
-    
+
     # datasets/{dataset}/platforms ------
     def get_dataset_platforms(self, dataset:str|int, **kwargs)->DataFrame:
         """
-        
+
         :param dataset: A numerical dataset identifier or a dataset short name
         :type dataset: str|int
         :param **kwargs: Additional arguments to pass to raw.get_dataset_platforms
         :return: A DataFrame with information about the platforms.
           The fields of the DataFrame are:
-                
+
             - platform_ID: Id number of the platform given by Gemma
             - platform_type: Type of the platform.
             - platform_description: Free text field describing the platform.
@@ -389,22 +443,21 @@ def get_dataset_platforms(self, dataset:str|int, **kwargs)->DataFrame:
             - taxon_ID: Internal identifier given to the species by Gemma
             - taxon_NCBI: NCBI ID of the taxon
             - taxon_database_name: Underlying database used in Gemma for the taxon
-            - taxon_database_ID: ID of the underlying database used in Gemma for the taxon  
+            - taxon_database_ID: ID of the underlying database used in Gemma for the taxon
         :rtype: DataFrame
 
         """
         response = self.raw.get_dataset_platforms(dataset, **kwargs)
         df = ps.process_platforms(response.data)
-        
+
         return(df)
-    
-    
+
     # datasets/{dataset}/data/processed ------
-    
+
     def get_dataset_processed_expression(self,dataset:str|int,**kwargs)->DataFrame:
         """Retrieve processed expression data of a dataset
 
-        
+
         :param dataset:  numerical dataset identifier or a dataset short name
         :type dataset: str|int
         :param **kwargs: Additional arguments to pass to raw.get_dataset_processed_expression
@@ -413,55 +466,55 @@ def get_dataset_processed_expression(self,dataset:str|int,**kwargs)->DataFrame:
 
         """
         response = self.raw.get_dataset_processed_expression(dataset, **kwargs)
-        
+
         df = ps.process_expression(response,dataset,self)
-        
+
         return df
-    
+
     # datasets/{dataset}/quantitationTypes get_dataset_quantitation_types ----------
-    
+
     def get_dataset_quantitation_types(self,dataset:int|str,**kwargs)->DataFrame:
         """Retrieve quantitation types of a dataset
 
-        
+
         :param dataset: 	A numerical dataset identifier or a dataset short name
         :type dataset: int|str
         :param **kwargs: Additional arguments to pass to raw.get_dataset_quantitation_types
         :return: A DataFrame containing the quantitation types
-        
+
           The fields of the output DataFrame are:
             - id: If of the quantitation type. Any raw quantitation type can be
                by get_dataset_raw_expression function using this id.
             - name: Name of the quantitation type
             - description: Description of the quantitation type
-            - type: Type of the quantitation type. Either raw or processed. 
-              Each dataset will have one processed quantitation type which is the 
+            - type: Type of the quantitation type. Either raw or processed.
+              Each dataset will have one processed quantitation type which is the
               data returned using get_dataset_processed_expression
             - ratio: Whether or not the quanitation type is a ratio of multiple
               quantitation types. Typically TRUE for processed TWOCOLOR quantitation type.
-            - preferred: The preferred raw quantitation type. This version is 
+            - preferred: The preferred raw quantitation type. This version is
               used in generation of the processed data within gemma.
             - recomputed: If TRUE this quantitation type is generated by
               recomputing raw data files Gemma had access to
         :rtype: DataFrame
 
         """
-        
+
         response = self.raw.get_dataset_quantitation_types(dataset, **kwargs)
         df = ps.process_QuantitationTypeValueObject(response.data)
-        
-        
+
         return df
 
     # datasets/{dataset}/data/raw, get_dataset_raw_expression ---------
     def get_dataset_raw_expression(self,dataset:int|str,
-                                   quantitation_type:[int],**kwargs)->DataFrame:
+                                   quantitation_type: int,
+                                   **kwargs) -> DataFrame:
         """
-        
+
         :param dataset: A numerical dataset identifier or a dataset short name
         :type dataset: int|str
-        :param quantitation_type: Quantitation type id. These can be acquired 
-          using get_dataset_quantitation_types function. This endpoint can only 
+        :param quantitation_type: Quantitation type id. These can be acquired
+          using get_dataset_quantitation_types function. This endpoint can only
           return non-processed quantitation types.
         :type quantitation_type: [int]
         :param **kwargs: Additional arguments to pass to raw.get_dataset_raw_expression
@@ -469,20 +522,19 @@ def get_dataset_raw_expression(self,dataset:int|str,
         :rtype: DataFrame
 
         """
-        
+
         kwargs = vs.remove_nones(
             quantitation_type = quantitation_type,
             **kwargs)
-        
+
         response = self.raw.get_dataset_raw_expression(dataset, **kwargs)
-        
+
         df = ps.process_expression(response,dataset,self)
-        
+
         return df
-    
-    
+
     # datasets/{dataset}/samples, get_dataset_samples --------
-    def get_dataset_samples(self, 
+    def get_dataset_samples(self,
                             dataset:int|str,
                             use_processed_quantitation_type:Optional[bool] = True,
                             **kwargs)->DataFrame:
@@ -496,7 +548,7 @@ def get_dataset_samples(self,
         :type use_processed_quantitationType: Optional[bool]
         :param **kwargs: Additional arguments to pass to raw.get_dataset_samples
         :return: A DataFrame with information about the samples of the queried dataset.
-        
+
           The fields of the DataFrame are:
             - sample_name: Internal name given to the sample.
             - sample_ID: Internal ID of the sample
@@ -512,51 +564,51 @@ def get_dataset_samples(self,
         response = self.raw.get_dataset_samples(dataset, use_processed_quantitation_type = use_processed_quantitation_type, **kwargs)
         df = ps.process_samples(response.data)
         return df
-        
-    # datasets/{dataset}/svd --- 
+
+    # datasets/{dataset}/svd ---
     # not implemented
-    
+
     # datasets, get_datasets ------
-    def get_datasets(self,query:Optional[str] = None, 
-                     filter:Optional[str] = None, 
-                     taxa:Optional[List[str]] = None, 
+    def get_datasets(self, query: Optional[str] = None,
+                     filter: Optional[str] = None,
+                     taxa: Optional[List[str]] = None,
                      uris:Optional[List[str]] = None,
                      offset:int = 0,
                      limit:int = 20,
                      sort:str = "+id",
                      **kwargs)->DataFrame:
         """
-        
-        :param query: The search query. Either plain text ('traumatic'), or an 
+
+        :param query: The search query. Either plain text ('traumatic'), or an
           ontology term URI ('http://purl.obolibrary.org/obo/UBERON_0002048').
-          Datasets that contain the given string in their short of full name will 
+          Datasets that contain the given string in their short of full name will
           also be matched., defaults to None
         :type query: Optional[str], optional
-        :param filter: Filter results by matching expression. Use 
-          filter_properties function to get a list of all available parameters. 
-          These properties can be combined using "and" "or" clauses and may 
-          contain common operators such as "=", "<" or "in". (e.g. 
-          "taxon.commonName = human", "taxon.commonName in (human,mouse), 
+        :param filter: Filter results by matching expression. Use
+          filter_properties function to get a list of all available parameters.
+          These properties can be combined using "and" "or" clauses and may
+          contain common operators such as "=", "<" or "in". (e.g.
+          "taxon.commonName = human", "taxon.commonName in (human,mouse),
           "id < 1000"), defaults to None
         :type filter: Optional[str], optional
         :param taxa: A vector of taxon common names (e.g. human, mouse, rat).
-          Providing multiple species will return results for all species. These 
-          are appended to the filter and equivalent to filtering for 
+          Providing multiple species will return results for all species. These
+          are appended to the filter and equivalent to filtering for
           taxon.commonName property, defaults to None
-        :param taxa: A list of taxon common names (e.g. human, mouse, rat). 
-          Providing multiple species will return results for all species. 
-          These are appended to the filter and equivalent to filtering for 
+        :param taxa: A list of taxon common names (e.g. human, mouse, rat).
+          Providing multiple species will return results for all species.
+          These are appended to the filter and equivalent to filtering for
           taxon.commonName property, defaults to None
         :type taxa: Optional[List[str]], optional
         :param uris: A vector of ontology term URIs. Providing multiple terms
-          will return results containing any of the terms and their children. 
-          These are appended to the filter and equivalent to filtering for 
+          will return results containing any of the terms and their children.
+          These are appended to the filter and equivalent to filtering for
           allCharacteristics.valueUri, defaults to None
         :type uris: Optional[List[str]], optional
         :param offset: The offset of the first retrieved result., defaults to 0
         :type offset: int, optional
         :param limit: Limits the result to specified amount of objects.
-          Has a maximum value of 100. Use together with offset and the 
+          Has a maximum value of 100. Use together with offset and the
           total_elements attribute in the output to compile all data if needed.
           Alternatively get_all_pages function can be used with all functions
           including offset and limit parameters, defaults to 20
@@ -567,7 +619,7 @@ def get_datasets(self,query:Optional[str] = None,
         :type sort: str, optional
         :param **kwargs: Additional arguments to pass to raw.get_datasets
         :return: A DataFrame with information about the queried dataset(s).
-        
+
           The fields of the DataFrame are:
             - experiment_short_name: Shortname given to the dataset within Gemma. Often corresponds to accession ID
             - experiment_name: Full title of the dataset
@@ -594,7 +646,7 @@ def get_datasets(self,query:Optional[str] = None,
         :rtype: DataFrame
 
         """
-        
+
         filter = vs.add_to_filter(filter, 'allCharacteristics.valueUri', uris)
         filter = vs.add_to_filter(filter, 'taxon.commonName', taxa)
         filter = vs.compress_arg(filter)
@@ -605,52 +657,51 @@ def get_datasets(self,query:Optional[str] = None,
             limit = limit,
             sort = sort,
             **kwargs)
-        
+
         response = self.raw.get_datasets(**kwargs)
         df = ps.process_datasets(response.data)
         ps.attach_attributes(df, response.to_dict())
 
-        
+
         return df
-    
+
     # datasets/annotations -----
     # currently unimplemented
-    
-    
+
     # datasets/{datasets}, get_datasets_by_ids -----
     def get_datasets_by_ids(self, dataset:List[str|int],
-                            filter:Optional[str] = None, 
-                            taxa:Optional[List[str]] = None, 
+                            filter: Optional[str] = None,
+                            taxa: Optional[List[str]] = None,
                             uris:Optional[List[str]] = None,
                             offset:int = 0,
                             limit:int = 20,
                             sort:str = "+id",
                             **kwargs)->DataFrame:
         """
-        
+
         :param dataset: Numerical dataset identifiers or dataset short names.
         :type dataset: List[str|int]
-        :param filter: Filter results by matching expression. Use 
-          filter_properties function to get a list of all available parameters. 
-          These properties can be combined using "and" "or" clauses and may 
-          contain common operators such as "=", "<" or "in". (e.g. 
-          "taxon.commonName = human", "taxon.commonName in (human,mouse), 
+        :param filter: Filter results by matching expression. Use
+          filter_properties function to get a list of all available parameters.
+          These properties can be combined using "and" "or" clauses and may
+          contain common operators such as "=", "<" or "in". (e.g.
+          "taxon.commonName = human", "taxon.commonName in (human,mouse),
           "id < 1000"), defaults to None
         :type filter: Optional[str], optional
-        :param taxa: A list of taxon common names (e.g. human, mouse, rat). 
-          Providing multiple species will return results for all species. 
-          These are appended to the filter and equivalent to filtering for 
+        :param taxa: A list of taxon common names (e.g. human, mouse, rat).
+          Providing multiple species will return results for all species.
+          These are appended to the filter and equivalent to filtering for
           taxon.commonName property, defaults to None
         :type taxa: Optional[List[str]], optional
         :param uris: A vector of ontology term URIs. Providing multiple terms
-          will return results containing any of the terms and their children. 
-          These are appended to the filter and equivalent to filtering for 
+          will return results containing any of the terms and their children.
+          These are appended to the filter and equivalent to filtering for
           allCharacteristics.valueUri, defaults to None
         :type uris: Optional[List[str]], optional
         :param offset: The offset of the first retrieved result., defaults to 0
         :type offset: int, optional
         :param limit: Limits the result to specified amount of objects.
-          Has a maximum value of 100. Use together with offset and the 
+          Has a maximum value of 100. Use together with offset and the
           total_elements attribute in the output to compile all data if needed.
           Alternatively get_all_pages function can be used with all functions
           including offset and limit parameters, defaults to 20
@@ -661,7 +712,7 @@ def get_datasets_by_ids(self, dataset:List[str|int],
         :type sort: str, optional
         :param **kwargs: Additional arguments to pass to raw.get_datasets_by_ids
         :return: A DataFrame with information about the queried dataset(s).
-        
+
           The fields of the DataFrame are:
             - experiment_short_name: Shortname given to the dataset within Gemma. Often corresponds to accession ID
             - experiment_name: Full title of the dataset
@@ -673,10 +724,10 @@ def get_datasets_by_ids(self, dataset:List[str|int],
             - experiment_URI: URI of the original database
             - experiment_sample_count: Number of samples in the dataset
             - experiment_batch_effect_text: A text field describing whether the dataset has batch effects
-            - experimen_batch_corrected: Whether batch correction has been performed on the dataset.
-            - experimen_batch_confound: 0 if batch info isn't available, -1 if batch counfoud is detected, 1 if batch information is available and no batch confound found
-            - experimen_batch_effect: -1 if batch p value < 0.0001, 1 if batch p value > 0.1, 0 if otherwise and when there is no batch information is available or when the data is confounded with batches.
-            - experimen_raw_data: -1 if no raw data available, 1 if raw data was available. When available, Gemma reprocesses raw data to get expression values and batches
+            - experiment_batch_corrected: Whether batch correction has been performed on the dataset.
+            - experiment_batch_confound: 0 if batch info isn't available, -1 if batch counfoud is detected, 1 if batch information is available and no batch confound found
+            - experiment_batch_effect: -1 if batch p value < 0.0001, 1 if batch p value > 0.1, 0 if otherwise and when there is no batch information is available or when the data is confounded with batches.
+            - experiment_raw_data: -1 if no raw data available, 1 if raw data was available. When available, Gemma reprocesses raw data to get expression values and batches
             - geeq_q_score: Data quality score given to the dataset by Gemma.
             - geeq_s_score: Suitability score given to the dataset by Gemma. Refers to factors like batches, platforms and other aspects of experimental design
             - taxon_name: Name of the species
@@ -691,20 +742,20 @@ def get_datasets_by_ids(self, dataset:List[str|int],
 
         filter = vs.add_to_filter(filter, 'allCharacteristics.valueUri', uris)
         filter = vs.add_to_filter(filter, 'taxon.commonName', taxa)
-        
+
         filter = vs.compress_arg(filter)
-        
+
         kwargs = vs.remove_nones(
             filter = filter,
             offset = offset,
             limit = limit,
             sort = sort,
             **kwargs)
-        
+
         response = self.raw.get_datasets_by_ids(dataset, **kwargs)
         df = ps.process_datasets(response.data)
         ps.attach_attributes(df, response.to_dict())
-        
+
         return df
 
     # datasets/categories -----
@@ -716,12 +767,12 @@ def get_datasets_by_ids(self, dataset:List[str|int],
     # datasets/count -----
     # currently unimplemented
 
-    # genes/{gene}/goTerms -------   
-    
+    # genes/{gene}/goTerms -------
+
     def get_gene_go_terms(self, gene:str|int, **kwargs)->DataFrame:
         """
-        
-        :param gene: An ensembl gene identifier which typically starts with 
+
+        :param gene: An ensembl gene identifier which typically starts with
           ensg or an ncbi gene identifier or an official gene symbol approved by
           hgnc
         :type gene: str|int
@@ -730,7 +781,7 @@ def get_gene_go_terms(self, gene:str|int, **kwargs)->DataFrame:
           The fields of the output DataFrame are:
             - term_name: Name of the term
             - term_ID: ID of the term
-            - term_URI: URI of the term 
+            - term_URI: URI of the term
         :rtype: DataFrame
 
         """
@@ -738,12 +789,11 @@ def get_gene_go_terms(self, gene:str|int, **kwargs)->DataFrame:
         df = ps.process_GO(response.data)
         return df
 
-    
     # genes/{gene}/locations, get_gene_locations ----
-    
+
     def get_gene_locations(self, gene:str|int, **kwargs)->DataFrame:
         """
-        
+
         :param gene: DESCRIPTION
         :type gene: str|int
         :param **kwargs: DAdditional arguments to pass to raw.get_gene_locations
@@ -759,7 +809,7 @@ def get_gene_locations(self, gene:str|int, **kwargs)->DataFrame:
             - taxon_ID: Internal identifier given to the species by Gemma
             - taxon_NCBI: NCBI ID of the taxon
             - taxon_database_name: Underlying database used in Gemma for the taxon
-            - taxon_database_ID: ID of the underlying database used in Gemma for the taxon      
+            - taxon_database_ID: ID of the underlying database used in Gemma for the taxon
         :rtype: DataFrame
 
         """
@@ -767,31 +817,31 @@ def get_gene_locations(self, gene:str|int, **kwargs)->DataFrame:
         response = self.raw.get_gene_locations(gene, **kwargs)
         df = ps.process_gene_location(response.data)
         return df
-    
+
     # genes/{gene}/probes, get_gene_probes -----
-    
+
     def get_gene_probes(self, gene:str|int,
                         offset:int = 0,
                         limit:int = 20,
                         **kwargs)->DataFrame:
         """Retrieve the probes associated to a genes across all platforms
-        
+
         :param gene: An ensembl gene identifier which typically starts with
-          ensg or an ncbi gene identifier or an official gene symbol approved by 
+          ensg or an ncbi gene identifier or an official gene symbol approved by
           hgnc
         :type gene: str|int
         :param offset: The offset of the first retrieved result., defaults to 0
         :type offset: int, optional
         :param limit: Limits the result to specified amount of objects.
-          Has a maximum value of 100. Use together with offset and the 
+          Has a maximum value of 100. Use together with offset and the
           total_elements attribute in the output to compile all data if needed.
           Alternatively get_all_pages function can be used with all functions
           including offset and limit parameters, defaults to 20
         :type limit: int, optional
         :param **kwargs: Additional arguments to pass to raw.get_gene_probes
-        :return: A DataFrame with information about the probes representing a 
-          gene across all platrofms.
-        
+        :return: A DataFrame with information about the probes representing a
+          gene across all platforms.
+
           The fields of the output DataFrame are:
             - element_name: Name of the element. Typically the probeset name
             - element_description: A free text field providing optional information about the element
@@ -806,32 +856,32 @@ def get_gene_probes(self, gene:str|int,
             - taxon_ID: Internal identifier given to the species by Gemma
             - taxon_NCBI: NCBI ID of the taxon
             - taxon_database_name: Underlying database used in Gemma for the taxon
-            - taxon_database_ID: ID of the underlying database used in Gemma for the taxon    
+            - taxon_database_ID: ID of the underlying database used in Gemma for the taxon
         :rtype: DataFrame
 
         """
-        
+
         kwargs = vs.remove_nones(offset = offset,
                                  limit = limit,
                                  **kwargs)
-        
+
         response = self.raw.get_gene_probes(gene, **kwargs)
         df = ps.process_elements(response.data)
         ps.attach_attributes(df, response.to_dict())
         return df
-        
-        
+
     # genes/{genes}, get_genes-------
 
-    def get_genes(self, genes:int|str, **kwargs)->DataFrame:
+    def get_genes(self, genes: int | str | List[int | str],
+                  **kwargs) -> DataFrame:
         """Retrieve genes matching gene identifiers
 
-        :param genes: An ensembl gene identifier which typically starts with 
+        :param genes: An ensembl gene identifier which typically starts with
           ensg or an ncbi gene identifier or an official gene symbol approved by hgnc
         :type genes: int|str
         :param **kwargs: Additional arguments to pass to raw_get_genes
         :return: A DataFrame with the information about the querried genes.
-        
+
           The fields of the output DataFrame are:
             - gene_symbol: Symbol for the gene
             - gene_ensembl: Ensembl ID for the gene
@@ -844,18 +894,19 @@ def get_genes(self, genes:int|str, **kwargs)->DataFrame:
             - taxon_ID: Internal identifier given to the species by Gemma
             - taxon_NCBI: NCBI ID of the taxon
             - taxon_database_name: Underlying database used in Gemma for the taxon
-            - taxon_database_ID: ID of the underlying database used in Gemma for the taxon    
+            - taxon_database_ID: ID of the underlying database used in Gemma for the taxon
         :rtype: DataFrame
 
         """
 
-        response = self.raw.get_genes(genes, **kwargs)
+        response = self.raw.get_genes_by_ids(
+            genes if isinstance(genes, list) else [genes], **kwargs)
         df = ps.process_genes(response.data)
         return df
 
     # platforms/count -----
     # unimplemented
-    
+
     # platform/{platform}/annotations -----
     # in gemma.R this endpoint isn't implemented and uses a convenience function instead
     # here we just use the enpoint since the added functionality isn't needed
@@ -867,13 +918,13 @@ def get_genes(self, genes:int|str, **kwargs)->DataFrame:
     # (as of 2022-05-19)
     def get_platform_annotations(self, platform:int|str, **kwargs)->DataFrame:
         """Gets Gemma's platform annotations including mappings of microarray probes to genes.
-        
+
         :param platform: A platform numerical identifier or a platform short name
         :type platform: int|str
         :param **kwargs: Additional arguments to pass to raw.get_platform_annotations
         :type **kwargs: TYPE
         :return: A DataFrame of annotations
-        
+
           - ProbeName: Probeset names provided by the platform. Gene symbols for generic annotations
           - GeneSymbols: Genes that were found to be aligned to the probe sequence. Note that it is possible for probes to be non-specific. Alignment to multiple genes are indicated with gene symbols separated by "|"s
           - GeneNames: Name of the gene
@@ -882,7 +933,7 @@ def get_platform_annotations(self, platform:int|str, **kwargs)->DataFrame:
         :rtype: DataFrame
 
         """
-        
+
         api_response = self.raw.get_platform_annotations(platform, **kwargs)
         uncomment = api_response.split("\n#")
         api_response = uncomment[len(uncomment)-1]
@@ -898,20 +949,20 @@ def get_platform_datasets(self, platform:str|int,
                               **kwargs)->DataFrame:
         """Retrieve all experiments using a given platform
 
-        
+
         :param platform: A platform numerical identifier or a platform short name
         :type platform: str|int
         :param offset: The offset of the first retrieved result., defaults to 0
         :type offset: int, optional
         :param limit: Limits the result to specified amount of objects.
-          Has a maximum value of 100. Use together with offset and the 
+          Has a maximum value of 100. Use together with offset and the
           total_elements attribute in the output to compile all data if needed.
           Alternatively get_all_pages function can be used with all functions
           including offset and limit parameters, defaults to 20
         :type limit: int, optional
         :param **kwargs: Additional arguments to pass to raw.get_platform_datasets
         :return: A DataFrame with information about the queried dataset(s).
-        
+
           The fields of the DataFrame are:
             - experiment_short_name: Shortname given to the dataset within Gemma. Often corresponds to accession ID
             - experiment_name: Full title of the dataset
@@ -923,10 +974,10 @@ def get_platform_datasets(self, platform:str|int,
             - experiment_URI: URI of the original database
             - experiment_sample_count: Number of samples in the dataset
             - experiment_batch_effect_text: A text field describing whether the dataset has batch effects
-            - experimen_batch_corrected: Whether batch correction has been performed on the dataset.
-            - experimen_batch_confound: 0 if batch info isn't available, -1 if batch counfoud is detected, 1 if batch information is available and no batch confound found
-            - experimen_batch_effect: -1 if batch p value < 0.0001, 1 if batch p value > 0.1, 0 if otherwise and when there is no batch information is available or when the data is confounded with batches.
-            - experimen_raw_data: -1 if no raw data available, 1 if raw data was available. When available, Gemma reprocesses raw data to get expression values and batches
+            - experiment_batch_corrected: Whether batch correction has been performed on the dataset.
+            - experiment_batch_confound: 0 if batch info isn't available, -1 if batch counfoud is detected, 1 if batch information is available and no batch confound found
+            - experiment_batch_effect: -1 if batch p value < 0.0001, 1 if batch p value > 0.1, 0 if otherwise and when there is no batch information is available or when the data is confounded with batches.
+            - experiment_raw_data: -1 if no raw data available, 1 if raw data was available. When available, Gemma reprocesses raw data to get expression values and batches
             - geeq_q_score: Suitability score given to the dataset by Gemma. Refers to factors like batches, platforms and other aspects of experimental design
             - geeq_s_score: Data quality score given to the dataset by Gemma.
             - taxon_name: Name of the species
@@ -938,12 +989,11 @@ def get_platform_datasets(self, platform:str|int,
         :rtype: DataFrame
 
         """
-        
+
         kwargs = vs.remove_nones(offset = offset,
                                  limit = limit,
                                  **kwargs)
-        
-        
+
         response = self.raw.get_platform_datasets(platform, **kwargs)
         df = ps.process_datasets(response.data)
         ps.attach_attributes(df, response.to_dict())
@@ -951,18 +1001,17 @@ def get_platform_datasets(self, platform:str|int,
 
     # platforms/{platform}/elements/{probes} -----
     # not implemented
-    
+
     # platforms/{platform}/elements/{probe}/genes, get_platform_element_genes ----
 
-    def get_platform_element_genes(self, platform:str|int, 
+    def get_platform_element_genes(self, platform: str | int,
                                    probe:str|int,
                                    offset:int = 0,
                                    limit:int = 20,
                                    **kwargs)->DataFrame:
-        
-        
+
         """Retrieve the genes associated to a probe in a given platform
-        
+
         :param platform: A platform numerical identifier or a platform short name
         :type platform: str|int
         :param probe: A probe name or it's numerical identifier
@@ -970,7 +1019,7 @@ def get_platform_element_genes(self, platform:str|int,
         :param offset: The offset of the first retrieved result., defaults to 0
         :type offset: int, optional
         :param limit: Limits the result to specified amount of objects.
-          Has a maximum value of 100. Use together with offset and the 
+          Has a maximum value of 100. Use together with offset and the
           total_elements attribute in the output to compile all data if needed.
           Alternatively get_all_pages function can be used with all functions
           including offset and limit parameters, defaults to 20
@@ -994,11 +1043,11 @@ def get_platform_element_genes(self, platform:str|int,
         :rtype: DataFrame
 
         """
-        
+
         kwargs = vs.remove_nones(offset = offset,
                                  limit = limit,
                                  **kwargs)
-        
+
         response =  self.raw.get_platform_element_genes(platform, probe, **kwargs)
         df = ps.process_genes(response.data)
         ps.attach_attributes(df, response.to_dict())
@@ -1018,23 +1067,23 @@ def get_platforms(self,
                       **kwargs)->DataFrame:
         """
         Retrieve all platforms
-        
-        :param filter: Filter results by matching expression. Use 
+
+        :param filter: Filter results by matching expression. Use
           filter_properties function to get a list of all available parameters.
-          These properties can be combined using "and" "or" clauses and may 
-          contain common operators such as "=", "<" or "in". (e.g. 
-          "taxon.commonName = human", "taxon.commonName in (human,mouse), 
+          These properties can be combined using "and" "or" clauses and may
+          contain common operators such as "=", "<" or "in". (e.g.
+          "taxon.commonName = human", "taxon.commonName in (human,mouse),
           "id < 1000"), defaults to None
         :type filter: str, optional
-        :param taxa: A list of taxon common names (e.g. human, mouse, rat). 
-          Providing multiple species will return results for all species. These 
-          are appended to the filter and equivalent to filtering for 
+        :param taxa: A list of taxon common names (e.g. human, mouse, rat).
+          Providing multiple species will return results for all species. These
+          are appended to the filter and equivalent to filtering for
           taxon.commonName property, defaults to None
         :type taxa: List[str], optional
         :param offset: The offset of the first retrieved result., defaults to 0
         :type offset: int, optional
         :param limit: Limits the result to specified amount of objects.
-          Has a maximum value of 100. Use together with offset and the 
+          Has a maximum value of 100. Use together with offset and the
           total_elements attribute in the output to compile all data if needed.
           Alternatively get_all_pages function can be used with all functions
           including offset and limit parameters, defaults to 20
@@ -1045,7 +1094,7 @@ def get_platforms(self,
         :type sort: str, optional
         :param **kwargs: Additional arguments to raw.get_platforms_by_ids
         :return: A DataFrame with information about the platform(s).
-        
+
           The fields of the output DataFrame are:
 
             - platform_ID: Internal identifier of the platform
@@ -1064,52 +1113,50 @@ def get_platforms(self,
         :rtype: DataFrame
 
         """
-        
+
         filter = vs.add_to_filter(filter,"taxon.commonName",taxa)
-        
+
         kwargs = vs.remove_nones(filter = filter,
                                  offset = offset,
                                  limit = limit,
                                  sort = sort,
                                  **kwargs)
-        
+
         response =  self.raw.get_platforms(**kwargs)
         df = ps.process_platforms(response.data)
         ps.attach_attributes(df, response.to_dict())
         return df
 
-    # platforms/{platform}, get_platforms_by_ids ---- 
-    def get_platforms_by_ids(self, platforms:List[str|int], 
+    # platforms/{platform}, get_platforms_by_ids ----
+    def get_platforms_by_ids(self, platforms: List[str | int],
                              filter:str = None,
                              taxa:List[str] = None,
                              offset:int=0,
                              limit:int = 20,
                              sort:str="+id",
                              **kwargs)->DataFrame:
-   
-        
-        
+
         """Retrieve platforms by their identifiers
 
-        
+
         :param platforms: Platform numerical identifiers or platform short names.
         :type platforms: List[str|int]
-        :param filter: Filter results by matching expression. Use 
+        :param filter: Filter results by matching expression. Use
           filter_properties function to get a list of all available parameters.
-          These properties can be combined using "and" "or" clauses and may 
-          contain common operators such as "=", "<" or "in". (e.g. 
-          "taxon.commonName = human", "taxon.commonName in (human,mouse), 
+          These properties can be combined using "and" "or" clauses and may
+          contain common operators such as "=", "<" or "in". (e.g.
+          "taxon.commonName = human", "taxon.commonName in (human,mouse),
           "id < 1000"), defaults to None
         :type filter: str, optional
-        :param taxa: A list of taxon common names (e.g. human, mouse, rat). 
-          Providing multiple species will return results for all species. These 
-          are appended to the filter and equivalent to filtering for 
+        :param taxa: A list of taxon common names (e.g. human, mouse, rat).
+          Providing multiple species will return results for all species. These
+          are appended to the filter and equivalent to filtering for
           taxon.commonName property, defaults to None
         :type taxa: List[str], optional
         :param offset: The offset of the first retrieved result., defaults to 0
         :type offset: int, optional
         :param limit: Limits the result to specified amount of objects.
-          Has a maximum value of 100. Use together with offset and the 
+          Has a maximum value of 100. Use together with offset and the
           total_elements attribute in the output to compile all data if needed.
           Alternatively get_all_pages function can be used with all functions
           including offset and limit parameters, defaults to 20
@@ -1120,7 +1167,7 @@ def get_platforms_by_ids(self, platforms:List[str|int],
         :type sort: str, optional
         :param **kwargs: Additional arguments to raw.get_platforms_by_ids
         :return: A DataFrame with information about the platform(s).
-        
+
           The fields of the output DataFrame are:
 
             - platform_ID: Internal identifier of the platform
@@ -1146,17 +1193,16 @@ def get_platforms_by_ids(self, platforms:List[str|int],
                                  limit = limit,
                                  sort = sort,
                                  **kwargs)
-        
+
         response =  self.raw.get_platforms_by_ids(platforms, **kwargs)
         df = ps.process_platforms(response.data)
         ps.attach_attributes(df, response.to_dict())
         return df
-    
-    
+
     # search search ------
     # this enpdoint is not very useful when specific endpoints exist for specific
     # result types. keeping here for now for compatibility with R
-    
+
     def search_gemma(self,
                      query:str,
                      taxon:Optional[str|int]=None,
@@ -1165,11 +1211,10 @@ def search_gemma(self,
                      result_type:str = "experiment",
                      **kwargs)->list[sdk.SearchResultValueObjectObject]:
 
-        
         """
         Search everything in Gemma
-        
-        :param query: The search query. Either plain text ('traumatic'), or an 
+
+        :param query: The search query. Either plain text ('traumatic'), or an
           ontology term URI ('http://purl.obolibrary.org/obo/UBERON_0002048').
           Datasets that contain the given string in their short of full name
           will also be matched. Can be multiple identifiers separated by commas.
@@ -1178,7 +1223,7 @@ def search_gemma(self,
           or a taxon identifier that matches either its scientific or common
           name, defaults to None
         :type taxon: Optional[str|int], optional
-        :param platform: A platform numerical identifier or a platform short 
+        :param platform: A platform numerical identifier or a platform short
           name, defaults to None
         :type platform: Optional[str|int], optional
         :param limit: Defaults to 100 with a maximum value of 2000. Limits the
@@ -1190,26 +1235,26 @@ def search_gemma(self,
           documented in the API documentation., defaults to "experiment"
         :type result_type: str, optional
         :param **kwargs: Additional arguments to raw.search
-        :return: A list containing the results. Actual results are under the 
+        :return: A list containing the results. Actual results are under the
           result_object component as dicts
         :rtype: list[sdk.SearchResultValueObjectObject]
 
         """
-        
+
         result_type = vs.check_result_type(result_type)
-        
+
         kwargs = vs.remove_nones(query =query,
                                  taxon = taxon,
                                  platform = platform,
                                  limit = limit,
                                  result_types = [result_type],
                                  **kwargs)
-        
+
         response = self.raw.search(**kwargs)
         # df = ps.process_search(response.data,result_type)
-        
+
         return response.data
-    
+
     # taxa/{taxon}/genes/{gene}/locations----
     # unimplemented, redundant with get_gene_locations
 
@@ -1217,23 +1262,21 @@ def search_gemma(self,
     def get_taxa(self, **kwargs)->DataFrame:
         """
         Get all taxa within Gemma
-        
+
         :param **kwargs: Additional arguments to raw.get_taxa
         :return: A DataFrame including the names, IDs and database information
           about the taxons
         :rtype: DataFrame
 
         """
-        
+
         response =  self.raw.get_taxa(**kwargs)
-        
+
         df = ps.process_taxon(response.data)
         return df[df.isnull().taxon_name != True]
-        
-        
-    
+
     # taxa/{taxa}, get_taxa_by_ids -----
-    # implemented, hardly needed with 3 taxa  
+    # implemented, hardly needed with 3 taxa
 
 
 
@@ -1241,7 +1284,6 @@ def get_taxa(self, **kwargs)->DataFrame:
 
     # set_gemma_user is not needed since it's wrapped in the GemmaPy class
     # get_platform_annotations is the default get_platform_annotations
-    
 
     def make_design(self,samples:DataFrame,meta_type:str = 'text')->DataFrame:
         """
@@ -1258,32 +1300,29 @@ def make_design(self,samples:DataFrame,meta_type:str = 'text')->DataFrame:
         :rtype: DataFrame
 
         """
-        
-        categories = pd.concat([x[["factor_ID","factor_category","factor_category_URI"]] 
+
+        categories = pd.concat(
+            [x[["factor_ID", "factor_category", "factor_category_URI"]]
                                 for x in samples.sample_factor_values],
                   ignore_index = True).drop_duplicates()
-        
-        
+
         def get_val_uri(x):
-            return [",".join([str(z) if z is not None else "" 
-                              for z in y[y.factor_ID==x].value_URI]) 
+            return [",".join([str(z) if z is not None else ""
+                              for z in y[y.factor_ID == x].value_URI])
                     for y in samples.sample_factor_values]
-        
+
         factor_URIs = [get_val_uri(x) for x in categories.factor_ID]
-        
+
         def get_text(x):
             def get_summary(y):
                 return ','.join([z[1].summary
-                                 if z[1].summary is not None else z[1].value 
+                                 if z[1].summary is not None else z[1].value
                                  for z in y[y.factor_ID==x].iterrows()])
-            
+
             return [get_summary(y) for y in samples.sample_factor_values]
-            
-            
-        
+
         text = [get_text(x) for x in categories.factor_ID]
-        
-        
+
         if meta_type =='text':
             design_frame = pd.DataFrame({
                 categories.factor_category[i]:text[i] for i in range(len(text))
@@ -1295,23 +1334,20 @@ def get_summary(y):
         elif meta_type =='both':
             merged_name = [["|".join([categories.factor_category[i],
                                       categories.factor_category_URI[i]])] for i in range(len(text))]
-            
-            merged_col = [["|".join([text[i][j],factor_URIs[i][j]]) 
+
+            merged_col = [["|".join([text[i][j], factor_URIs[i][j]])
                            for j in range(len(text[i]))] for i in range(len(text))]
-            
-            
+
             design_frame = pd.DataFrame({
                 merged_name[i]:merged_col[i]
                 for i in range(len(text))})
-            
+
         design_frame.insert(loc = 0,column = "factor_values",
                             value = samples.sample_factor_values)
         design_frame.index = samples.sample_name
-        
+
         return design_frame
-        
-        
-    
+
     def __subset_factor_values(self,
                                factor_values,
                                differential_expressions:pd.DataFrame,
@@ -1319,51 +1355,50 @@ def __subset_factor_values(self,
                                contrast):
         out = sub.rep(True,len(factor_values))
         if differential_expressions is not None:
-            subset = differential_expressions[differential_expressions.result_ID == 
+            subset = differential_expressions[
+                differential_expressions.result_ID ==
                                      result_set].subset_factor.drop_duplicates()
             # result set should have the same subset for all contrasts
             assert len(subset) == 1
             if subset[0].shape[0]!=0:
                 subset_ids = subset[0].ID
-                
+
                 in_subset = [any(sub.list_in_list(x.ID, subset_ids)) for x in factor_values]
-                
+
                 out = out and in_subset
-            
+
             if contrast is not None:
                 cn = differential_expressions[
-                    list(differential_expressions.result_ID == result_set) and 
+                    list(differential_expressions.result_ID == result_set) and
                     list(differential_expressions.contrast_ID == str(contrast))]
-                
+
                 baseline_id = list(sub.unique(sub.break_list([list(x.ID) for x in cn.baseline_factors])))
                 baseline_factor_id = list(sub.unique(sub.break_list([list(x.factor_ID) for x in cn.baseline_factors])))
-                
+
                 contrast_id =  list(sub.unique(sub.break_list([list(x.ID) for x in cn.experimental_factors])))
                 contrast_factor_id =  list(sub.unique(sub.break_list([list(x.factor_ID) for x in cn.experimental_factors])))
-                
+
                 contrast_id = sub.match_by(contrast_id,baseline_factor_id, contrast_factor_id)
-                
+
                 def in_con(factor_value):
                     cond1 = all(sub.list_in_list(contrast_id, factor_value.ID)) or \
-                            all(sub.list_in_list(baseline_id,factor_value.ID))                   
-                    
+                            all(sub.list_in_list(baseline_id, factor_value.ID))
+
                     if len(contrast_id)==2:
-                        cond2 = (contrast_id[0] in factor_value.ID and \
+                        cond2 = (contrast_id[0] in factor_value.ID and
                                     baseline_id[1] in factor_value.ID) or \
-                            (contrast_id[1] in factor_value.ID and \
+                                (contrast_id[1] in factor_value.ID and
                                        baseline_id[0] in factor_value.ID)
-                        
+
                         cond1 = cond1 or cond2
-                    
+
                     return cond1
-                        
+
                 in_contrast = [in_con(x) for x in factor_values]
-                
+
                 out = out and in_contrast
-        
-        return out
-        
 
+        return out
 
     def get_dataset_object(self, datasets:List[str|int],
                            genes:Optional[List[str|int]] = None,
@@ -1373,46 +1408,46 @@ def get_dataset_object(self, datasets:List[str|int],
                            contrasts:Optional[List[str]] = None,
                            meta_type:str = 'text',
                            output_type:str = 'anndata',
-                           **kwargs)->dict[int:dict|AnnData]:
-        
+                           **kwargs) -> dict[int, dict | AnnData]:
+
         """Return a data structure including all relevant data related to
-        gene expression in a dataset. Either returns an anndata object or 
+        gene expression in a dataset. Either returns an anndata object or
         a dictionary with all the needed fields.
-        
-        
+
+
         :param datasets: Numerical dataset identifier
           dataset short names
         :type datasets: List[str|int]
-        :param genes: An ncbi gene identifier an, 
-          ensembl gene identifier which typically starts with ensg or an 
+        :param genes: An ncbi gene identifier an,
+          ensembl gene identifier which typically starts with ensg or an
           official gene symbol approved by hgnc, defaults to None
         :type genes: Optional[List[str|int]], optional
-        :param keep_non_specific: If True, results from 
+        :param keep_non_specific: If True, results from
           probesets that are not specific to the gene will also be returned,
           defaults to False
         :type keep_non_specific: TYPE, optional
-        :param consolidate: DESCRIPTION, An option for gene expression level consolidation. 
-          If empty, will return every probe for the genes. "pickmax" to pick 
-          the probe with the highest expression, "pickvar" to pick the prove 
-          with the highest variance and "average" for returning the average 
+        :param consolidate: DESCRIPTION, An option for gene expression level consolidation.
+          If empty, will return every probe for the genes. "pickmax" to pick
+          the probe with the highest expression, "pickvar" to pick the prove
+          with the highest variance and "average" for returning the average
           expression to None
         :type consolidate: Optional[str], optional
-        :param result_sets: Result set IDs of the a 
-          differential expression analysis. If provided, the output will only 
-          include the samples from the subset used in the result set ID. Must 
+        :param result_sets: Result set IDs of the a
+          differential expression analysis. If provided, the output will only
+          include the samples from the subset used in the result set ID. Must
           be the same length as datasets, defaults to None
         :type result_sets: Optional[List[int]], optional
-        :param contrasts: Contrast IDs of a differential 
-          expression contrast. Need result_sets to be defined to work. If 
+        :param contrasts: Contrast IDs of a differential
+          expression contrast. Need result_sets to be defined to work. If
           provided, the output will only include samples relevant to the '
           specific contrats. Must be the same length as datasets.
-        :param str meta_type: How should the metadata information should be 
+        :param str meta_type: How should the metadata information should be
           included. Can be "text", "uri" or "both". "text" and "uri" options, defaults to None
         :type contrasts: Optional[List[str]], optional
-        :param meta_type: How should the metadata information should be 
+        :param meta_type: How should the metadata information should be
           included. Can be "text", "uri" or "both". "text" and "uri" options, defaults to 'text'
         :type meta_type: str, optional
-        :param output_type: Type of the returned object. "anndata" for an 
+        :param output_type: Type of the returned object. "anndata" for an
           AnnData object and "dict" for a dictionary populated with DataFrames,
           defaults to 'anndata'
         :type output_type: str, optional
@@ -1424,32 +1459,26 @@ def get_dataset_object(self, datasets:List[str|int],
         :rtype: dict[int:dict|AnnData]
 
         """
-        
-
-        
-        
 
-        
         if output_type not in ["anndata","tidy","dict"]:
             raise ValueError('Please enter a valid output_type. anndata for'
                              '"anndata" objects, "tidy" for long form pandas'
                              'DataFrames, "dict" for dictionaries with separate'
                              'expression and metadata fields'
                               )
-            
+
         unique_sets = list(set(datasets))
-        
+
         metadata = {k:self.get_dataset_samples(k) for k in unique_sets}
-        
-        
+
         if genes is None:
             def get_exp(dataset):
                 exp = self.get_dataset_processed_expression(dataset)
                 meta = metadata[dataset]
-                
+
                 if not keep_non_specific:
                     exp = exp[~exp.GeneSymbol.str.contains("|",regex = False,na = True)]
-                
+
                 if consolidate is not None and consolidate =='pickmax':
                     mean_exp = exp[meta.sample_name].mean(axis=1,skipna=True)
                     exp = exp.iloc[list(sub.order(mean_exp,decreasing = True))]
@@ -1464,12 +1493,12 @@ def get_exp(dataset):
                             exp[exp.duplicated("GeneSymbol")]["GeneSymbol"]
                             ))
                         )
-                    
+
                     def get_mean(dup):
                         dup_subset = exp[exp.GeneSymbol == dup]
                         dup_mean =  exp[meta.sample_name].mean(axis = 0)
                         probe = "Averaged from " + " ".join(dup_subset.Probe)
-                        
+
                         gene_info = dup_subset.\
                             loc[:,
                                 np.array(~np.array(
@@ -1477,21 +1506,22 @@ def get_mean(dup):
                                         dup_subset.columns,["Probe"] +\
                                             list(meta.sample_name))))].iloc[0].\
                                 to_frame().T
-                        
+
                         probe = pd.DataFrame({
                             "Probe": [probe]})
                         data = dup_mean.to_frame().T
                         return pd.concat([probe,gene_info.reset_index(drop = True),data],
                                          axis = 1)
-                    
+
                     dup_means = [get_mean(dup) for dup in dups]
-                    exp = pd.concat([exp[~exp.GeneSymbol.isin(dups)]] + dup_means, 
+                    exp = pd.concat(
+                        [exp[~exp.GeneSymbol.isin(dups)]] + dup_means,
                                     ignore_index = True)
                 return exp
             #get_exp
-            
+
             expression = {k:get_exp(k) for k in unique_sets}
-            
+
         else:
             expression = self.\
                 get_dataset_expression_for_genes(unique_sets,
@@ -1499,7 +1529,7 @@ def get_mean(dup):
                                                  keep_non_specific = keep_non_specific,
                                                  consolidate = consolidate)
             expression = sub.make_dict(unique_sets,  list(expression.values()))
-                    
+
         designs = {k:self.make_design(metadata[k]) for k in metadata.keys()}
         dat = self.get_datasets_by_ids(unique_sets)
         def pack_data(i):
@@ -1514,38 +1544,35 @@ def pack_data(i):
                 }
             # create unique probe ids. needed for rownames and merging
             # probe ids are usually unique but there are exceptions
-            
+
             unique_probes = packed_info['exp'].Probe
             append = pd.Series(sub.rep(0,len(unique_probes)))
             dups = unique_probes.duplicated()
             while dups.any():
                 append[dups] = append[dups]+1
                 dups = (unique_probes + append.astype('string')).duplicated()
-            
+
             append = append.astype('string')
             append[append=='0'] = ""
             unique_probes = unique_probes + append
             packed_info['unique_probes'] = unique_probes
-            
-            
+
             if result_sets is not None:
                 packed_info['result_set'] = result_sets[i]
             if contrasts is not None:
                 packed_info['contrasts'] = contrasts[i]
             # reordering to match expression/metadata no longer necesarry
-            
-            
+
             if result_sets is not None:
                 diff = self.get_dataset_differential_expression_analyses(dataset)
 
-                
-                gene_info = packed_info['exp'].\
-                    columns[[not x 
+                gene_info = packed_info['exp']. \
+                    columns[[not x
                              for x in sub.list_in_list(packed_info['exp'].columns,
                                                        packed_info['design'].index)]]
-                
+
                 cons = None if contrasts is None else contrasts[i]
-                
+
                 relevant = self.__subset_factor_values(packed_info['design'].\
                                                        factor_values,
                                                        diff,
@@ -1553,12 +1580,9 @@ def pack_data(i):
                                                        cons)
                 packed_info['design'] = packed_info['design'][relevant]
                 packed_info['exp'] = packed_info['exp'][gene_info.append(packed_info['design'].index)]
-                
-                    
+
             return packed_info
-        
-        
-        
+
         # packed_data = [pack_data(i) for i in range(len(datasets))]
         packed_data = [pack_data(i) for i in range(len(datasets))]
         keys = [str(x['dat'].experiment_ID[0]) for x in packed_data]
@@ -1566,21 +1590,20 @@ def pack_data(i):
         if result_sets is not None:
             keys = [keys[i] + "_" + str(result_sets[i]) for i in range(len(datasets))]
             if contrasts is not None:
-                keys = [keys[i] + "_" + str(contrasts[i]) for i in range(len(datasets))] 
-        
+                keys = [keys[i] + "_" + str(contrasts[i]) for i in
+                        range(len(datasets))]
+
         packed_data = {keys[i]:packed_data[i] for i in range(len(datasets))}
-        
-        
-        
+
         if output_type == 'anndata':
             def make_anndata(pack):
                 pack['exp'].index = pack['unique_probes']
-                try: 
+                try:
                     gene_data = pack['exp'][['GeneSymbol', 'NCBIid']]
                 except KeyError:
                     warnings.warn("WARNING: One or more gene descriptions are missing in Expression table")
                     gene_data = None
-                
+
                 mda = {
                     'title': pack['dat'].experiment_name[0],
                     'abstract': pack['dat'].experiment_description[0],
@@ -1592,18 +1615,17 @@ def make_anndata(pack):
                     "GemmaSuitabilityScore":  pack['dat'].geeq_s_score[0],
                     "taxon":  pack['dat'].taxon_name[0]
                     }
-                
+
                 exp = pack['exp'][pack['design'].index]
                 adata = ad.AnnData(exp)
                 if not (gene_data is None):
                     adata.obs = adata.obs.join(gene_data)
-                    
-                
+
                 adata.var = adata.var.join(pack['design'])
                 adata.uns = mda
                 return adata
             # make_anndata
-            
+
             out = {k:make_anndata(packed_data[k]) for k in packed_data.keys()}
         elif output_type == 'dict':
             out = packed_data
@@ -1611,42 +1633,81 @@ def make_anndata(pack):
             pass
         return out
 
-    def get_differential_expression_values(self, 
+    def get_single_cell_dataset_object(self, dataset: str | int,
+                                       download_dir=None) -> AnnData:
+        """
+        :param download_dir: Directory where datasets can be downloaded, or else
+        the data will be retrieved in-memory.
+        :return:
+        """
+
+        def resolve():
+            if download_dir:
+                dest = join(download_dir, dataset + '.tar')
+                if not os.path.exists(dest):
+                    logger.info('Downloading single-cell data for %s to %s...',
+                                dataset, download_dir)
+                    with open(dest, 'wb') as f:
+                        f.write(self.raw.get_dataset_single_cell_expression(
+                            dataset))
+                return open(dest, 'rb')
+            else:
+                logger.info("Downloading single-cell data data for %s...",
+                            str(dataset))
+                return BytesIO(
+                    self.raw.get_dataset_single_cell_expression(dataset))
+
+        with (resolve() as f, tarfile.open(fileobj=f) as tf,
+              tempfile.TemporaryDirectory() as tmpdir):
+            logger.info('Extracting TAR file for %s to %s...', str(dataset),
+                        tmpdir)
+            tf.extractall(tmpdir)
+            samples = []
+            for sample_dir in os.listdir(tmpdir):
+                logger.info('Reading MEX data for %s...', sample_dir)
+                # Gemma already guarantees unicity of cell identifiers and
+                # scanpy cannot deal with numeric gene identifiers when
+                # make_unique is True, so we skip that part
+                samples.append(scanpy.read_10x_mtx(join(tmpdir, sample_dir),
+                                                   make_unique=False))
+            return scanpy.concat(samples, axis="var")
+
+    def get_differential_expression_values(self,
                                            dataset:Optional[str|int] = None,
                                            keep_non_specific:bool = False,
                                            result_sets:Optional[List[str|int]] = None,
-                                           readable_contrasts:bool = False, 
+                                           readable_contrasts: bool = False,
                                            **kwargs)->List[DataFrame]:
         """
         Retrieves the differential expression resultSet(s) associated with the dataset.
         If there is more than one resultSet, use get_result_sets() to see the options
         and get the ID you want. Alternatively, you can query the resultSet directly
         if you know its ID beforehand.
-        
-        In Gemma each result set corresponds to the estimated effects 
-        associated with a single factor in the design, and each can have 
-        multiple contrasts (for each level compared to baseline). Thus a 
-        dataset with a 2x3 factorial design will have two result sets, one of 
+
+        In Gemma each result set corresponds to the estimated effects
+        associated with a single factor in the design, and each can have
+        multiple contrasts (for each level compared to baseline). Thus a
+        dataset with a 2x3 factorial design will have two result sets, one of
         which will have one contrast, and one having two contrasts.
-        
-        The methodology for differential expression is explained in `Curation 
+
+        The methodology for differential expression is explained in `Curation
         of over 10000 transcriptomic studies to enable data reuse <https://doi.org/10.1093/database/baab006>`_.
         Briefly, differential expression analysis is performed on the dataset
         based on the annotated experimental design with up two three potentially
         nested factors. Gemma attempts to automatically assign baseline conditions
-        for each factor. In the absence of a clear control condition, a baseline 
+        for each factor. In the absence of a clear control condition, a baseline
         is arbitrarily selected. A generalized linear model with empirical Bayes
-        shrinkage of t-statistics is fit to the data for each platform element 
-        (probe/gene) using an implementation of the limma algorithm. For 
+        shrinkage of t-statistics is fit to the data for each platform element
+        (probe/gene) using an implementation of the limma algorithm. For
         RNA-seq data, we use weighted regression, applying the voom algorithm
-        to compute weights from the mean–variance relationship of the data. 
-        Contrasts of each condition are then computed compared to the selected 
+        to compute weights from the mean–variance relationship of the data.
+        Contrasts of each condition are then computed compared to the selected
         baseline. In some situations, Gemma will split the data into subsets for
-        analysis. A typical such situation is when a ‘batch’ factor is present 
+        analysis. A typical such situation is when a ‘batch’ factor is present
         and confounded with another factor, the subsets being determined by the
         levels of the confounding factor.
-        
-        
+
+
         :param dataset: A dataset identifier, defaults to None
         :type dataset: Optional[str|int], optional
         :param result_sets: result set identifiers. If a dataset
@@ -1658,7 +1719,7 @@ def get_differential_expression_values(self,
           accessed using get_dataset_differential_expression_analyses(). If True IDs
           will be replaced with human readable contrast information, defaults to False
         :type readable_contrasts: bool, optional
-        :param **kwargs: 
+        :param **kwargs:
         :type **kwargs: TYPE
         :raises ValueError: Will return a value error if neither result_sets nor a dataset
           is provided
@@ -1670,7 +1731,8 @@ def get_differential_expression_values(self,
             diffs =  self.get_dataset_differential_expression_analyses(dataset)
             rss = diffs.result_ID
             if not all(sub.list_in_list(result_sets, rss)):
-                warnings.warn('The queried resultSet is not derived from this dataset. ' 
+                warnings.warn(
+                    'The queried resultSet is not derived from this dataset. '
                               'Check the available resultSets with "get_result_sets()" '
                               'or query without the dataset parameter.')
                 return
@@ -1679,17 +1741,17 @@ def get_differential_expression_values(self,
             result_sets = diffs.result_ID.unique()
         elif dataset is None and result_sets is None:
             raise ValueError('You must specify a dataset or result_sets')
-            
+
         rss = {}
         if readable_contrasts:
             all_factors =  self.get_result_sets(result_sets = result_sets)
-        
+
         for rs in result_sets:
             df = self.__get_result_set(rs)
-            
+
             if not keep_non_specific:
                 df = df[~df.GeneSymbol.str.contains("|",regex = False,na = True)]
-            
+
             if readable_contrasts:
                 factors = pd.concat(
                     list(all_factors[all_factors.result_ID == rs].experimental_factors)
@@ -1700,11 +1762,10 @@ def get_differential_expression_values(self,
                 df.columns = cols
             rss[rs] = df
         return rss
-            
 
     # get_taxa is moved to base. only removes the nameless rat now
 
-    # gemma_call unimplemented, not needed    
+    # gemma_call unimplemented, not needed
 
     def get_all_pages(self,fun:Callable,step_size:int = 100,**kwargs)->list|DataFrame:
         """
@@ -1712,60 +1773,66 @@ def get_all_pages(self,fun:Callable,step_size:int = 100,**kwargs)->list|DataFram
         If the function returns a DataFrame output will be merged by the rows,
         if the function returns a list (eg. 'raw' functions) a concatanated list
         will be returned
-        
-        
+
+
         :param fun: A callable from gemmapy with offset and limit
           functions
         :type fun: Callable
-        :param step_size: Size of individual calls to the server. 100 is 
+        :param step_size: Size of individual calls to the server. 100 is
           the maximum value and the default.
         :type step_size: int, optional
         :param **kwargs: arguments for the callable fun
-        :return: A DataFrame or a list containing all the output depending on 
+        :return: A DataFrame or a list containing all the output depending on
           output of the callable
         :rtype: list|DataFrame
         """
 
-        out = []        
+        out = []
         poke_call = fun(limit =1,**kwargs)
-        
-        if type(poke_call) == pd.core.frame.DataFrame:
+
+        if isinstance(poke_call, pd.DataFrame):
             count = poke_call.attributes["total_elements"]
         else:
             count = poke_call.total_elements
-        
+
         for i in range(0,count,step_size):
             out.append(fun(limit = step_size,offset = i,**kwargs))
-        
-        if type(poke_call) == pd.core.frame.DataFrame:
+
+        if isinstance(poke_call, pd.DataFrame):
             return pd.concat(out,ignore_index = True)
         else:
             return sub.break_list([x.data for x in out])
-        
 
-    
-    def filter_properties(self, output_type:str = 'DataFrame')->dict|DataFrame:
+    def filter_properties(self, output_type: str = 'DataFrame') -> (None |
+                                                                    dict[
+                                                                        str,
+                                                                        dict[
+                                                                            Any,
+                                                                            dict[
+                                                                                str, Any]]] |
+                                                                    dict[
+                                                                        str, DataFrame]):
         """
         Some functions such as get_datasets and get_platforms include a filter
-        argument that allows creation of more complex queries. This function 
+        argument that allows creation of more complex queries. This function
         returns a list of supported properties to be used in those filters
-        
+
         :param output_type: Type to return. "DataFrame" or "dict", defaults to 'DataFrame'
         :type output_type: str, optional
         :return: DataFrame or dict containing supported properties and their data types
         :rtype: dict|DataFrame
 
         """
-        
+
         d = self.raw.api_client.rest_client.GET("https://gemma.msl.ubc.ca/rest/v2/openapi.json").urllib3_response
         api_file = json.loads(d.data)
-        
+
         dataset_filter = api_file["components"]["schemas"]["FilterArgExpressionExperiment"]["x-gemma-filterable-properties"]
-        
+
         platform_filter = api_file["components"]["schemas"]["FilterArgArrayDesign"]["x-gemma-filterable-properties"]
-        
+
         result_set_filter = api_file["components"]["schemas"]["FilterArgExpressionAnalysisResultSet"]["x-gemma-filterable-properties"]
-        
+
         if output_type == 'DataFrame':
             return {
                 "dataset":pd.DataFrame({
@@ -1792,9 +1859,5 @@ def filter_properties(self, output_type:str = 'DataFrame')->dict|DataFrame:
                                               "description":sub.access_field(x,'description',None)} for x in platform_filter},
                 'result_set':{x['name']:{"type":x['type'],
                                               "description":sub.access_field(x,'description',None)} for x in result_set_filter}
-                
-                }
-            
-        
-    
 
+            }
diff --git a/setup.cfg b/setup.cfg
index b8df889..aa27e20 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -17,6 +17,7 @@ install_requires =
     pandas
     numpy
     anndata
+    scanpy
     typing
 
 #[options.packages.find]
diff --git a/tests/test_basic.py b/tests/test_basic.py
index 564bd84..84e23a7 100644
--- a/tests/test_basic.py
+++ b/tests/test_basic.py
@@ -5,13 +5,16 @@
 
 @author: omancarci
 """
+import os
+import time
+
+import anndata as ad
+import pandas as pd
 import pytest
+
 import gemmapy
-import pandas as pd
 from gemmapy import _subprocessors as sub
-import anndata as ad
-import time
-
+from gemmapy.gemmapy_api import GemmaPath
 
 api = gemmapy.GemmaPy()
 
@@ -20,6 +23,64 @@ def slow_down_tests():
     yield
     time.sleep(1)
 
+def test_path():
+    client = gemmapy.GemmaPy(path=GemmaPath.PROD)
+    assert client.raw.api_client.configuration.host == 'https://gemma.msl.ubc.ca/rest/v2'
+    client = gemmapy.GemmaPy(path=GemmaPath.DEV)
+    assert client.raw.api_client.configuration.host == 'https://dev.gemma.msl.ubc.ca/rest/v2'
+    client = gemmapy.GemmaPy(path=GemmaPath.STAGING)
+    assert client.raw.api_client.configuration.host == 'https://staging-gemma.msl.ubc.ca/rest/v2'
+    client = gemmapy.GemmaPy(path='dev')
+    assert client.raw.api_client.configuration.host == 'https://dev.gemma.msl.ubc.ca/rest/v2'
+    client = gemmapy.GemmaPy(path='https://example.com/rest/v2')
+    assert client.raw.api_client.configuration.host == 'https://example.com/rest/v2'
+
+def test_auth(monkeypatch):
+    monkeypatch.setitem(os.environ, 'GEMMA_USERNAME', '')
+    monkeypatch.setitem(os.environ, 'GEMMA_PASSWORD', '')
+    monkeypatch.setitem(os.environ, 'GEMMA_PASSWORD_CMD', '')
+
+    client = gemmapy.GemmaPy()
+    assert client.raw.api_client.configuration.username == ''
+    assert client.raw.api_client.configuration.password == ''
+
+    client = gemmapy.GemmaPy(auth=['foo', 'bar'])
+    assert client.raw.api_client.configuration.username == 'foo'
+    assert client.raw.api_client.configuration.password == 'bar'
+
+    with pytest.raises(OSError):
+        gemmapy.GemmaPy(auth=('username',))
+
+    monkeypatch.setitem(os.environ, 'GEMMA_USERNAME', 'foo')
+    monkeypatch.setitem(os.environ, 'GEMMA_PASSWORD', 'bar')
+    client = gemmapy.GemmaPy()
+    assert client.raw.api_client.configuration.username == 'foo'
+    assert client.raw.api_client.configuration.password == 'bar'
+
+    monkeypatch.setitem(os.environ, 'GEMMA_USERNAME', 'foo')
+    monkeypatch.setitem(os.environ, 'GEMMA_PASSWORD', '')
+    monkeypatch.setitem(os.environ, 'GEMMA_PASSWORD_CMD', 'echo 1234')
+    client = gemmapy.GemmaPy()
+    assert client.raw.api_client.configuration.username == 'foo'
+    assert client.raw.api_client.configuration.password == '1234'
+
+    with pytest.raises(OSError):
+        monkeypatch.setitem(os.environ, 'GEMMA_USERNAME', 'foo')
+        monkeypatch.setitem(os.environ, 'GEMMA_PASSWORD', '')
+        monkeypatch.setitem(os.environ, 'GEMMA_PASSWORD_CMD', '')
+        gemmapy.GemmaPy()
+
+def test_get_single_cell_data():
+    # TODO: use a publicly available dataset
+    client = gemmapy.GemmaPy()
+    ad = client.get_single_cell_dataset_object('GSE227313', download_dir='.')
+
+def test_get_genes():
+    assert len(api.get_genes('BRCA1')) > 0
+    assert len(api.get_genes(['BRCA1'])) > 0
+    assert len(api.get_genes(672)) > 0
+    assert len(api.get_genes([672])) > 0
+    assert len(api.get_genes([672, 'BRCA1'])) > 0
 
 def test_get_result_sets():
     res = api.get_result_sets([200])