Skip to content

Commit 01b42d4

Browse files
authored
Merge pull request #52 from alonme/always-use-provided-project-id
Use the provided project_id when using a service account
2 parents c3baf5f + acada63 commit 01b42d4

File tree

3 files changed

+23
-18
lines changed

3 files changed

+23
-18
lines changed

pybigquery/parse_url.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ def parse_url(url):
2929
# maximum_billing_tier (deprecated)
3030
if 'maximum_billing_tier' in query: raise ValueError("maximum_billing_tier is a deprecated argument")
3131

32+
project_id = url.host
3233
location = None
3334
dataset_id = url.database or None
3435
arraysize = None
@@ -55,9 +56,9 @@ def parse_url(url):
5556
# if a dataset_id exists, we need to return a job_config that isn't None
5657
# so it can be updated with a dataset reference from the client
5758
if dataset_id:
58-
return location, dataset_id, arraysize, credentials_path, QueryJobConfig()
59+
return project_id, location, dataset_id, arraysize, credentials_path, QueryJobConfig()
5960
else:
60-
return location, dataset_id, arraysize, credentials_path, None
61+
return project_id, location, dataset_id, arraysize, credentials_path, None
6162

6263
job_config = QueryJobConfig()
6364

@@ -169,4 +170,4 @@ def parse_url(url):
169170
except AttributeError:
170171
raise ValueError("invalid write_disposition in url query: " + query['write_disposition'])
171172

172-
return location, dataset_id, arraysize, credentials_path, job_config
173+
return project_id, location, dataset_id, arraysize, credentials_path, job_config

pybigquery/sqlalchemy_bigquery.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -288,33 +288,35 @@ def dbapi(cls):
288288
def _add_default_dataset_to_job_config(job_config, project_id, dataset_id):
289289
# If dataset_id is set, then we know the job_config isn't None
290290
if dataset_id:
291-
# If project_id is missing, use default project_id
291+
# If project_id is missing, use default project_id for the current environment
292292
if not project_id:
293293
_, project_id = auth.default()
294294

295295
job_config.default_dataset = '{}.{}'.format(project_id, dataset_id)
296296

297297

298-
def _create_client_from_credentials(self, credentials, default_query_job_config):
298+
def _create_client_from_credentials(self, credentials, default_query_job_config, project_id):
299+
if project_id is None:
300+
project_id = credentials.project_id
301+
299302
scopes = (
300303
'https://www.googleapis.com/auth/bigquery',
301304
'https://www.googleapis.com/auth/cloud-platform',
302305
'https://www.googleapis.com/auth/drive'
303306
)
304307
credentials = credentials.with_scopes(scopes)
305308

306-
self._add_default_dataset_to_job_config(default_query_job_config,
307-
credentials.project_id, self.dataset_id)
309+
self._add_default_dataset_to_job_config(default_query_job_config, project_id, self.dataset_id)
308310

309311
return bigquery.Client(
310-
project=credentials.project_id,
312+
project=project_id,
311313
credentials=credentials,
312314
location=self.location,
313315
default_query_job_config=default_query_job_config,
314316
)
315317

316318
def create_connect_args(self, url):
317-
location, dataset_id, arraysize, credentials_path, default_query_job_config = parse_url(url)
319+
project_id, location, dataset_id, arraysize, credentials_path, default_query_job_config = parse_url(url)
318320

319321
self.arraysize = self.arraysize or arraysize
320322
self.location = location or self.location
@@ -323,18 +325,17 @@ def create_connect_args(self, url):
323325

324326
if self.credentials_path:
325327
credentials = service_account.Credentials.from_service_account_file(self.credentials_path)
326-
client = self._create_client_from_credentials(credentials, default_query_job_config)
328+
client = self._create_client_from_credentials(credentials, default_query_job_config, project_id)
327329

328330
elif self.credentials_info:
329331
credentials = service_account.Credentials.from_service_account_info(self.credentials_info)
330-
client = self._create_client_from_credentials(credentials, default_query_job_config)
332+
client = self._create_client_from_credentials(credentials, default_query_job_config, project_id)
331333

332334
else:
333-
self._add_default_dataset_to_job_config(default_query_job_config,
334-
url.host, dataset_id)
335+
self._add_default_dataset_to_job_config(default_query_job_config, project_id, dataset_id)
335336

336337
client = bigquery.Client(
337-
project=url.host,
338+
project=project_id,
338339
location=self.location,
339340
default_query_job_config=default_query_job_config
340341
)

test/test_parse_url.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,9 @@ def url_with_everything():
4646

4747

4848
def test_basic(url_with_everything):
49-
location, dataset_id, arraysize, credentials_path, job_config = parse_url(url_with_everything)
49+
project_id, location, dataset_id, arraysize, credentials_path, job_config = parse_url(url_with_everything)
5050

51+
assert project_id == 'some-project'
5152
assert location == 'some-location'
5253
assert dataset_id == 'some-dataset'
5354
assert arraysize == 1000
@@ -68,7 +69,7 @@ def test_basic(url_with_everything):
6869
('write_disposition', 'WRITE_APPEND'),
6970
])
7071
def test_all_values(url_with_everything, param, value):
71-
job_config = parse_url(url_with_everything)[4]
72+
job_config = parse_url(url_with_everything)[5]
7273

7374
config_value = getattr(job_config, param)
7475
if callable(value):
@@ -108,8 +109,9 @@ def test_empty_url():
108109

109110
def test_empty_with_non_config():
110111
url = parse_url(make_url('bigquery:///?location=some-location&arraysize=1000&credentials_path=/some/path/to.json'))
111-
location, dataset_id, arraysize, credentials_path, job_config = url
112+
project_id, location, dataset_id, arraysize, credentials_path, job_config = url
112113

114+
assert project_id is None
113115
assert location == 'some-location'
114116
assert dataset_id is None
115117
assert arraysize == 1000
@@ -118,8 +120,9 @@ def test_empty_with_non_config():
118120

119121
def test_only_dataset():
120122
url = parse_url(make_url('bigquery:///some-dataset'))
121-
location, dataset_id, arraysize, credentials_path, job_config = url
123+
project_id, location, dataset_id, arraysize, credentials_path, job_config = url
122124

125+
assert project_id is None
123126
assert location is None
124127
assert dataset_id == 'some-dataset'
125128
assert arraysize is None

0 commit comments

Comments (0)