Please help me to resolve this erro i 've been trying a lot of different ways to solve it, the thing is this, im trying to pull a data which is over les 200,0 00 registers and for no to break the connection for firestore im processing the regiters in batches of date in range of 3 days, but i keep getting this error message, im using Databricks
dataframes_dict = {}
for business in businesses:
print(f"Processing business {business}\n\n")
for i in date_dict:
first_day = date_dict[i]['first_day']
last_day = date_dict[i]['last_day']
print(f'Range: {first_day} - {last_day}')
for insurer in insurers:
print(f'Insurer {insurer}')
PROD_EMI_COLLECTION_PATH = f"xxxxxxx/xxxxxxx/xxxxxx/{business}/{insurer}"
# Definition of paths for Firestore.
query_cs = fstore.db.collection(PROD_EMI_COLLECTION_PATH)
query_cs = query_cs.where("request_front.emission_date", ">=", first_day).where("request_front.emission_date", "<=", last_day)
results_cs = query_cs.stream(retry=Retry(), timeout=600)
values = []
for doc in results_cs:
doc_dict = doc.to_dict() # Store the resulting dictionary
values.append(doc_dict) # Add the dictionary to the list of values
emissions = pd.DataFrame(values)
dataframes_dict.update({f'{business+insurer+str(i)}': emissions})
--------------------------------------------------------------------------
_MultiThreadedRendezvous Traceback (most recent call last)
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/google/api_core/grpc_helpers.py:116, in StreamingResponseIterator.next_(self)
115 return result
--> 116 return next(self._wrapped)
117 except grpc.RpcError as exc:
118 # If the stream has already returned data, we cannot recover here.
File /databricks/python/lib/python3.10/site-packages/grpc/channel.py:426, in _Rendezvous.next_(self)
425 def _next_(self):
--> 426 return self._next()
File /databricks/python/lib/python3.10/site-packages/grpc/_channel.py:809, in _MultiThreadedRendezvous._next(self)
808 else:
--> 809 raise self
811 def _response_ready():
_MultiThreadedRendezvous: <_MultiThreadedRendezvous of RPC that terminated with:
status = StatusCode.DEADLINE_EXCEEDED
details = "Deadline Exceeded"
debug_error_string = "UNKNOWN:Error received from peer ipv4:
173.194.206.95:443 {created_time:"2024-07-02T03:04:49.290125715+00:00", grpc_status:4, grpc_message:"Deadline Exceeded"}"
>
The above exception was the direct cause of the following exception:
DeadlineExceeded Traceback (most recent call last)
File <command-4213225729853171>, line 21
19 values = []
20 has_documents = False # Bandera para verificar si hay documentos
---> 21 for doc in results_cs:
22 has_documents = True # Si entra al ciclo, hay documentos
23 doc_dict = doc.to_dict() # Almacena el diccionario resultante
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/google/cloud/firestore_v1/query.py:361, in Query.stream(self, transaction, retry, timeout)
359 while True:
360 try:
--> 361 response = next(response_iterator, None)
362 except exceptions.GoogleAPICallError as exc:
363 if self._retry_query_after_exception(exc, retry, transaction):
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/google/api_core/grpc_helpers.py:119, in StreamingResponseIterator.next_(self)
116 return next(self._wrapped)
117 except grpc.RpcError as exc:
118 # If the stream has already returned data, we cannot recover here.
--> 119 raise exceptions.from_grpc_error(exc) from exc
DeadlineExceeded: 504 Deadline Exceeded