I'm trying out how backfills work, since we need them for my current project. To do this I created a super simple DAG with two operators in it. When the backfill ran for this DAG, it succeeded for some of the DAG runs, but the rest are now stuck in a running state and I can't start the specific tasks. I have no problem with things failing as long as I can restart them. I have also tried restarting every Kubernetes pod in the cluster to see if that helped; it didn't. When I try to clear the state nothing happens, and when I click RUN on a task it gives me this error:
Traceback (most recent call last):
File "/usr/local/lib/python2.7/site-packages/flask/app.py", line 1988, in wsgi_app
response = self.full_dispatch_request()
File "/usr/local/lib/python2.7/site-packages/flask/app.py", line 1641, in full_dispatch_request
rv = self.handle_user_exception(e)
File "/usr/local/lib/python2.7/site-packages/flask/app.py", line 1544, in handle_user_exception
reraise(exc_type, exc_value, tb)
File "/usr/local/lib/python2.7/site-packages/flask/app.py", line 1639, in full_dispatch_request
rv = self.dispatch_request()
File "/usr/local/lib/python2.7/site-packages/flask/app.py", line 1625, in dispatch_request
return self.view_functions[rule.endpoint](**req.view_args)
File "/usr/local/lib/python2.7/site-packages/flask_admin/base.py", line 69, in inner
return self._run_view(f, *args, **kwargs)
File "/usr/local/lib/python2.7/site-packages/flask_admin/base.py", line 368, in _run_view
return fn(self, *args, **kwargs)
File "/usr/local/lib/python2.7/site-packages/flask_login.py", line 755, in decorated_view
return func(*args, **kwargs)
File "/usr/local/lib/python2.7/site-packages/airflow/www/utils.py", line 262, in wrapper
return f(*args, **kwargs)
File "/usr/local/lib/python2.7/site-packages/airflow/www/utils.py", line 309, in wrapper
return f(*args, **kwargs)
File "/usr/local/lib/python2.7/site-packages/airflow/www/views.py", line 895, in run
executor.heartbeat()
File "/usr/local/lib/python2.7/site-packages/airflow/executors/base_executor.py", line 124, in heartbeat
self.execute_async(key, command=command, queue=queue)
File "/usr/local/lib/python2.7/site-packages/airflow/executors/celery_executor.py", line 76, in execute_async
args=[command], queue=queue)
File "/usr/local/lib/python2.7/site-packages/celery/app/task.py", line 536, in apply_async
**options
File "/usr/local/lib/python2.7/site-packages/celery/app/base.py", line 737, in send_task
amqp.send_task_message(P, name, message, **options)
File "/usr/local/lib/python2.7/contextlib.py", line 35, in __exit__
self.gen.throw(type, value, traceback)
File "/usr/local/lib/python2.7/site-packages/kombu/connection.py", line 419, in _reraise_as_library_errors
sys.exc_info()[2])
File "/usr/local/lib/python2.7/site-packages/kombu/connection.py", line 414, in _reraise_as_library_errors
yield
File "/usr/local/lib/python2.7/site-packages/celery/app/base.py", line 736, in send_task
self.backend.on_task_call(P, task_id)
File "/usr/local/lib/python2.7/site-packages/celery/backends/redis.py", line 189, in on_task_call
self.result_consumer.consume_from(task_id)
File "/usr/local/lib/python2.7/site-packages/celery/backends/redis.py", line 75, in consume_from
return self.start(task_id)
File "/usr/local/lib/python2.7/site-packages/celery/backends/redis.py", line 57, in start
self._consume_from(initial_task_id)
File "/usr/local/lib/python2.7/site-packages/celery/backends/redis.py", line 82, in _consume_from
self._pubsub.subscribe(key)
File "/usr/local/lib/python2.7/site-packages/redis/client.py", line 2482, in subscribe
ret_val = self.execute_command('SUBSCRIBE', *iterkeys(new_channels))
File "/usr/local/lib/python2.7/site-packages/redis/client.py", line 2404, in execute_command
self._execute(connection, connection.send_command, *args)
File "/usr/local/lib/python2.7/site-packages/redis/client.py", line 2415, in _execute
connection.connect()
File "/usr/local/lib/python2.7/site-packages/redis/connection.py", line 489, in connect
raise ConnectionError(self._error_message(e))
OperationalError: Error -2 connecting to airflow-redis-service:6379. Name or service not known.