>>> !pip3 install keras_rl2
>>> import gym
>>> import numpy as np
>>> import tensorflow as tf
>>> from keras.models import Sequential
>>> from keras.layers import Dense, Activation, Flatten
>>> from keras.optimizers import Adam
>>> from rl.agents import DQNAgent
>>> from rl.policy import BoltzmannQPolicy
>>> from rl.memory import SequentialMemory
>>> tf.__version__
'2.11.0'
>>> env=gym.make("CartPole-v1")
>>> n_actions=env.action_space.n
>>> n_obs=env.observation_space.shape
>>> model=Sequential()
>>> model.add(Flatten(input_shape=(1,)+n_obs))
>>> model.add(Dense(16,activation='relu'))
>>> model.add(Dense(16,activation='relu'))
>>> model.add(Dense(16,activation='relu'))
>>> model.add(Dense(n_actions,activation='linear'))
>>> model.summary()
Model: "sequential_11"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
flatten_10 (Flatten) (None, 4) 0
dense_44 (Dense) (None, 16) 80
dense_45 (Dense) (None, 16) 272
dense_46 (Dense) (None, 16) 272
dense_47 (Dense) (None, 2) 34
=================================================================
Total params: 658
Trainable params: 658
Non-trainable params: 0
_________________________________________________________________
>>>
>>> memory = SequentialMemory(limit=50000, window_length=1)
>>> dqn = DQNAgent(model=model, nb_actions=n_actions, memory=memory, nb_steps_warmup=10, target_model_update=1e-2, policy=BoltzmannQPolicy())
>>> dqn.compile(Adam(learning_rate=1e-3), metrics=['mse'])
2023-01-22 11:40:46.046001: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_44/kernel/Assign' id:5823 op device:{requested: '', assigned: ''} def:{{{node dense_44/kernel/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](dense_44/kernel, dense_44/kernel/Initializer/stateless_random_uniform)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
>>>
>>> # Train the agent on `env` for 50000 steps.
>>> # NOTE(review): keras-rl2 appears to expect the pre-0.26 gym API
>>> # (reset() -> obs, step() -> 4-tuple). A ValueError reporting shape
>>> # "(1, 1, 2) + inhomogeneous part" is consistent with env.reset()
>>> # returning an (obs, info) tuple -- confirm the installed gym version.
>>> dqn.fit(env, nb_steps=50000)
Training for 50000 steps ...
Interval 1 (0 steps performed)
ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 3 dimensions. The detected shape was (1, 1, 2) + inhomogeneous part.