Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Position Control with mujoco-py #161

Open
bara-bba opened this issue Jan 23, 2022 · 0 comments
Open

Position Control with mujoco-py #161

bara-bba opened this issue Jan 23, 2022 · 0 comments

Comments

@bara-bba
Copy link

bara-bba commented Jan 23, 2022

Hi everyone! I would like to control a robotic EE in position only, so I wrote in the actuator part of XML code and all the magic stuff needed. The problem I encounter is that during RL the action is sampled in the ctrlrange but I would like to have the whole joint space while keeping a limited action sample. Is that any way to solve this stuff? Thanks!

THE XML FILE

<compiler angle="radian"/>

<option cone="elliptic">
    <flag gravity="disable"/>
</option>

<asset>
    <texture name="texplane" type="2d" builtin="checker" rgb1=".2 .3 .4" rgb2=".1 0.15 0.2"
            width="512" height="512"/>
    <material name="MatGnd" reflectance="0.5" texture="texplane" texrepeat="1 1" texuniform="true"/>

    <mesh name="link0_collision" file="stl/panda/collision/link0.stl"/>
    <mesh name="link1_collision" file="stl/panda/collision/link1.stl"/>
    <mesh name="link2_collision" file="stl/panda/collision/link2.stl"/>
    <mesh name="link3_collision" file="stl/panda/collision/link3.stl"/>
    <mesh name="link4_collision" file="stl/panda/collision/link4.stl"/>
    <mesh name="link5_collision" file="stl/panda/collision/link5.stl"/>
    <mesh name="link6_collision" file="stl/panda/collision/link6.stl"/>
    <mesh name="link7_collision" file="stl/panda/collision/link7.stl"/>
    <mesh name="hand_collision" file="stl/panda/collision/hand.stl"/>
    <mesh name="finger_collision" file="stl/panda/collision/finger.stl" scale='1.75 1.0 1.75'/>
    <mesh name="link0_visual" file="stl/panda/visual/link0.stl"/>
    <mesh name="link1_visual" file="stl/panda/visual/link1.stl"/>
    <mesh name="link2_visual" file="stl/panda/visual/link2.stl"/>
    <mesh name="link3_visual" file="stl/panda/visual/link3.stl"/>
    <mesh name="link4_visual" file="stl/panda/visual/link4.stl"/>
    <mesh name="link5_visual" file="stl/panda/visual/link5.stl"/>
    <mesh name="link6_visual" file="stl/panda/visual/link6.stl"/>
    <mesh name="link7_visual" file="stl/panda/visual/link7.stl"/>
    <mesh name="hand_visual" file="stl/panda/visual/hand.stl"/>
    <mesh name="finger_visual" file="stl/panda/collision/finger.stl" scale='1.75 1.0 1.75'/>
</asset>

<visual>
	<scale framewidth="0.05" framelength="0.8" jointwidth="0.05" jointlength="0.8" actuatorwidth="0.05" actuatorlength="0.8" forcewidth="0.1" contactwidth="0.1"/>
</visual>

<default>
    <geom condim="4"/>
    <default class="panda">
        <joint pos="0 0 0" limited="true" damping="100"/>
            <position forcelimited="true" ctrllimited="true" user="1002 40 2001 -0.005 0.005"/>
            <default class="visual">
            <geom contype="0" conaffinity="0" group="0" type="mesh" rgba=".95 .99 .92 1" mass="0"/>
        </default>

        <default class="collision">
            <geom contype="1" conaffinity="1" group="3" type="mesh" rgba=".5 .6 .7 1"/>
        </default>

        <default class="panda_finger">
            <joint damping="0" armature='5'/>
        </default>
    </default>
</default>

<worldbody>
    <light pos="0 0 1000" castshadow="false"/>

    <!-- FLOOR -->
    <geom name="ground" pos="0 0 0" size="5 5 10" material="MatGnd" type="plane" contype="1" conaffinity="1"/>

    <!-- TABLE -->
    <!--geom name="wrk_table" pos="0 0 0.2" type="box" mass="90" size=".2 .2 .2" rgba="0.9 0.9 0.9 1" contype="1" conaffinity="1"/-->


    <!--  TARGET  -->
	<body name="target" pos="0 0 .45">
		<!--geom type="cylider" size="0.02 0.05" rgba=".9 0 0 .5" contype="8" conaffinity="8"/-->
        <site name="target_site" type="cylinder" size="0.02 0.05" pos="0 0 0" rgba="0.9529411765 0.8 0.03529411765 0.5"/>
	</body>

    <!-- HAND -->
    <body name="panda_hand" pos="0 0 0.8" euler="3.14159265359 0 0">

        <joint name="panda_x" pos="0 0 0" type="slide" axis="1 0 0" frictionloss="0" damping="1000"/>
        <joint name="panda_y" pos="0 0 0" type="slide" axis="0 1 0" frictionloss="0" damping="1000"/>
        <joint name="panda_z" pos="0 0 0" type="slide" axis="0 0 1" frictionloss="0" damping="1000"/>
        <joint name="panda_ball" pos="0 0 0" type="ball" frictionloss="0" damping="10"/>

        <site name="ee_site" pos="0 0 0" size="0.005, 0.005, 0.005" euler="0 0 -1.57"/>
        <inertial pos="0 0 0" euler="0 0 0" mass="1" diaginertia="0.1 0.1 0.1"/>
        <geom class="visual" mesh="hand_visual"/>
        <geom class="collision" mesh="hand_collision"/>

        <body name="panda_left_finger" pos="0 0.02 0.0584" quat="1 0 0 0">
            <!--joint name="panda_finger_joint1" axis="0 1 0" type="slide" range="-0.02 0.02" damping="1000" armature="500"/-->
            <geom class="visual" mesh="finger_visual"/>
            <geom class="collision" mesh="finger_collision" mass="1"/>
        </body>

        <body name="panda_right_finger" pos="0 -0.02 0.0584" quat="1 0 0 0">
            <!--joint name="panda_finger_joint2" axis="0 -1 0" type="slide" range="-0.02 0.02" damping="1000" armature="500"/-->
            <geom quat="0 0 0 1" class="visual" mesh="finger_visual"/>
            <geom quat="0 0 0 1" class="collision" mesh="finger_collision" mass="1"/>
        </body>

        <!-- COMPONENT -->
        <body name="component" pos="0 0 0.18">
            <geom type="cylinder" size="0.02 0.05" mass="0.1" margin="0.001"/>
            <site name="component_site" pos="0 0 0.05" rgba="0.9529411765 0.8 0.03529411765 1"/>

        </body>

    </body>

</worldbody>

<actuator>

    <position name="panda_x" joint="panda_x" class="panda_finger" gear="1" kp="100000" forcerange="-50 50" ctrllimited="true" ctrlrange="-0.01 0.01"/>
    <position name="panda_y" joint="panda_y" class="panda_finger" gear="1" kp="100000" forcerange="-50 50" ctrllimited="true" ctrlrange="-0.01 0.01"/>
    <position name="panda_z" joint="panda_z" class="panda_finger" gear="1" kp="100000" forcerange="-50 50" ctrllimited="true" ctrlrange="-0.01 0.01"/>
    <position name="panda_ball_1" joint="panda_ball" class="panda_finger" kp="10000" gear="1 0 0 0 0 0" forcerange="-50 50" ctrllimited="true" ctrlrange="-0.03 0.03"/>
    <position name="panda_ball_2" joint="panda_ball" class="panda_finger" kp="10000" gear="0 1 0 0 0 0" forcerange="-50 50" ctrllimited="true" ctrlrange="-0.03 0.03"/>
    <position name="panda_ball_3" joint="panda_ball" class="panda_finger" kp="10000" gear="0 0 1 0 0 0" forcerange="-50 50" ctrllimited="true" ctrlrange="-0.03 0.03"/>

</actuator>

<sensor>
    <force name="ee_force_sensor" site="ee_site"/>
    <torque name="ee_torque_sensor" site="ee_site"/>
</sensor>

THE ENVIRONMENT

    import numpy as np
    from gym import utils
    from gym.envs.mujoco import mujoco_env
    
    
    class PandaEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    
        def __init__(self):
            utils.EzPickle.__init__(self)
            mujoco_env.MujocoEnv.__init__(self, "/home/bara/doc/rlkit/generic/panda.xml", 2)
    
        def step(self, a):
    
            # STEP
            self.do_simulation(a, self.frame_skip)
    
            #DISTANCE
            xpos_component = self.get_body_com("component")
            xpos_target = self.get_body_com("target")
            dist = xpos_component - xpos_target
    
            #REWARD
            reward_dist = -np.linalg.norm(dist)
            reward = reward_dist  # More contributions to rewards may be added
            # print("REWARD: " + str(reward))
            # print("ACTION: " + str(a))
    
            ob = self._get_obs()
            done = False
    
            return ob, reward, done, dict(reward_dist=reward_dist)
    
        def viewer_setup(self):
            self.viewer.cam.trackbodyid = 0
    
        def reset_model(self):
            qpos = np.zeros(self.model.nq)
            qvel = np.zeros(self.model.nv)
            self.set_state(qpos, qvel)
            return self._get_obs()
    
        def _get_obs(self):
            return np.concatenate(
                [
                    self.get_body_com("component"),
                    self.get_body_com("target")
                ]
            )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant