diff --git a/logger.py b/logger.py
index cc8cd744..99b1509d 100755
--- a/logger.py
+++ b/logger.py
@@ -4,6 +4,7 @@
 import numpy as np
 import cv2
 import torch
+import csv
 # import h5py
 
 class Logger():
@@ -73,6 +74,11 @@ def save_heightmaps(self, iteration, color_heightmap, depth_heightmap, mode):
     def write_to_log(self, log_name, log):
         np.savetxt(os.path.join(self.transitions_directory, '%s.log.txt' % log_name), log, delimiter=' ')
 
+    # For stack classifier data (might not need)
+    def save_label(self, log_name, log):
+        with open(os.path.join(self.color_images_directory, '%s.txt' % log_name), 'w') as f:
+            csv.writer(f, delimiter=' ').writerows(log)
+
     def save_model(self, model, name):
         torch.save(model.cpu().state_dict(), os.path.join(self.models_directory, 'snapshot.%s.pth' % (name)))
 
diff --git a/main.py b/main.py
index d64540b0..7877a9cb 100755
--- a/main.py
+++ b/main.py
@@ -16,6 +16,16 @@
 from trainer import Trainer
 from logger import Logger
 import utils
+try:
+    import efficientnet_pytorch
+    from efficientnet_pytorch import EfficientNet
+except ImportError:
+    print('efficientnet_pytorch is not available, using densenet. '
+          'Try installing https://github.com/ahundt/EfficientNet-PyTorch for all features. '
+          'A version of EfficientNets without dilation can be installed with the command: '
+          'pip3 install efficientnet-pytorch --user --upgrade. '
+          'See https://github.com/lukemelas/EfficientNet-PyTorch for details.')
+    efficientnet_pytorch = None
 
 # to convert action names to the corresponding ID number and vice-versa
 ACTION_TO_ID = {'push': 0, 'grasp': 1, 'place': 2}
@@ -169,6 +179,23 @@ def main(args):
     else:
         goal_condition_len = 0
 
+    # TODO(hkwon214) temporary
+    # ------ Image Classifier options -----
+    use_classifier = args.use_classifier
+    checkpoint_path = args.checkpoint_path
+    # TODO(hkwon214) hard coded to use efficientnet for now. modify for future?
+    if use_classifier:
+        if efficientnet_pytorch is None:
+            raise ImportError('--use_classifier requires the efficientnet_pytorch package')
+        if checkpoint_path is None:
+            raise ValueError('--use_classifier requires --checkpoint_path to point at trained classifier weights')
+        model_stack = EfficientNet.from_name('efficientnet-b0')
+        # model_stack = nn.DataParallel(model_stack)
+        model_stack = model_stack.cuda()
+        checkpoint = torch.load(checkpoint_path)
+        model_stack.load_state_dict(checkpoint['state_dict'])
+        model_stack.eval()
+
     # Set random seed
     np.random.seed(random_seed)
 
@@ -223,7 +250,7 @@ def set_nonlocal_success_variables_false():
         nonlocal_variables['grasp_color_success'] = False
         nonlocal_variables['place_color_success'] = False
 
-    def check_stack_update_goal(place_check=False, top_idx=-1):
+    def check_stack_update_goal(place_check=False, top_idx=-1, use_classifier=False, input_img=None):
         """ Check nonlocal_variables for a good stack and reset if it does not match the current goal.
 
         # Params
@@ -246,8 +273,13 @@ def check_stack_update_goal(place_check=False, top_idx=-1):
             # only the place check expects the current goal to be met
             current_stack_goal = current_stack_goal[:-1]
             stack_shift = 0
-        # TODO(ahundt) BUG Figure out why a real stack of size 2 or 3 and a push which touches no blocks does not pass the stack_check and ends up a MISMATCH in need of reset. (update: may now be fixed, double check then delete when confirmed)
-        stack_matches_goal, nonlocal_variables['stack_height'] = robot.check_stack(current_stack_goal, top_idx=top_idx, stack_axis=stack_axis)
+        if use_classifier:
+            # TODO(hkwon214) Add image classifier
+            stack_matches_goal, nonlocal_variables['stack_height'] = robot.stack_reward(model_stack, input_img, current_stack_goal)
+        else:
+            # TODO(ahundt) BUG Figure out why a real stack of size 2 or 3 and a push which touches no blocks does not pass the stack_check and ends up a MISMATCH in need of reset. (update: may now be fixed, double check then delete when confirmed)
+            stack_matches_goal, nonlocal_variables['stack_height'] = robot.check_stack(current_stack_goal, top_idx=top_idx, stack_axis=stack_axis)
+
         nonlocal_variables['partial_stack_success'] = stack_matches_goal
         if nonlocal_variables['stack_height'] == 1:
             # A stack of size 1 does not meet the criteria for a partial stack success
@@ -422,7 +454,14 @@ def process_actions():
                     # Check if the push caused a topple, size shift zero because
                     # place operations expect increased height,
                     # while push expects constant height.
-                    needed_to_reset = check_stack_update_goal()
+                    # TODO(hkwon214) temp
+                    # needed_to_reset = check_stack_update_goal()
+                    color_img, depth_img = robot.get_camera_data()
+                    depth_img = depth_img * robot.cam_depth_scale  # Apply depth scale from calibration
+                    # Get heightmap from RGB-D image (by re-projecting 3D point cloud)
+                    color_heightmap_after_action, depth_heightmap_after_action = utils.get_heightmap(color_img, depth_img, robot.cam_intrinsics, robot.cam_pose, workspace_limits, heightmap_resolution)
+
+                    needed_to_reset = check_stack_update_goal(use_classifier=use_classifier, input_img=depth_heightmap_after_action)
                 if not place or not needed_to_reset:
                     print('Push motion successful (no crash, need not move blocks): %r' % (nonlocal_variables['push_success']))
             elif nonlocal_variables['primitive_action'] == 'grasp':
@@ -441,7 +480,15 @@
                     top_idx = -2
                 # check if a failed grasp led to a topple, or if the top block was grasped
                 # TODO(ahundt) in check_stack() support the check after a specific grasp in case of successful grasp topple. Perhaps allow the top block to be specified?
-                needed_to_reset = check_stack_update_goal(top_idx=top_idx)
+                # needed_to_reset = check_stack_update_goal(top_idx=top_idx)
+                # TODO(hkwon214) temp
+
+                color_img, depth_img = robot.get_camera_data()
+                depth_img = depth_img * robot.cam_depth_scale  # Apply depth scale from calibration
+                # Get heightmap from RGB-D image (by re-projecting 3D point cloud)
+                color_heightmap_after_action, depth_heightmap_after_action = utils.get_heightmap(color_img, depth_img, robot.cam_intrinsics, robot.cam_pose, workspace_limits, heightmap_resolution)
+
+                needed_to_reset = check_stack_update_goal(top_idx=top_idx, use_classifier=use_classifier, input_img=depth_heightmap_after_action)
                 if nonlocal_variables['grasp_success']:
                     # robot.restart_sim()
                     successful_grasp_count += 1
@@ -464,7 +511,15 @@
             elif nonlocal_variables['primitive_action'] == 'place':
                 place_count += 1
                 nonlocal_variables['place_success'] = robot.place(primitive_position, best_rotation_angle)
-                needed_to_reset = check_stack_update_goal(place_check=True)
+
+                # TODO(hkwon214) robot executed task -> capture image for image classifier
+                color_img, depth_img = robot.get_camera_data()
+                depth_img = depth_img * robot.cam_depth_scale  # Apply depth scale from calibration
+                # Get heightmap from RGB-D image (by re-projecting 3D point cloud)
+                color_heightmap_after_action, depth_heightmap_after_action = utils.get_heightmap(color_img, depth_img, robot.cam_intrinsics, robot.cam_pose, workspace_limits, heightmap_resolution)
+
+                # needed_to_reset = check_stack_update_goal(place_check=True)
+                needed_to_reset = check_stack_update_goal(place_check=True, use_classifier=use_classifier, input_img=depth_heightmap_after_action)
                 if not needed_to_reset and nonlocal_variables['place_success'] and nonlocal_variables['partial_stack_success']:
                     partial_stack_count += 1
                     nonlocal_variables['stack'].next()
@@ -971,6 +1026,11 @@ def experience_replay(method, prev_primitive_action, prev_reward_value, trainer,
     parser.add_argument('--grasp_color_task', dest='grasp_color_task', action='store_true', default=False, help='enable grasping specific colored objects')
     parser.add_argument('--grasp_count', dest='grasp_cout', type=int, action='store', default=0, help='number of successful task based grasps')
 
+    # TODO(hkwon214)
+    # ------ Image Classifier Options (Temporary) ------
+    parser.add_argument('--use_classifier', dest='use_classifier', action='store_true', default=False, help='use image classifier weights')
+    parser.add_argument('--checkpoint_path', dest='checkpoint_path', action='store', default='objects/blocks', help='directory of image classifier weights')
+
     # Run main program with specified arguments
     args = parser.parse_args()
     main(args)
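The classifier path wired in above is opt-in. A minimal invocation sketch (hedged: main.py's other flags, such as those for simulation and stacking, are omitted, and the weights path is a placeholder):

    python3 main.py --use_classifier --checkpoint_path ./model_best.pth.tar

Without --use_classifier, check_stack_update_goal falls back to the geometric robot.check_stack check, so the default behavior is unchanged.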
diff --git a/robot.py b/robot.py
index 3c9e069b..0fade58d 100755
--- a/robot.py
+++ b/robot.py
@@ -6,7 +6,9 @@
 import numpy as np
 import utils
 from simulation import vrep
-
+import torch
+import cv2
+from scipy import ndimage
 class Robot(object):
     """
     Key member variables:
@@ -1243,6 +1245,41 @@ def check_stack(self, object_color_sequence, distance_threshold=0.06, top_idx=-1
         # TODO(ahundt) add check_stack for real robot
         return goal_success, detected_height
 
+    # TODO(hkwon214): From image classifier
+    def stack_reward(self, model, input_img, current_stack_goal):
+        # input_img = torch.from_numpy(input_img)
+        print('IMAGE SHAPE: ' + str(input_img.shape))
+        goal_success = False
+        # The classifier outputs one logit per stack height class;
+        # take the argmax to get the predicted class index.
+        stack_class = model(input_img).argmax(dim=1).item()
+        detected_height = stack_class + 1
+        if len(current_stack_goal) == detected_height:
+            goal_success = True
+        return goal_success, detected_height
+
+    def check_incremental_height(self, input_img, current_stack_goal):
+        goal_success = False
+        detected_height = 0  # fall-through value when max_z matches none of the bands below
+        img_median = ndimage.median_filter(input_img, size=5)
+        max_z = np.max(img_median)
+        print('MAX Z: ' + str(max_z))
+        # Narrow bands around the expected peak heights of 1-4 block stacks
+        if (max_z > 0.051) and (max_z < 0.052):
+            detected_height = 1
+        elif (max_z > 0.10) and (max_z < 0.11):
+            detected_height = 2
+        elif (max_z > 0.155) and (max_z < 0.156):
+            detected_height = 3
+        elif (max_z > 0.20) and (max_z < 0.21):
+            detected_height = 4
+        if current_stack_goal == detected_height:
+            goal_success = True
+        return goal_success, detected_height
+
+    def check_z_height(self, input_img):
+        # Median filter the depth heightmap, then report the peak z value
+        img_median = ndimage.median_filter(input_img, size=5)
+        max_z = np.max(img_median)
+        return max_z
+
     def restart_real(self):
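A note on check_incremental_height above: the hard-coded z bands only match stacks built from the roughly 0.05 m simulation blocks, and any reading between the bands falls through to detected_height = 0. A more general sketch of the same median-filter-then-threshold idea (hypothetical helper, assuming uniform block height; not part of this patch):

    import numpy as np
    from scipy import ndimage

    def estimate_stack_height(depth_heightmap, block_height=0.05, num_blocks=4):
        # Median filtering suppresses depth speckle before taking the peak z value.
        img_median = ndimage.median_filter(depth_heightmap, size=5)
        max_z = np.max(img_median)
        # Round the peak height to the nearest whole number of blocks.
        return int(np.clip(np.round(max_z / block_height), 0, num_blocks))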
diff --git a/stack_classifier_data_gen.py b/stack_classifier_data_gen.py
new file mode 100644
index 00000000..6f0e4825
--- /dev/null
+++ b/stack_classifier_data_gen.py
@@ -0,0 +1,218 @@
+import time
+import os
+import random
+import threading
+import argparse
+import matplotlib.pyplot as plt
+import numpy as np
+import scipy as sc
+import cv2
+from collections import namedtuple
+import torch
+from torch.autograd import Variable
+from robot import Robot
+from trainer import Trainer
+from logger import Logger
+import utils
+from main import StackSequence
+import csv
+import torch._utils
+import torchvision.transforms as transforms
+try:
+    torch._utils._rebuild_tensor_v2
+except AttributeError:
+    # Backwards-compatibility shim for checkpoints saved by older PyTorch versions
+    def _rebuild_tensor_v2(storage, storage_offset, size, stride, requires_grad, backward_hooks):
+        tensor = torch._utils._rebuild_tensor(storage, storage_offset, size, stride)
+        tensor.requires_grad = requires_grad
+        tensor._backward_hooks = backward_hooks
+        return tensor
+    torch._utils._rebuild_tensor_v2 = _rebuild_tensor_v2
+try:
+    import efficientnet_pytorch
+    from efficientnet_pytorch import EfficientNet
+except ImportError:
+    print('efficientnet_pytorch is not available, using densenet. '
+          'Try installing https://github.com/ahundt/EfficientNet-PyTorch for all features. '
+          'A version of EfficientNets without dilation can be installed with the command: '
+          'pip3 install efficientnet-pytorch --user --upgrade. '
+          'See https://github.com/lukemelas/EfficientNet-PyTorch for details.')
+    efficientnet_pytorch = None
+
+############### Testing Block Stacking #######
+is_sim = True  # Run in simulation?
+obj_mesh_dir = os.path.abspath('objects/blocks') if is_sim else None  # Directory containing 3D mesh files (.obj) of objects to be added to simulation
+num_obj = 4 if is_sim else None  # Number of objects to add to simulation
+# NOTE: args is never defined in this script; the args expressions below are only
+# safe because is_sim is True and test_preset_cases is False, so they never evaluate.
+tcp_host_ip = args.tcp_host_ip if not is_sim else None  # IP and port to robot arm as TCP client (UR5)
+tcp_port = args.tcp_port if not is_sim else None
+rtc_host_ip = args.rtc_host_ip if not is_sim else None  # IP and port to robot arm as real-time client (UR5)
+rtc_port = args.rtc_port if not is_sim else None
+if is_sim:
+    workspace_limits = np.asarray([[-0.724, -0.276], [-0.224, 0.224], [-0.0001, 0.4]])  # Cols: min max, Rows: x y z (define workspace limits in robot coordinates)
+else:
+    workspace_limits = np.asarray([[0.3, 0.748], [-0.224, 0.224], [-0.255, -0.1]])  # Cols: min max, Rows: x y z (define workspace limits in robot coordinates)
+heightmap_resolution = 0.002  # Meters per pixel of heightmap
+random_seed = 1234
+force_cpu = False
+
+
+# -------------- Testing options --------------
+is_testing = False
+max_test_trials = 1  # Maximum number of test runs per case/scenario
+test_preset_cases = False
+test_preset_file = os.path.abspath(args.test_preset_file) if test_preset_cases else None
+
+#####################################################################
+# Continue logging from previous session
+continue_logging = False  # Set True if continuing logging from a previous session
+split = 'train'  # Set ['train', 'val', 'test']
+root = './logs_for_classifier'
+
+logging_directory = os.path.join(root, split)
+if split == 'train':
+    num_stacks = 33400
+    max_iterations = 100000
+elif split == 'test' or split == 'val':
+    num_stacks = 3400
+    max_iterations = 10000
+
+labels = []
+data_num = 1
+#####################################################################
+
+# ------ Pre-loading and logging options ------
+# load_snapshot = args.load_snapshot # Load pre-trained snapshot of model?
+# snapshot_file = os.path.abspath(args.snapshot_file) if load_snapshot else None
+# continue_logging = False # Continue logging from previous session
+# logging_directory = './logs_for_classifier/training'
+logging_directory = os.path.abspath(logging_directory) if continue_logging else os.path.abspath('logs_for_classifier')
+# save_visualizations = args.save_visualizations # Save visualizations of FCN predictions? Takes 0.6s per training step if set to True
+
+# Initialize data logger
+logger = Logger(continue_logging, logging_directory)
+# logger.save_camera_info(robot.cam_intrinsics, robot.cam_pose, robot.cam_depth_scale) # Save camera intrinsics and pose
+# logger.save_heightmap_info(workspace_limits, heightmap_resolution) # Save heightmap parameters
+
+# Set random seed
+np.random.seed(random_seed)
+# Do we care about color? Switch to
+# True to run a color order stacking test,
+# False tests stacking order does not matter.
+grasp_color_task = False
+# are we doing a stack even if we don't care about colors
+place_task = True
+
+robot = Robot(is_sim, obj_mesh_dir, num_obj, workspace_limits,
+              tcp_host_ip, tcp_port, rtc_host_ip, rtc_port,
+              is_testing, test_preset_cases, test_preset_file,
+              place=place_task, grasp_color_task=grasp_color_task)
+stacksequence = StackSequence(num_obj, is_goal_conditioned_task=grasp_color_task or place_task)
+
+print('full stack sequence: ' + str(stacksequence.object_color_sequence))
+best_rotation_angle = 3.14
+blocks_to_move = num_obj - 1
+
+############## Load Image Classifier Weights ###############
+num_class = 4
+checkpoint_path = "./eval-20190818-154803-6ebd1fa-stack_height-efficientnet-0/model_best.pth.tar"
+height_count_sum = 0
+stack_success_sum = 0
+
+# model_stack = EfficientNet.from_name('efficientnet-b0', override_params={'num_classes': 4})
+# # model_stack = nn.DataParallel(model_stack)
+# model_stack = model_stack.cuda()
+# checkpoint = torch.load(checkpoint_path)
+# model_stack.load_state_dict(checkpoint['state_dict'])
+# model_stack.eval()
+
+model_name = 'efficientnet-b0'
+image_size = EfficientNet.get_image_size(model_name)
+
+if not continue_logging:
+    iteration = 0
+else:
+    # Resume the iteration counter from the last entry of the existing label file
+    label_text = os.path.join(logging_directory, 'data', 'color-images', 'stack_label.txt')
+    with open(label_text, 'r') as myfile:
+        myfiles = [line.split(' ') for line in myfile.readlines()]
+    iteration = int(myfiles[-1][1]) + 1
+
+for stack in range(num_stacks):
+    print('++++++++++++++++++++++++++++++++++++++++++++++++++')
+    print('+++++++ Making New Stack ++++++++')
+    print('++++++++++++++++++++++++++++++++++++++++++++++++++')
+    for i in range(blocks_to_move):
+        print('----------------------------------------------')
+        stacksequence.next()
+        stack_goal = stacksequence.current_sequence_progress()
+        print('STACK GOAL ' + str(len(stack_goal)))
+        block_to_move = stack_goal[-1]
+        print('move block: ' + str(i) + ' current stack goal: ' + str(stack_goal))
+        block_positions = robot.get_obj_positions_and_orientations()[0]
+        primitive_position = block_positions[block_to_move]
+        robot.grasp(primitive_position, best_rotation_angle, object_color=block_to_move)
+        block_positions = robot.get_obj_positions_and_orientations()[0]
+        base_block_to_place = stack_goal[-2]
+        primitive_position = block_positions[base_block_to_place]
+        place = robot.place(primitive_position, best_rotation_angle)
+        print('place ' + str(i) + ' : ' + str(place))
+        # check if we don't care about color
+        if not grasp_color_task:
+            # Deliberately change the goal stack order to test the non-ordered check
+            stack_goal = np.random.permutation(stack_goal)
+            print('fake stack goal to test any stack order: ' + str(stack_goal))
+        stack_success, height_count = robot.check_stack(stack_goal)
+        #######################################
+        stack_class = height_count - 1
+        # Get latest RGB-D image
+        color_img, depth_img = robot.get_camera_data()
+        depth_img = depth_img * robot.cam_depth_scale  # Apply depth scale from calibration
+
+        # Get heightmap from RGB-D image (by re-projecting 3D point cloud)
+        color_heightmap, depth_heightmap = utils.get_heightmap(color_img, depth_img, robot.cam_intrinsics, robot.cam_pose, workspace_limits, heightmap_resolution)
+        valid_depth_heightmap = depth_heightmap.copy()
+        valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0
+
+        # Save RGB-D images and RGB-D heightmaps
+        # logger.save_images(iteration, color_img, depth_img, stack_class)  # Used stack_class instead of mode
+        # logger.save_heightmaps(iteration, color_heightmap, valid_depth_heightmap, stack_class)  # Used stack_class instead of mode
+        ###########################################
+        # depth_heightmap = depth_heightmap[np.newaxis, np.newaxis, :, :]
+
+        # tfms = transforms.Compose([transforms.Resize(image_size), transforms.CenterCrop(image_size),
+        #                            transforms.ToTensor(),
+        #                            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), ])
+        # depth_heightmap = tfms(depth_heightmap).unsqueeze(0)
+
+        # stack_success_classifier, height_count_classifier = robot.stack_reward(model_stack, depth_heightmap, stack_goal)
+        stack_success_z, height_count_z = robot.check_incremental_height(valid_depth_heightmap, len(stack_goal))
+        stack_class_z = height_count_z - 1
+        z_height = robot.check_z_height(valid_depth_heightmap)
+        print('HEIGHT Z: ' + str(height_count_z))
+        print('MAX DEPTH: ' + str(np.max(valid_depth_heightmap)))
+        print('check z: ' + str(z_height))
+        filename = '%06d.%s.color.png' % (iteration, stack_class)
+        if continue_logging:
+            # Append the new label as a line in the existing label file
+            with open(label_text, "a") as f:
+                f.writelines("\n")
+                name = [filename, ' ', str(iteration), ' ', str(stack_class)]
+                f.writelines(name)
+        else:
+            labels.append([filename, iteration, stack_class])
+            logger.save_label('stack_label', labels)
+        print('stack success part ' + str(i + 1) + ' of ' + str(blocks_to_move) + ': ' + str(stack_success) + ':' + str(height_count) + ':' + str(stack_class))
+        print('stack success classifier part ' + str(i + 1) + ' of ' + str(blocks_to_move) + ': ' + str(stack_success_z) + ':' + str(height_count_z) + ':' + str(stack_class_z))
+        iteration += 1
+        if height_count_z == height_count:
+            height_count_sum += 1
+        if stack_success_z == stack_success:
+            stack_success_sum += 1
+        print('stack height classifier accuracy ' + str(i + 1) + ' height_count ' + str(height_count_sum / iteration))
+        print('stack success classifier accuracy ' + str(i + 1) + ' stack_success ' + str(stack_success_sum / iteration))
+
+    # reset scene
+    robot.reposition_objects()
+    # determine first block to grasp
+    stacksequence.next()
+    if iteration > max_iterations:
+        break
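One open issue the TODOs above point at: robot.stack_reward receives depth_heightmap_after_action as a raw (H, W) numpy array, while an EfficientNet classifier expects a normalized (N, 3, S, S) float tensor, which is presumably why the transforms block in stack_classifier_data_gen.py is commented out. A minimal preprocessing sketch (hypothetical helper, not part of this patch; assumes the 0.4 m workspace z limit as the depth normalization constant and a CUDA device):

    import numpy as np
    import cv2
    import torch

    def heightmap_to_classifier_input(depth_heightmap, image_size=224, device='cuda'):
        # Replace NaNs and scale depth to [0, 1] using the 0.4 m workspace z limit.
        img = np.nan_to_num(depth_heightmap).astype(np.float32)
        img = np.clip(img / 0.4, 0.0, 1.0)
        # Resize to the classifier's input size and replicate depth into 3 channels.
        img = cv2.resize(img, (image_size, image_size))
        img = np.stack([img] * 3, axis=0)
        return torch.from_numpy(img).unsqueeze(0).to(device)  # shape (1, 3, S, S)

With input prepared this way, the model(input_img) call in stack_reward yields a (1, num_classes) logits tensor, and argmax(dim=1).item() + 1 recovers the detected height as in the fixed method above.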