# Network definition (protobuf text format) for "VGG_VOC0712_SSD_300x300_deploy".
#
# Layout of this file, top to bottom:
#   1. Input blob "data" with shape 1 x 3 x 300 x 300.
#   2. conv1_x .. conv5_x convolution/ReLU stacks separated by max pooling.
#   3. fc6 / fc7 expressed as Convolution layers (fc6 uses dilation 6).
#   4. Extra convolution pairs conv6_x .. conv9_x.
#   5. Six prediction heads (conv4_3_norm, fc7, conv6_2, conv7_2, conv8_2,
#      conv9_2), each with a loc branch, a conf branch and a PriorBox layer.
#   6. Concatenation of all heads, then reshape to (.., .., 21) + softmax.
#
# Every Convolution layer carries two `param` entries; the first uses
# lr_mult 1 / decay_mult 1 and the second lr_mult 2 / decay_mult 0.
# NOTE(review): presumably these are the weight and bias blobs respectively,
# and the fillers are only relevant when no trained weights are loaded.
name: "VGG_VOC0712_SSD_300x300_deploy"

# ---------------------------------------------------------------------------
# Input
# ---------------------------------------------------------------------------
layer {
  name: "input"
  type: "Input"
  top: "data"
  input_param {
    shape { dim: 1 dim: 3 dim: 300 dim: 300 }
  }
}

# ---------------------------------------------------------------------------
# conv1: two 3x3 convolutions (64 outputs), then 2x2 max pooling, stride 2
# ---------------------------------------------------------------------------
layer {
  name: "conv1_1"
  type: "Convolution"
  bottom: "data"
  top: "conv1_1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 64
    pad: 1
    kernel_size: 3
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
# ReLU layers throughout this file run in place (bottom == top).
layer { name: "relu1_1" type: "ReLU" bottom: "conv1_1" top: "conv1_1" }
layer {
  name: "conv1_2"
  type: "Convolution"
  bottom: "conv1_1"
  top: "conv1_2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 64
    pad: 1
    kernel_size: 3
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "relu1_2" type: "ReLU" bottom: "conv1_2" top: "conv1_2" }
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1_2"
  top: "pool1"
  pooling_param { pool: MAX kernel_size: 2 stride: 2 }
}

# ---------------------------------------------------------------------------
# conv2: two 3x3 convolutions (128 outputs), then 2x2 max pooling, stride 2
# ---------------------------------------------------------------------------
layer {
  name: "conv2_1"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2_1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 128
    pad: 1
    kernel_size: 3
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "relu2_1" type: "ReLU" bottom: "conv2_1" top: "conv2_1" }
layer {
  name: "conv2_2"
  type: "Convolution"
  bottom: "conv2_1"
  top: "conv2_2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 128
    pad: 1
    kernel_size: 3
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "relu2_2" type: "ReLU" bottom: "conv2_2" top: "conv2_2" }
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2_2"
  top: "pool2"
  pooling_param { pool: MAX kernel_size: 2 stride: 2 }
}

# ---------------------------------------------------------------------------
# conv3: three 3x3 convolutions (256 outputs), then 2x2 max pooling, stride 2
# ---------------------------------------------------------------------------
layer {
  name: "conv3_1"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3_1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "relu3_1" type: "ReLU" bottom: "conv3_1" top: "conv3_1" }
layer {
  name: "conv3_2"
  type: "Convolution"
  bottom: "conv3_1"
  top: "conv3_2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "relu3_2" type: "ReLU" bottom: "conv3_2" top: "conv3_2" }
layer {
  name: "conv3_3"
  type: "Convolution"
  bottom: "conv3_2"
  top: "conv3_3"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "relu3_3" type: "ReLU" bottom: "conv3_3" top: "conv3_3" }
layer {
  name: "pool3"
  type: "Pooling"
  bottom: "conv3_3"
  top: "pool3"
  pooling_param { pool: MAX kernel_size: 2 stride: 2 }
}

# ---------------------------------------------------------------------------
# conv4: three 3x3 convolutions (512 outputs), then 2x2 max pooling, stride 2.
# conv4_3 is also consumed by the "conv4_3_norm" Normalize layer further down.
# ---------------------------------------------------------------------------
layer {
  name: "conv4_1"
  type: "Convolution"
  bottom: "pool3"
  top: "conv4_1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "relu4_1" type: "ReLU" bottom: "conv4_1" top: "conv4_1" }
layer {
  name: "conv4_2"
  type: "Convolution"
  bottom: "conv4_1"
  top: "conv4_2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "relu4_2" type: "ReLU" bottom: "conv4_2" top: "conv4_2" }
layer {
  name: "conv4_3"
  type: "Convolution"
  bottom: "conv4_2"
  top: "conv4_3"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "relu4_3" type: "ReLU" bottom: "conv4_3" top: "conv4_3" }
layer {
  name: "pool4"
  type: "Pooling"
  bottom: "conv4_3"
  top: "pool4"
  pooling_param { pool: MAX kernel_size: 2 stride: 2 }
}

# ---------------------------------------------------------------------------
# conv5: three 3x3 convolutions (512 outputs, explicit dilation 1), followed
# by a 3x3 max pooling with stride 1 and pad 1.
# ---------------------------------------------------------------------------
layer {
  name: "conv5_1"
  type: "Convolution"
  bottom: "pool4"
  top: "conv5_1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
    dilation: 1
  }
}
layer { name: "relu5_1" type: "ReLU" bottom: "conv5_1" top: "conv5_1" }
layer {
  name: "conv5_2"
  type: "Convolution"
  bottom: "conv5_1"
  top: "conv5_2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
    dilation: 1
  }
}
layer { name: "relu5_2" type: "ReLU" bottom: "conv5_2" top: "conv5_2" }
layer {
  name: "conv5_3"
  type: "Convolution"
  bottom: "conv5_2"
  top: "conv5_3"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
    dilation: 1
  }
}
layer { name: "relu5_3" type: "ReLU" bottom: "conv5_3" top: "conv5_3" }
layer {
  name: "pool5"
  type: "Pooling"
  bottom: "conv5_3"
  top: "pool5"
  pooling_param { pool: MAX kernel_size: 3 stride: 1 pad: 1 }
}

# ---------------------------------------------------------------------------
# fc6 / fc7, written as Convolution layers.
# fc6: 3x3 kernel with dilation 6 and pad 6; fc7: 1x1 kernel. Both 1024 outputs.
# ---------------------------------------------------------------------------
layer {
  name: "fc6"
  type: "Convolution"
  bottom: "pool5"
  top: "fc6"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 1024
    pad: 6
    kernel_size: 3
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
    dilation: 6
  }
}
layer { name: "relu6" type: "ReLU" bottom: "fc6" top: "fc6" }
layer {
  name: "fc7"
  type: "Convolution"
  bottom: "fc6"
  top: "fc7"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 1024
    kernel_size: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "relu7" type: "ReLU" bottom: "fc7" top: "fc7" }

# ---------------------------------------------------------------------------
# Extra layers conv6 .. conv9. Each pair is a 1x1 convolution followed by a
# 3x3 convolution. conv6_2 / conv7_2 use stride 2 with pad 1; conv8_2 /
# conv9_2 use stride 1 with pad 0.
# ---------------------------------------------------------------------------
layer {
  name: "conv6_1"
  type: "Convolution"
  bottom: "fc7"
  top: "conv6_1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256
    pad: 0
    kernel_size: 1
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv6_1_relu" type: "ReLU" bottom: "conv6_1" top: "conv6_1" }
layer {
  name: "conv6_2"
  type: "Convolution"
  bottom: "conv6_1"
  top: "conv6_2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    stride: 2
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv6_2_relu" type: "ReLU" bottom: "conv6_2" top: "conv6_2" }
layer {
  name: "conv7_1"
  type: "Convolution"
  bottom: "conv6_2"
  top: "conv7_1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 128
    pad: 0
    kernel_size: 1
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv7_1_relu" type: "ReLU" bottom: "conv7_1" top: "conv7_1" }
layer {
  name: "conv7_2"
  type: "Convolution"
  bottom: "conv7_1"
  top: "conv7_2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    stride: 2
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv7_2_relu" type: "ReLU" bottom: "conv7_2" top: "conv7_2" }
layer {
  name: "conv8_1"
  type: "Convolution"
  bottom: "conv7_2"
  top: "conv8_1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 128
    pad: 0
    kernel_size: 1
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv8_1_relu" type: "ReLU" bottom: "conv8_1" top: "conv8_1" }
layer {
  name: "conv8_2"
  type: "Convolution"
  bottom: "conv8_1"
  top: "conv8_2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256
    pad: 0
    kernel_size: 3
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv8_2_relu" type: "ReLU" bottom: "conv8_2" top: "conv8_2" }
layer {
  name: "conv9_1"
  type: "Convolution"
  bottom: "conv8_2"
  top: "conv9_1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 128
    pad: 0
    kernel_size: 1
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv9_1_relu" type: "ReLU" bottom: "conv9_1" top: "conv9_1" }
layer {
  name: "conv9_2"
  type: "Convolution"
  bottom: "conv9_1"
  top: "conv9_2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256
    pad: 0
    kernel_size: 3
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv9_2_relu" type: "ReLU" bottom: "conv9_2" top: "conv9_2" }

# ---------------------------------------------------------------------------
# Prediction heads.
#
# Each head has the same shape:
#   <src>_mbox_loc   3x3 conv  -> Permute(0,2,3,1) -> Flatten(axis 1)
#   <src>_mbox_conf  3x3 conv  -> Permute(0,2,3,1) -> Flatten(axis 1)
#   <src>_mbox_priorbox        PriorBox(<src>, data)
#
# The conf num_output values (84 or 126) are multiples of 21, the last
# dimension used by "mbox_conf_reshape" below; the matching loc num_output
# values (16 or 24) are the same multiples of 4.
# All PriorBox layers share flip: true, clip: false, variances
# 0.1 / 0.1 / 0.2 / 0.2 and offset 0.5; they differ in min_size, max_size,
# aspect_ratio and step.
# ---------------------------------------------------------------------------

# --- Head 1: conv4_3, passed through a Normalize layer first ----------------
layer {
  name: "conv4_3_norm"
  type: "Normalize"
  bottom: "conv4_3"
  top: "conv4_3_norm"
  norm_param {
    across_spatial: false
    scale_filler { type: "constant" value: 20 }
    channel_shared: false
  }
}
layer {
  name: "conv4_3_norm_mbox_loc"
  type: "Convolution"
  bottom: "conv4_3_norm"
  top: "conv4_3_norm_mbox_loc"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 16
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "conv4_3_norm_mbox_loc_perm"
  type: "Permute"
  bottom: "conv4_3_norm_mbox_loc"
  top: "conv4_3_norm_mbox_loc_perm"
  permute_param { order: 0 order: 2 order: 3 order: 1 }
}
layer {
  name: "conv4_3_norm_mbox_loc_flat"
  type: "Flatten"
  bottom: "conv4_3_norm_mbox_loc_perm"
  top: "conv4_3_norm_mbox_loc_flat"
  flatten_param { axis: 1 }
}
layer {
  name: "conv4_3_norm_mbox_conf"
  type: "Convolution"
  bottom: "conv4_3_norm"
  top: "conv4_3_norm_mbox_conf"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 84
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "conv4_3_norm_mbox_conf_perm"
  type: "Permute"
  bottom: "conv4_3_norm_mbox_conf"
  top: "conv4_3_norm_mbox_conf_perm"
  permute_param { order: 0 order: 2 order: 3 order: 1 }
}
layer {
  name: "conv4_3_norm_mbox_conf_flat"
  type: "Flatten"
  bottom: "conv4_3_norm_mbox_conf_perm"
  top: "conv4_3_norm_mbox_conf_flat"
  flatten_param { axis: 1 }
}
layer {
  name: "conv4_3_norm_mbox_priorbox"
  type: "PriorBox"
  bottom: "conv4_3_norm"
  bottom: "data"
  top: "conv4_3_norm_mbox_priorbox"
  prior_box_param {
    min_size: 30.0
    max_size: 60.0
    aspect_ratio: 2
    flip: true
    clip: false
    variance: 0.1
    variance: 0.1
    variance: 0.2
    variance: 0.2
    step: 8
    offset: 0.5
  }
}

# --- Head 2: fc7 -------------------------------------------------------------
layer {
  name: "fc7_mbox_loc"
  type: "Convolution"
  bottom: "fc7"
  top: "fc7_mbox_loc"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 24
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "fc7_mbox_loc_perm"
  type: "Permute"
  bottom: "fc7_mbox_loc"
  top: "fc7_mbox_loc_perm"
  permute_param { order: 0 order: 2 order: 3 order: 1 }
}
layer {
  name: "fc7_mbox_loc_flat"
  type: "Flatten"
  bottom: "fc7_mbox_loc_perm"
  top: "fc7_mbox_loc_flat"
  flatten_param { axis: 1 }
}
layer {
  name: "fc7_mbox_conf"
  type: "Convolution"
  bottom: "fc7"
  top: "fc7_mbox_conf"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 126
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "fc7_mbox_conf_perm"
  type: "Permute"
  bottom: "fc7_mbox_conf"
  top: "fc7_mbox_conf_perm"
  permute_param { order: 0 order: 2 order: 3 order: 1 }
}
layer {
  name: "fc7_mbox_conf_flat"
  type: "Flatten"
  bottom: "fc7_mbox_conf_perm"
  top: "fc7_mbox_conf_flat"
  flatten_param { axis: 1 }
}
layer {
  name: "fc7_mbox_priorbox"
  type: "PriorBox"
  bottom: "fc7"
  bottom: "data"
  top: "fc7_mbox_priorbox"
  prior_box_param {
    min_size: 60.0
    max_size: 111.0
    aspect_ratio: 2
    aspect_ratio: 3
    flip: true
    clip: false
    variance: 0.1
    variance: 0.1
    variance: 0.2
    variance: 0.2
    step: 16
    offset: 0.5
  }
}

# --- Head 3: conv6_2 ---------------------------------------------------------
layer {
  name: "conv6_2_mbox_loc"
  type: "Convolution"
  bottom: "conv6_2"
  top: "conv6_2_mbox_loc"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 24
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "conv6_2_mbox_loc_perm"
  type: "Permute"
  bottom: "conv6_2_mbox_loc"
  top: "conv6_2_mbox_loc_perm"
  permute_param { order: 0 order: 2 order: 3 order: 1 }
}
layer {
  name: "conv6_2_mbox_loc_flat"
  type: "Flatten"
  bottom: "conv6_2_mbox_loc_perm"
  top: "conv6_2_mbox_loc_flat"
  flatten_param { axis: 1 }
}
layer {
  name: "conv6_2_mbox_conf"
  type: "Convolution"
  bottom: "conv6_2"
  top: "conv6_2_mbox_conf"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 126
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "conv6_2_mbox_conf_perm"
  type: "Permute"
  bottom: "conv6_2_mbox_conf"
  top: "conv6_2_mbox_conf_perm"
  permute_param { order: 0 order: 2 order: 3 order: 1 }
}
layer {
  name: "conv6_2_mbox_conf_flat"
  type: "Flatten"
  bottom: "conv6_2_mbox_conf_perm"
  top: "conv6_2_mbox_conf_flat"
  flatten_param { axis: 1 }
}
layer {
  name: "conv6_2_mbox_priorbox"
  type: "PriorBox"
  bottom: "conv6_2"
  bottom: "data"
  top: "conv6_2_mbox_priorbox"
  prior_box_param {
    min_size: 111.0
    max_size: 162.0
    aspect_ratio: 2
    aspect_ratio: 3
    flip: true
    clip: false
    variance: 0.1
    variance: 0.1
    variance: 0.2
    variance: 0.2
    step: 32
    offset: 0.5
  }
}

# --- Head 4: conv7_2 ---------------------------------------------------------
layer {
  name: "conv7_2_mbox_loc"
  type: "Convolution"
  bottom: "conv7_2"
  top: "conv7_2_mbox_loc"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 24
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "conv7_2_mbox_loc_perm"
  type: "Permute"
  bottom: "conv7_2_mbox_loc"
  top: "conv7_2_mbox_loc_perm"
  permute_param { order: 0 order: 2 order: 3 order: 1 }
}
layer {
  name: "conv7_2_mbox_loc_flat"
  type: "Flatten"
  bottom: "conv7_2_mbox_loc_perm"
  top: "conv7_2_mbox_loc_flat"
  flatten_param { axis: 1 }
}
layer {
  name: "conv7_2_mbox_conf"
  type: "Convolution"
  bottom: "conv7_2"
  top: "conv7_2_mbox_conf"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 126
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "conv7_2_mbox_conf_perm"
  type: "Permute"
  bottom: "conv7_2_mbox_conf"
  top: "conv7_2_mbox_conf_perm"
  permute_param { order: 0 order: 2 order: 3 order: 1 }
}
layer {
  name: "conv7_2_mbox_conf_flat"
  type: "Flatten"
  bottom: "conv7_2_mbox_conf_perm"
  top: "conv7_2_mbox_conf_flat"
  flatten_param { axis: 1 }
}
layer {
  name: "conv7_2_mbox_priorbox"
  type: "PriorBox"
  bottom: "conv7_2"
  bottom: "data"
  top: "conv7_2_mbox_priorbox"
  prior_box_param {
    min_size: 162.0
    max_size: 213.0
    aspect_ratio: 2
    aspect_ratio: 3
    flip: true
    clip: false
    variance: 0.1
    variance: 0.1
    variance: 0.2
    variance: 0.2
    step: 64
    offset: 0.5
  }
}

# --- Head 5: conv8_2 ---------------------------------------------------------
layer {
  name: "conv8_2_mbox_loc"
  type: "Convolution"
  bottom: "conv8_2"
  top: "conv8_2_mbox_loc"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 16
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "conv8_2_mbox_loc_perm"
  type: "Permute"
  bottom: "conv8_2_mbox_loc"
  top: "conv8_2_mbox_loc_perm"
  permute_param { order: 0 order: 2 order: 3 order: 1 }
}
layer {
  name: "conv8_2_mbox_loc_flat"
  type: "Flatten"
  bottom: "conv8_2_mbox_loc_perm"
  top: "conv8_2_mbox_loc_flat"
  flatten_param { axis: 1 }
}
layer {
  name: "conv8_2_mbox_conf"
  type: "Convolution"
  bottom: "conv8_2"
  top: "conv8_2_mbox_conf"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 84
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "conv8_2_mbox_conf_perm"
  type: "Permute"
  bottom: "conv8_2_mbox_conf"
  top: "conv8_2_mbox_conf_perm"
  permute_param { order: 0 order: 2 order: 3 order: 1 }
}
layer {
  name: "conv8_2_mbox_conf_flat"
  type: "Flatten"
  bottom: "conv8_2_mbox_conf_perm"
  top: "conv8_2_mbox_conf_flat"
  flatten_param { axis: 1 }
}
layer {
  name: "conv8_2_mbox_priorbox"
  type: "PriorBox"
  bottom: "conv8_2"
  bottom: "data"
  top: "conv8_2_mbox_priorbox"
  prior_box_param {
    min_size: 213.0
    max_size: 264.0
    aspect_ratio: 2
    flip: true
    clip: false
    variance: 0.1
    variance: 0.1
    variance: 0.2
    variance: 0.2
    step: 100
    offset: 0.5
  }
}

# --- Head 6: conv9_2 ---------------------------------------------------------
layer {
  name: "conv9_2_mbox_loc"
  type: "Convolution"
  bottom: "conv9_2"
  top: "conv9_2_mbox_loc"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 16
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "conv9_2_mbox_loc_perm"
  type: "Permute"
  bottom: "conv9_2_mbox_loc"
  top: "conv9_2_mbox_loc_perm"
  permute_param { order: 0 order: 2 order: 3 order: 1 }
}
layer {
  name: "conv9_2_mbox_loc_flat"
  type: "Flatten"
  bottom: "conv9_2_mbox_loc_perm"
  top: "conv9_2_mbox_loc_flat"
  flatten_param { axis: 1 }
}
layer {
  name: "conv9_2_mbox_conf"
  type: "Convolution"
  bottom: "conv9_2"
  top: "conv9_2_mbox_conf"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 84
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "conv9_2_mbox_conf_perm"
  type: "Permute"
  bottom: "conv9_2_mbox_conf"
  top: "conv9_2_mbox_conf_perm"
  permute_param { order: 0 order: 2 order: 3 order: 1 }
}
layer {
  name: "conv9_2_mbox_conf_flat"
  type: "Flatten"
  bottom: "conv9_2_mbox_conf_perm"
  top: "conv9_2_mbox_conf_flat"
  flatten_param { axis: 1 }
}
layer {
  name: "conv9_2_mbox_priorbox"
  type: "PriorBox"
  bottom: "conv9_2"
  bottom: "data"
  top: "conv9_2_mbox_priorbox"
  prior_box_param {
    min_size: 264.0
    max_size: 315.0
    aspect_ratio: 2
    flip: true
    clip: false
    variance: 0.1
    variance: 0.1
    variance: 0.2
    variance: 0.2
    step: 300
    offset: 0.5
  }
}

# ---------------------------------------------------------------------------
# Merge the six heads. The bottoms are listed in the same order
# (conv4_3_norm, fc7, conv6_2, conv7_2, conv8_2, conv9_2) in all three Concat
# layers. loc and conf are concatenated along axis 1, prior boxes along axis 2.
# ---------------------------------------------------------------------------
layer {
  name: "mbox_loc"
  type: "Concat"
  bottom: "conv4_3_norm_mbox_loc_flat"
  bottom: "fc7_mbox_loc_flat"
  bottom: "conv6_2_mbox_loc_flat"
  bottom: "conv7_2_mbox_loc_flat"
  bottom: "conv8_2_mbox_loc_flat"
  bottom: "conv9_2_mbox_loc_flat"
  top: "mbox_loc"
  concat_param { axis: 1 }
}
layer {
  name: "mbox_conf"
  type: "Concat"
  bottom: "conv4_3_norm_mbox_conf_flat"
  bottom: "fc7_mbox_conf_flat"
  bottom: "conv6_2_mbox_conf_flat"
  bottom: "conv7_2_mbox_conf_flat"
  bottom: "conv8_2_mbox_conf_flat"
  bottom: "conv9_2_mbox_conf_flat"
  top: "mbox_conf"
  concat_param { axis: 1 }
}
layer {
  name: "mbox_priorbox"
  type: "Concat"
  bottom: "conv4_3_norm_mbox_priorbox"
  bottom: "fc7_mbox_priorbox"
  bottom: "conv6_2_mbox_priorbox"
  bottom: "conv7_2_mbox_priorbox"
  bottom: "conv8_2_mbox_priorbox"
  bottom: "conv9_2_mbox_priorbox"
  top: "mbox_priorbox"
  concat_param { axis: 2 }
}

# ---------------------------------------------------------------------------
# Confidence post-processing: reshape the flat scores to (0, -1, 21), apply
# softmax along axis 2 (the 21-wide axis), then flatten again from axis 1.
# NOTE(review): no layer in this file consumes mbox_loc, mbox_priorbox or
# mbox_conf_flatten; presumably a detection/decoding step happens outside
# this definition - confirm against the code that loads this net.
# ---------------------------------------------------------------------------
layer {
  name: "mbox_conf_reshape"
  type: "Reshape"
  bottom: "mbox_conf"
  top: "mbox_conf_reshape"
  reshape_param {
    shape { dim: 0 dim: -1 dim: 21 }
  }
}
layer {
  name: "mbox_conf_softmax"
  type: "Softmax"
  bottom: "mbox_conf_reshape"
  top: "mbox_conf_softmax"
  softmax_param { axis: 2 }
}
layer {
  name: "mbox_conf_flatten"
  type: "Flatten"
  bottom: "mbox_conf_softmax"
  top: "mbox_conf_flatten"
  flatten_param { axis: 1 }
}