diff --git a/dp-core/vr_interface.c b/dp-core/vr_interface.c index 2a14405f4..7d431228b 100644 --- a/dp-core/vr_interface.c +++ b/dp-core/vr_interface.c @@ -1806,12 +1806,15 @@ vr_interface_add(vr_interface_req *req, bool need_response) } static void -vr_interface_make_req(vr_interface_req *req, struct vr_interface *intf) +vr_interface_make_req(vr_interface_req *req, struct vr_interface *intf, + unsigned int core) { unsigned int i; struct vr_interface_stats *stats; struct vr_interface_settings settings; + int real_core; + req->vifr_core = core; req->vifr_type = intf->vif_type; req->vifr_flags = intf->vif_flags; req->vifr_vrf = intf->vif_vrf; @@ -1860,15 +1863,79 @@ vr_interface_make_req(vr_interface_req *req, struct vr_interface *intf) req->vifr_obytes = 0; req->vifr_opackets = 0; req->vifr_oerrors = 0; - - for (i = 0; i < vr_num_cpus; i++) { - stats = vif_get_stats(intf, i); - req->vifr_ibytes += stats->vis_ibytes; - req->vifr_ipackets += stats->vis_ipackets; - req->vifr_ierrors += stats->vis_ierrors; - req->vifr_obytes += stats->vis_obytes; - req->vifr_opackets += stats->vis_opackets; - req->vifr_oerrors += stats->vis_oerrors; + req->vifr_ifenqpkts = 0; + req->vifr_ifenqdrops = 0; + req->vifr_ifdeqpkts = 0; + req->vifr_ifdeqdrops = 0; + req->vifr_iftxrngenqpkts = 0; + req->vifr_iftxrngenqdrops = 0; + req->vifr_ifrxrngenqpkts = 0; + req->vifr_ifrxrngenqdrops = 0; + + /** + * Implementation of getting per-core vif statistics is based on this + * little trick to avoid making changes in how agent makes requests for + * statistics. From vRouter's and agent's point of view, request for stats + * for 0th core means a request for stats summed up for all the cores. + * So cores are enumerated starting with 1. + * Meanwhile, from user's point of view they are enumerated starting with 0 + * (e.g. vif --list --core 0 means 'vif stats for the very first (0th) + * core'). This is how Linux enumerates CPUs, so it should be more + * intuitive for the user. 
+ * + * Agent is not aware of possibility of asking for per-core stats. Its + * requests have vifr_core implicitly set to 0. So we need to make a + * conversion between those enumerating systems. The vif utility increments + * by 1 the core number user asked for. Then this modified requests comes + * here. See the comment below. + */ + if (req->vifr_core == 0) { /* user or agent wants summed up stats */ + for (i = 0; i < vr_num_cpus; i++) { + stats = vif_get_stats(intf, i); + req->vifr_ibytes += stats->vis_ibytes; + req->vifr_ipackets += stats->vis_ipackets; + req->vifr_ierrors += stats->vis_ierrors; + req->vifr_obytes += stats->vis_obytes; + req->vifr_opackets += stats->vis_opackets; + req->vifr_oerrors += stats->vis_oerrors; + req->vifr_ifenqpkts += stats->vis_ifenqpkts; + req->vifr_ifdeqpkts += stats->vis_ifdeqpkts; + req->vifr_ifdeqdrops += stats->vis_ifdeqdrops; + req->vifr_ifenqdrops += stats->vis_ifenqdrops; + req->vifr_iftxrngenqpkts += stats->vis_iftxrngenqpkts; + req->vifr_iftxrngenqdrops += stats->vis_iftxrngenqdrops; + req->vifr_ifrxrngenqpkts += stats->vis_ifrxrngenqpkts; + req->vifr_ifrxrngenqdrops += stats->vis_ifrxrngenqdrops; + } + /* user wants stats for one core; vifr_core is 1-based here */ + } else if (req->vifr_core > 0 && req->vifr_core <= vr_num_cpus) { + /** + * real_core is the 0-based core the user really asked for. + * If a request came for stats for 1st core, it means user + * asked for 0th core. If request was made for 2nd, user wanted + * the 1st, and so on. + * + * TODO: This would be much simpler if agent could explicitly ask + * for stats for 'minus 1st' core, meaning 'all the cores'. 
+ */ + real_core = req->vifr_core; + real_core--; + stats = vif_get_stats(intf, real_core); + + req->vifr_ibytes = stats->vis_ibytes; + req->vifr_ipackets = stats->vis_ipackets; + req->vifr_ierrors = stats->vis_ierrors; + req->vifr_obytes = stats->vis_obytes; + req->vifr_opackets = stats->vis_opackets; + req->vifr_oerrors = stats->vis_oerrors; + req->vifr_ifenqpkts = stats->vis_ifenqpkts; + req->vifr_ifenqdrops = stats->vis_ifenqdrops; + req->vifr_ifdeqpkts = stats->vis_ifdeqpkts; + req->vifr_ifdeqdrops = stats->vis_ifdeqdrops; + req->vifr_iftxrngenqpkts = stats->vis_iftxrngenqpkts; + req->vifr_iftxrngenqdrops = stats->vis_iftxrngenqdrops; + req->vifr_ifrxrngenqpkts = stats->vis_ifrxrngenqpkts; + req->vifr_ifrxrngenqdrops = stats->vis_ifrxrngenqdrops; } req->vifr_speed = -1; @@ -1935,6 +2002,18 @@ vr_interface_get(vr_interface_req *req) struct vr_interface *vif = NULL; struct vrouter *router; vr_interface_req *resp = NULL; + unsigned int core; + + /** + * Check if requested core number is sane. If not, let's assume the + * request was made for summed up stats for all the cores. + */ + if (req->vifr_core > 0 && req->vifr_core <= vr_num_cpus) { + core = req->vifr_core; + } else { + core = 0; + } + router = vrouter_get(req->vifr_rid); if (!router) { @@ -1954,7 +2033,7 @@ vr_interface_get(vr_interface_req *req) goto generate_response; } - vr_interface_make_req(resp, vif); + vr_interface_make_req(resp, vif, core); } else ret = -ENOENT; @@ -1974,8 +2053,19 @@ vr_interface_dump(vr_interface_req *r) vr_interface_req *resp = NULL; struct vr_interface *vif; struct vrouter *router = vrouter_get(r->vifr_vrf); + unsigned int core; struct vr_message_dumper *dumper = NULL; + /** + * Check if requested core number is sane. If not, let's assume the + * request was made for summed up stats for all the cores. 
+ */ + if (r->vifr_core > 0 && r->vifr_core <= vr_num_cpus) { + core = r->vifr_core; + } else { + core = 0; + } + if (!router && (ret = -ENODEV)) goto generate_response; @@ -1998,7 +2088,7 @@ vr_interface_dump(vr_interface_req *r) i < router->vr_max_interfaces; i++) { vif = router->vr_interfaces[i]; if (vif) { - vr_interface_make_req(resp, vif); + vr_interface_make_req(resp, vif, core); ret = vr_message_dump_object(dumper, VR_INTERFACE_OBJECT_ID, resp); if (ret <= 0) break; diff --git a/dpdk/vr_dpdk_interface.c b/dpdk/vr_dpdk_interface.c index 6eb9ff873..76325ff2a 100644 --- a/dpdk/vr_dpdk_interface.c +++ b/dpdk/vr_dpdk_interface.c @@ -23,6 +23,9 @@ #include #include +extern struct vr_interface_stats *vif_get_stats(struct vr_interface *, + unsigned short); + /* * dpdk_virtual_if_add - add a virtual (virtio) interface to vrouter. * Returns 0 on success, < 0 otherwise. @@ -830,6 +833,8 @@ dpdk_if_tx(struct vr_interface *vif, struct vr_packet *pkt) struct vr_dpdk_queue *tx_queue = &lcore->lcore_tx_queues[vif_idx]; struct vr_dpdk_queue *monitoring_tx_queue; struct vr_packet *p_clone; + struct rte_port_out_stats port_stats; + struct vr_interface_stats *vr_stats; int ret; RTE_LOG(DEBUG, VROUTER,"%s: TX packet to interface %s\n", __func__, @@ -854,10 +859,14 @@ dpdk_if_tx(struct vr_interface *vif, struct vr_packet *pkt) if (unlikely(vif->vif_type == VIF_TYPE_AGENT)) { ret = rte_ring_mp_enqueue(vr_dpdk.packet_ring, m); + vr_stats = vif_get_stats(vif, lcore_id); if (ret != 0) { /* TODO: a separate counter for this drop */ vif_drop_pkt(vif, vr_dpdk_mbuf_to_pkt(m), 0); + vr_stats->vis_iftxrngenqdrops++; return -1; + } else { + vr_stats->vis_iftxrngenqpkts++; } #ifdef VR_DPDK_TX_PKT_DUMP #ifdef VR_DPDK_PKT_DUMP_VIF_FILTER @@ -927,13 +936,17 @@ dpdk_if_tx(struct vr_interface *vif, struct vr_packet *pkt) rte_pktmbuf_dump(stdout, m, 0x60); #endif + vr_stats = vif_get_stats(vif, lcore_id); if (likely(tx_queue->txq_ops.f_tx != NULL)) { tx_queue->txq_ops.f_tx(tx_queue->q_queue_h, 
m); if (lcore_id == VR_DPDK_PACKET_LCORE_ID) tx_queue->txq_ops.f_flush(tx_queue->q_queue_h); + + dpdk_port_out_stats_update(tx_queue, &port_stats, vr_stats); } else { RTE_LOG(DEBUG, VROUTER,"%s: error TXing to interface %s: no queue for lcore %u\n", __func__, vif->vif_name, lcore_id); + vr_stats->vis_ifenqdrops++; vif_drop_pkt(vif, vr_dpdk_mbuf_to_pkt(m), 0); return -1; } @@ -951,6 +964,8 @@ dpdk_if_rx(struct vr_interface *vif, struct vr_packet *pkt) struct vr_dpdk_queue *tx_queue = &lcore->lcore_tx_queues[vif_idx]; struct vr_dpdk_queue *monitoring_tx_queue; struct vr_packet *p_clone; + struct rte_port_out_stats port_stats; + struct vr_interface_stats *vr_stats; RTE_LOG(DEBUG, VROUTER,"%s: TX packet to interface %s\n", __func__, vif->vif_name); @@ -979,11 +994,15 @@ dpdk_if_rx(struct vr_interface *vif, struct vr_packet *pkt) rte_pktmbuf_dump(stdout, m, 0x60); #endif + vr_stats = vif_get_stats(vif, lcore_id); if (likely(tx_queue->txq_ops.f_tx != NULL)) { tx_queue->txq_ops.f_tx(tx_queue->q_queue_h, m); + + dpdk_port_out_stats_update(tx_queue, &port_stats, vr_stats); } else { RTE_LOG(DEBUG, VROUTER,"%s: error TXing to interface %s: no queue for lcore %u\n", __func__, vif->vif_name, lcore_id); + vr_stats->vis_ifenqdrops++; vif_drop_pkt(vif, vr_dpdk_mbuf_to_pkt(m), 0); return -1; } diff --git a/dpdk/vr_dpdk_knidev.c b/dpdk/vr_dpdk_knidev.c index d80fa58f8..1190847d0 100644 --- a/dpdk/vr_dpdk_knidev.c +++ b/dpdk/vr_dpdk_knidev.c @@ -23,7 +23,23 @@ /* * KNI Reader */ +#if DPDK_KNIDEV_READER_STATS_COLLECT == 1 + +#define DPDK_KNIDEV_READER_STATS_PKTS_IN_ADD(port, val) \ + port->stats.n_pkts_in += val +#define DPDK_KNIDEV_READER_STATS_PKTS_DROP_ADD(port, val) \ + port->stats.n_pkts_drop += val + +#else + +#define DPDK_KNIDEV_READER_STATS_PKTS_IN_ADD(port, val) +#define DPDK_KNIDEV_READER_STATS_PKTS_DROP_ADD(port, val) + +#endif + struct dpdk_knidev_reader { + struct rte_port_in_stats stats; + struct rte_kni *kni; }; @@ -64,8 +80,12 @@ dpdk_knidev_reader_rx(void *port, 
struct rte_mbuf **pkts, uint32_t n_pkts) { struct dpdk_knidev_reader *p = (struct dpdk_knidev_reader *) port; + uint32_t nb_rx; - return rte_kni_rx_burst(p->kni, pkts, n_pkts); + nb_rx = rte_kni_rx_burst(p->kni, pkts, n_pkts); + DPDK_KNIDEV_READER_STATS_PKTS_IN_ADD(p, nb_rx); + + return nb_rx; } static int @@ -81,10 +101,42 @@ dpdk_knidev_reader_free(void *port) return 0; } +static int +dpdk_knidev_reader_stats_read(void *port, + struct rte_port_in_stats *stats, int clear) +{ + struct dpdk_knidev_reader *p = + (struct dpdk_knidev_reader *) port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + /* * KNI Writer */ +#if DPDK_KNIDEV_WRITER_STATS_COLLECT == 1 + +#define DPDK_KNIDEV_WRITER_STATS_PKTS_IN_ADD(port, val) \ + port->stats.n_pkts_in += val +#define DPDK_KNIDEV_WRITER_STATS_PKTS_DROP_ADD(port, val) \ + port->stats.n_pkts_drop += val + +#else + +#define DPDK_KNIDEV_WRITER_STATS_PKTS_IN_ADD(port, val) +#define DPDK_KNIDEV_WRITER_STATS_PKTS_DROP_ADD(port, val) + +#endif + struct dpdk_knidev_writer { + struct rte_port_out_stats stats; + struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX]; uint32_t tx_burst_sz; uint16_t tx_buf_count; @@ -139,6 +191,7 @@ send_burst(struct dpdk_knidev_writer *p) nb_tx = rte_kni_tx_burst(p->kni, p->tx_buf, p->tx_buf_count); + DPDK_KNIDEV_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx); for ( ; nb_tx < p->tx_buf_count; nb_tx++) /* TODO: a separate counter for this drop */ vr_dpdk_pfree(p->tx_buf[nb_tx], VP_DROP_INTERFACE_DROP); @@ -152,6 +205,7 @@ dpdk_knidev_writer_tx(void *port, struct rte_mbuf *pkt) struct dpdk_knidev_writer *p = (struct dpdk_knidev_writer *) port; p->tx_buf[p->tx_buf_count++] = pkt; + DPDK_KNIDEV_WRITER_STATS_PKTS_IN_ADD(p, 1); if (p->tx_buf_count >= p->tx_burst_sz) send_burst(p); @@ -183,6 +237,22 @@ dpdk_knidev_writer_free(void *port) return 0; } +static int +dpdk_knidev_writer_stats_read(void *port, + struct 
rte_port_out_stats *stats, int clear) +{ + struct dpdk_knidev_writer *p = + (struct dpdk_knidev_writer *) port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + /* * Summary of KNI operations */ @@ -190,6 +260,7 @@ struct rte_port_in_ops dpdk_knidev_reader_ops = { .f_create = dpdk_knidev_reader_create, .f_free = dpdk_knidev_reader_free, .f_rx = dpdk_knidev_reader_rx, + .f_stats = dpdk_knidev_reader_stats_read, }; struct rte_port_out_ops dpdk_knidev_writer_ops = { @@ -198,6 +269,7 @@ struct rte_port_out_ops dpdk_knidev_writer_ops = { .f_tx = dpdk_knidev_writer_tx, .f_tx_bulk = NULL, /* TODO: not implemented */ .f_flush = dpdk_knidev_writer_flush, + .f_stats = dpdk_knidev_writer_stats_read, }; /* Release KNI RX queue */ diff --git a/dpdk/vr_dpdk_lcore.c b/dpdk/vr_dpdk_lcore.c index 8287db6d8..cc3b0655c 100644 --- a/dpdk/vr_dpdk_lcore.c +++ b/dpdk/vr_dpdk_lcore.c @@ -28,6 +28,9 @@ #include #include +extern struct vr_interface_stats *vif_get_stats(struct vr_interface *, + unsigned short); + /* * vr_dpdk_phys_lcore_least_used_get - returns the least used lcore among the * ones that handle TX for physical interfaces. @@ -512,6 +515,9 @@ dpdk_lcore_fwd_rx(struct vr_dpdk_lcore *lcore) uint32_t nb_pkts; struct vr_packet *pkt_arr[VR_DPDK_MAX_BURST_SZ]; int pkti; + const unsigned lcore_id = rte_lcore_id(); + struct rte_port_in_stats port_stats; + struct vr_interface_stats *vr_stats; /* for all RX queues */ SLIST_FOREACH(rx_queue, &lcore->lcore_rx_head, q_next) { @@ -528,9 +534,16 @@ dpdk_lcore_fwd_rx(struct vr_dpdk_lcore *lcore) } vr_dpdk_virtio_enq_pkts_to_phys_lcore(rx_queue, pkt_arr, nb_pkts); + /** + * TODO: When we hash MPLSoGRE packets to different lcores, we will + * need to increment vis_ifrxrngenqpkts for physical interface here. 
+ */ } else { dpdk_vroute(rx_queue->q_vif, pkts, nb_pkts); } + + vr_stats = vif_get_stats(rx_queue->q_vif, lcore_id); + dpdk_port_in_stats_update(rx_queue, &port_stats, vr_stats); } } return total_pkts; @@ -547,6 +560,9 @@ dpdk_lcore_fwd_io(struct vr_dpdk_lcore *lcore) struct vr_dpdk_ring_to_push *rtp; uint16_t nb_rtp; struct rte_ring *ring; + const unsigned lcore_id = rte_lcore_id(); + struct rte_port_out_stats port_stats; + struct vr_interface_stats *vr_stats; /* TODO: skip RX queues with no packets to read * RX operation for KNIs is quite expensive. We used rx_queue_mask to @@ -571,9 +587,9 @@ dpdk_lcore_fwd_io(struct vr_dpdk_lcore *lcore) VR_DPDK_MAX_BURST_SZ-1); if (likely(nb_pkts != 0)) { total_pkts += nb_pkts; - if (likely(rtp->rtp_tx_queue != NULL)) { /* check if TX queue is available */ + vr_stats = vif_get_stats(rtp->rtp_tx_queue->q_vif, lcore_id); if (likely(rtp->rtp_tx_queue->txq_ops.f_tx != NULL)) { /* push packets to the TX queue */ /* TODO: use f_tx_bulk instead */ @@ -581,8 +597,12 @@ dpdk_lcore_fwd_io(struct vr_dpdk_lcore *lcore) rtp->rtp_tx_queue->txq_ops.f_tx( rtp->rtp_tx_queue->q_queue_h, pkts[i]); } + + dpdk_port_out_stats_update(rtp->rtp_tx_queue, &port_stats, + vr_stats); } else { /* TX queue has been deleted, so just drop the packets */ + vr_stats->vis_ifenqdrops += nb_pkts; for (i = 0; i < nb_pkts; i++) /* TODO: a separate counter for this drop */ vr_dpdk_pfree(pkts[i], VP_DROP_INTERFACE_DROP); diff --git a/dpdk/vr_dpdk_usocket.c b/dpdk/vr_dpdk_usocket.c index 32037efdf..9f1a4c882 100644 --- a/dpdk/vr_dpdk_usocket.c +++ b/dpdk/vr_dpdk_usocket.c @@ -29,6 +29,8 @@ extern void dpdk_burst_rx(unsigned int, struct rte_mbuf *[], struct vr_interface *, const char *, unsigned int); extern struct nlmsghdr *dpdk_nl_message_hdr(struct vr_message *); extern unsigned int dpdk_nl_message_len(struct vr_message *); +extern struct vr_interface_stats *vif_get_stats(struct vr_interface *, + unsigned short); static int vr_usocket_accept(struct vr_usocket *); 
static int vr_usocket_connect(struct vr_usocket *); @@ -457,9 +459,15 @@ vr_dpdk_pkt0_receive(struct vr_usocket *usockp) struct vr_packet *pkt; const unsigned lcore_id = rte_lcore_id(); struct vr_dpdk_lcore *lcore = vr_dpdk.lcores[lcore_id]; + struct vr_interface_stats *vr_stats; RTE_LOG(DEBUG, USOCK, "%s[%lx]: FD %d\n", __func__, pthread_self(), usockp->usock_fd); + /** + * A packet is read from the agent's socket here. The per-vif stats are + * looked up only inside the branch where a vif is known to be attached. + */ if (usockp->usock_vif) { + vr_stats = vif_get_stats(usockp->usock_vif, lcore_id); /* buf_addr and data_off do not change */ usockp->usock_mbuf->data_len = usockp->usock_read_len; @@ -472,9 +480,15 @@ vr_dpdk_pkt0_receive(struct vr_usocket *usockp) vr_dpdk_lcore_flush(lcore); rcu_quiescent_state(); + vr_stats->vis_ifdeqpkts++; } else { + /** + * No vif is attached, so there is no per-vif stats structure to + * charge this drop to: the mbuf is only freed with + * VP_DROP_INTERFACE_DROP and no interface counter is touched. + */ RTE_LOG(ERR, VROUTER, "Error receiving from packet socket: no vif attached\n"); vr_dpdk_pfree(usockp->usock_mbuf, VP_DROP_INTERFACE_DROP); } usockp->usock_mbuf = NULL; @@ -490,14 +504,30 @@ vr_dpdk_drain_pkt0_ring(struct vr_usocket *usockp) int i; unsigned nb_pkts; struct rte_mbuf *mbuf_arr[VR_DPDK_RING_RX_BURST_SZ]; + const unsigned lcore_id = rte_lcore_id(); + struct vr_interface_stats *vr_stats; RTE_LOG(DEBUG, USOCK, "%s[%lx]: draining pkt0 ring...\n", __func__, pthread_self()); + vr_stats = vif_get_stats(usockp->usock_parent->usock_vif, lcore_id); do { nb_pkts = rte_ring_sc_dequeue_burst(vr_dpdk.packet_ring, (void **)&mbuf_arr, VR_DPDK_RING_RX_BURST_SZ); for (i = 0; i < nb_pkts; i++) { - usock_mbuf_write(usockp->usock_parent, mbuf_arr[i]); + /** + * A packet is written to the agent's socket here. On success, + * a counter for packets enqueued to the interface is incremented. 
+ */ + if (usock_mbuf_write(usockp->usock_parent, mbuf_arr[i]) > 0) + vr_stats->vis_ifenqpkts++; + else { + /** + * If writing to socket failed, increment counter for interface + * enqueue drops. + */ + vr_stats->vis_ifenqdrops++; + } + rte_pktmbuf_free(mbuf_arr[i]); } } while (nb_pkts > 0); diff --git a/dpdk/vr_dpdk_virtio.c b/dpdk/vr_dpdk_virtio.c index d64ec1621..fb142e552 100644 --- a/dpdk/vr_dpdk_virtio.c +++ b/dpdk/vr_dpdk_virtio.c @@ -16,6 +16,9 @@ #include #include +extern struct vr_interface_stats *vif_get_stats(struct vr_interface *, + unsigned short); + void *vr_dpdk_vif_clients[VR_MAX_INTERFACES]; vr_dpdk_virtioq_t vr_dpdk_virtio_rxqs[VR_MAX_INTERFACES][VR_MAX_CPUS]; vr_dpdk_virtioq_t vr_dpdk_virtio_txqs[VR_MAX_INTERFACES][VR_MAX_CPUS]; @@ -24,11 +27,18 @@ static int dpdk_virtio_from_vm_rx(void *arg, struct rte_mbuf **pkts, uint32_t max_pkts); static int dpdk_virtio_to_vm_tx(void *arg, struct rte_mbuf *pkt); static int dpdk_virtio_to_vm_flush(void *arg); +static int dpdk_virtio_writer_stats_read(void *arg, + struct rte_port_out_stats *stats, + int clear); +static int dpdk_virtio_reader_stats_read(void *arg, + struct rte_port_in_stats *stats, + int clear); struct rte_port_in_ops dpdk_virtio_reader_ops = { .f_create = NULL, .f_free = NULL, .f_rx = dpdk_virtio_from_vm_rx, + .f_stats = dpdk_virtio_reader_stats_read }; struct rte_port_out_ops dpdk_virtio_writer_ops = { @@ -37,6 +47,7 @@ struct rte_port_out_ops dpdk_virtio_writer_ops = { .f_tx = dpdk_virtio_to_vm_tx, .f_tx_bulk = NULL, /* TODO: not implemented */ .f_flush = dpdk_virtio_to_vm_flush, + .f_stats = dpdk_virtio_writer_stats_read }; /* @@ -268,6 +279,20 @@ vr_dpdk_virtio_get_mempool(void) return vr_dpdk.virtio_mempool; } +#if DPDK_VIRTIO_READER_STATS_COLLECT == 1 + +#define DPDK_VIRTIO_READER_STATS_PKTS_IN_ADD(port, val) \ + port->vdv_in_stats.n_pkts_in += val +#define DPDK_VIRTIO_READER_STATS_PKTS_DROP_ADD(port, val) \ + port->vdv_in_stats.n_pkts_drop += val + +#else + +#define 
DPDK_VIRTIO_READER_STATS_PKTS_IN_ADD(port, val) +#define DPDK_VIRTIO_READER_STATS_PKTS_DROP_ADD(port, val) + +#endif + /* * dpdk_virtio_from_vm_rx - receive packets from a virtio client so that * the packets can be handed to vrouter for forwarding. the virtio client is @@ -392,9 +417,23 @@ dpdk_virtio_from_vm_rx(void *arg, struct rte_mbuf **pkts, uint32_t max_pkts) DPDK_UDEBUG(VROUTER, &vq->vdv_hash, "%s: queue %p pkts_sent %u\n", __func__, vq, pkts_sent); + DPDK_VIRTIO_READER_STATS_PKTS_IN_ADD(vq, pkts_sent); return pkts_sent; } +#if DPDK_VIRTIO_WRITER_STATS_COLLECT == 1 + +#define DPDK_VIRTIO_WRITER_STATS_PKTS_IN_ADD(port, val) \ + port->vdv_out_stats.n_pkts_in += val +#define DPDK_VIRTIO_WRITER_STATS_PKTS_DROP_ADD(port, val) \ + port->vdv_out_stats.n_pkts_drop += val + +#else + +#define DPDK_VIRTIO_WRITER_STATS_PKTS_IN_ADD(port, val) +#define DPDK_VIRTIO_WRITER_STATS_PKTS_DROP_ADD(port, val) + +#endif /* * dpdk_virtio_to_vm_tx - sends a packet from vrouter to a virtio client. The @@ -408,11 +447,13 @@ dpdk_virtio_to_vm_tx(void *arg, struct rte_mbuf *mbuf) vr_dpdk_virtioq_t *vq = (vr_dpdk_virtioq_t *) arg; if (vq->vdv_ready_state == VQ_NOT_READY) { + DPDK_VIRTIO_WRITER_STATS_PKTS_DROP_ADD(vq, 1); vr_dpdk_pfree(mbuf, VP_DROP_INTERFACE_DROP); return -1; } vq->vdv_tx_mbuf[vq->vdv_tx_mbuf_count++] = mbuf; + DPDK_VIRTIO_WRITER_STATS_PKTS_IN_ADD(vq, 1); if (vq->vdv_tx_mbuf_count >= VR_DPDK_VIRTIO_TX_BURST_SZ) { dpdk_virtio_to_vm_flush(vq); } @@ -475,6 +516,7 @@ dpdk_virtio_to_vm_flush(void *arg) desc = &vq->vdv_desc[next_desc_idx]; buf_addr = vr_dpdk_guest_phys_to_host_virt(vq, desc->addr); if (buf_addr == NULL) { + DPDK_VIRTIO_WRITER_STATS_PKTS_DROP_ADD(vq, 1); vr_dpdk_pfree(vq->vdv_tx_mbuf[i], VP_DROP_INTERFACE_DROP); continue; } @@ -496,6 +538,7 @@ dpdk_virtio_to_vm_flush(void *arg) buf_addr = vr_dpdk_guest_phys_to_host_virt(vq, desc->addr); if (buf_addr == NULL) { + DPDK_VIRTIO_WRITER_STATS_PKTS_DROP_ADD(vq, 1); vr_dpdk_pfree(vq->vdv_tx_mbuf[i], 
VP_DROP_INTERFACE_DROP); continue; } @@ -520,9 +563,9 @@ dpdk_virtio_to_vm_flush(void *arg) * Free any packets that could not be sent to the VM because it didn't * post receive buffers soon enough. */ - for (; i < vq->vdv_tx_mbuf_count; i++) { + DPDK_VIRTIO_WRITER_STATS_PKTS_DROP_ADD(vq, vq->vdv_tx_mbuf_count - i); + for (; i < vq->vdv_tx_mbuf_count; i++) vr_dpdk_pfree(vq->vdv_tx_mbuf[i], VP_DROP_INTERFACE_DROP); - } vq->vdv_tx_mbuf_count = 0; @@ -807,6 +850,8 @@ vr_dpdk_virtio_enq_pkts_to_phys_lcore(struct vr_dpdk_queue *rx_queue, vr_dpdk_virtioq_t *vq; struct rte_ring *vq_pring; int nb_enq; + struct vr_interface_stats *vr_stats; + const unsigned lcore_id = rte_lcore_id(); vq = (vr_dpdk_virtioq_t *) rx_queue->q_queue_h; vq_pring = vq->vdv_pring; @@ -814,8 +859,53 @@ vr_dpdk_virtio_enq_pkts_to_phys_lcore(struct vr_dpdk_queue *rx_queue, RTE_LOG(DEBUG, VROUTER, "%s: enqueue %u pakets to ring %p\n", __func__, npkts, vq_pring); nb_enq = rte_ring_sp_enqueue_burst(vq_pring, (void **) pkt_arr, npkts); + + /** + * Packets received from VM are enqueued to the ring here. Increment + * a counter for RX'd-and-enqueued packets by the number of successfully + * enqueued packets. + */ + vr_stats = vif_get_stats(rx_queue->q_vif, lcore_id); + if (nb_enq > 0) + vr_stats->vis_ifrxrngenqpkts += nb_enq; + + /** + * Increment a counter for RX'd-but-not-enqueued packets by the difference + * of packets we wish to enqueue and packets really enqueued. 
+ */ + vr_stats->vis_ifrxrngenqdrops += npkts - nb_enq; for ( ; nb_enq < npkts; nb_enq++) vr_pfree(pkt_arr[nb_enq], VP_DROP_INTERFACE_DROP); return; } + +static int +dpdk_virtio_reader_stats_read(void *arg, + struct rte_port_in_stats *stats, int clear) +{ + vr_dpdk_virtioq_t *vq = (vr_dpdk_virtioq_t *) arg; + + if (stats != NULL) + memcpy(stats, &vq->vdv_in_stats, sizeof(vq->vdv_in_stats)); + + if (clear) + memset(&vq->vdv_in_stats, 0, sizeof(vq->vdv_in_stats)); + + return 0; +} + +static int +dpdk_virtio_writer_stats_read(void *arg, + struct rte_port_out_stats *stats, int clear) +{ + vr_dpdk_virtioq_t *vq = (vr_dpdk_virtioq_t *) arg; + + if (stats != NULL) + memcpy(stats, &vq->vdv_out_stats, sizeof(vq->vdv_out_stats)); + + if (clear) + memset(&vq->vdv_out_stats, 0, sizeof(vq->vdv_out_stats)); + + return 0; +} diff --git a/dpdk/vr_dpdk_virtio.h b/dpdk/vr_dpdk_virtio.h index ad42555ad..8cfab84f4 100644 --- a/dpdk/vr_dpdk_virtio.h +++ b/dpdk/vr_dpdk_virtio.h @@ -28,6 +28,9 @@ typedef enum vq_ready_state { } vq_ready_state_t; typedef struct vr_dpdk_virtioq { + struct rte_port_out_stats vdv_out_stats; + struct rte_port_in_stats vdv_in_stats; + struct vring_desc *vdv_desc; /**< Virtqueue descriptor ring. */ struct vring_avail *vdv_avail; /**< Virtqueue available ring. */ struct vring_used *vdv_used; /**< Virtqueue used ring. */ diff --git a/include/vr_dpdk.h b/include/vr_dpdk.h index 2948291e7..49259c440 100644 --- a/include/vr_dpdk.h +++ b/include/vr_dpdk.h @@ -25,6 +25,10 @@ #include #include +#include + +extern struct vr_interface_stats *vif_get_stats(struct vr_interface *, + unsigned short); /* * Use RTE_LOG_DEBUG to enable debug logs. 
@@ -364,6 +368,79 @@ struct vr_dpdk_global { extern struct vr_dpdk_global vr_dpdk; +/** + * Enable sent/received/dropped packets statistics + */ +#define DPDK_KNIDEV_WRITER_STATS_COLLECT 1 +#define DPDK_KNIDEV_READER_STATS_COLLECT 1 +#define DPDK_VIRTIO_WRITER_STATS_COLLECT 1 +#define DPDK_VIRTIO_READER_STATS_COLLECT 1 + +/** + * dpdk_port_out_stats_update + * + * Overwrites (does not accumulate) the vr_stats counters for: + * - packets enqueued to the interface successfully. + * - packets which have been dropped during .f_tx() or .f_flush(). + * If we write to ring instead of NIC's queue, count it as a ring enqueue. + * + * port_stats is a scratch buffer that .f_stats() fills from the port. + * vr_stats (from vif_get_stats()) is the target; NULL args are ignored. + */ +static inline void +dpdk_port_out_stats_update(struct vr_dpdk_queue *txq, + struct rte_port_out_stats *port_stats, + struct vr_interface_stats *vr_stats) +{ + if (!port_stats || !vr_stats) + return; + + if (likely(txq->txq_ops.f_stats != NULL)) { + txq->txq_ops.f_stats(txq->q_queue_h, port_stats, 0); + + /** + * It does not matter if we check equality of .f_tx or .f_flush here, + * equality of .f_txs implies equality of .f_flushes. + */ + if (txq->txq_ops.f_tx == rte_port_ring_writer_ops.f_tx) { + vr_stats->vis_iftxrngenqpkts = port_stats->n_pkts_in; + vr_stats->vis_iftxrngenqdrops = port_stats->n_pkts_drop; + } else { + vr_stats->vis_ifenqpkts = port_stats->n_pkts_in; + vr_stats->vis_ifenqdrops = port_stats->n_pkts_drop; + } + } +} + +/** + * dpdk_port_in_stats_update + * + * Updates counters for: + * - packets dequeued from the interface successfully. + * - packets which have been dropped during .f_rx(). + * + * port_stats is a scratch buffer filled by .f_stats(). + * vr_stats is returned by vif_get_stats(). 
+ */ +static inline void +dpdk_port_in_stats_update(struct vr_dpdk_queue *rxq, + struct rte_port_in_stats *port_stats, + struct vr_interface_stats *vr_stats) +{ + if (!port_stats || !vr_stats) + return; + + if (likely(rxq->rxq_ops.f_stats != NULL)) { + rxq->rxq_ops.f_stats(rxq->q_queue_h, port_stats, 0); + + /** + * We don't use .f_rx for rings, so no need to check. + */ + vr_stats->vis_ifdeqpkts = port_stats->n_pkts_in; + vr_stats->vis_ifdeqdrops = port_stats->n_pkts_drop; + } +} + /* * rte_mbuf <=> vr_packet conversion * @@ -541,8 +618,20 @@ static inline void vr_dpdk_lcore_flush(struct vr_dpdk_lcore *lcore) { struct vr_dpdk_queue *tx_queue; + const unsigned lcore_id = rte_lcore_id(); + struct rte_port_out_stats port_stats; + struct vr_interface_stats *vr_stats; + SLIST_FOREACH(tx_queue, &lcore->lcore_tx_head, q_next) { tx_queue->txq_ops.f_flush(tx_queue->q_queue_h); + /** + * Don't update stats if we write to agent interface, as it does + * not use rte_port_out_stats structure. + */ + if (tx_queue->q_vif->vif_type != VIF_TYPE_AGENT) { + vr_stats = vif_get_stats(tx_queue->q_vif, lcore_id); + dpdk_port_out_stats_update(tx_queue, &port_stats, vr_stats); + } } } /* Send a burst of vr_packets to vRouter */ diff --git a/include/vr_interface.h b/include/vr_interface.h index d87f559be..47e981f5e 100644 --- a/include/vr_interface.h +++ b/include/vr_interface.h @@ -108,6 +108,14 @@ struct vr_interface_stats { uint64_t vis_obytes; uint64_t vis_opackets; uint64_t vis_oerrors; + uint64_t vis_ifenqpkts; + uint64_t vis_ifenqdrops; + uint64_t vis_ifdeqpkts; + uint64_t vis_ifdeqdrops; + uint64_t vis_iftxrngenqpkts; + uint64_t vis_iftxrngenqdrops; + uint64_t vis_ifrxrngenqpkts; + uint64_t vis_ifrxrngenqdrops; }; struct vr_packet; diff --git a/sandesh/vr.sandesh b/sandesh/vr.sandesh index 89a844780..512cb6ce3 100644 --- a/sandesh/vr.sandesh +++ b/sandesh/vr.sandesh @@ -49,36 +49,45 @@ buffer sandesh vr_nexthop_req { buffer sandesh vr_interface_req { 1: sandesh_op h_op; - 2: i32 
vifr_type; - 3: i32 vifr_flags; - 4: i32 vifr_vrf; - 5: i32 vifr_idx; - 6: i32 vifr_rid; - 7: i32 vifr_os_idx; - 8: i32 vifr_mtu; - 9: string vifr_name; - 10: i64 vifr_ibytes; - 11: i64 vifr_ipackets; - 12: i64 vifr_ierrors; - 13: i64 vifr_obytes; - 14: i64 vifr_opackets; - 15: i64 vifr_oerrors; - 16: i32 vifr_ref_cnt; - 17: i32 vifr_marker; - 18: list vifr_mac; - 19: i32 vifr_ip; - 20: i32 vifr_context; - 21: i16 vifr_mir_id; - 22: i32 vifr_speed; - 23: i32 vifr_duplex; - 24: i16 vifr_vlan_id; - 25: i32 vifr_parent_vif_idx; - 26: i16 vifr_nh_id; - 27: i32 vifr_cross_connect_idx; - 28: list vifr_src_mac; - 29: i32 vifr_bridge_idx; - 30: i16 vifr_ovlan_id; - 31: byte vifr_transport; + 2: u32 vifr_core; + 3: i32 vifr_type; + 4: i32 vifr_flags; + 5: i32 vifr_vrf; + 6: i32 vifr_idx; + 7: i32 vifr_rid; + 8: i32 vifr_os_idx; + 9: i32 vifr_mtu; + 10: string vifr_name; + 11: i64 vifr_ibytes; + 12: i64 vifr_ipackets; + 13: i64 vifr_ierrors; + 14: i64 vifr_obytes; + 15: i64 vifr_opackets; + 16: i64 vifr_oerrors; + 17: i64 vifr_ifenqpkts; + 18: i64 vifr_ifenqdrops; + 19: i64 vifr_ifdeqpkts; + 20: i64 vifr_ifdeqdrops; + 21: i64 vifr_iftxrngenqpkts; + 22: i64 vifr_iftxrngenqdrops; + 23: i64 vifr_ifrxrngenqpkts; + 24: i64 vifr_ifrxrngenqdrops; + 25: i32 vifr_ref_cnt; + 26: i32 vifr_marker; + 27: list vifr_mac; + 28: i32 vifr_ip; + 29: i32 vifr_context; + 30: i16 vifr_mir_id; + 31: i32 vifr_speed; + 32: i32 vifr_duplex; + 33: i16 vifr_vlan_id; + 34: i32 vifr_parent_vif_idx; + 35: i16 vifr_nh_id; + 36: i32 vifr_cross_connect_idx; + 37: list vifr_src_mac; + 38: i32 vifr_bridge_idx; + 39: i16 vifr_ovlan_id; + 40: byte vifr_transport; } buffer sandesh vr_vxlan_req { @@ -224,7 +233,7 @@ buffer sandesh vrouter_ops { buffer sandesh vr_drop_stats_req { 1: sandesh_op h_op; - 2: i16 vds_rid; + 2: i16 vds_rid; 3: i64 vds_discard; 4: i64 vds_pull; 5: i64 vds_invalid_if; diff --git a/utils/vif.c b/utils/vif.c index 770baed21..31bb2db3d 100644 --- a/utils/vif.c +++ b/utils/vif.c @@ -42,15 
+42,15 @@ #include "ini_parser.h" -#define VHOST_TYPE_STRING "vhost" -#define AGENT_TYPE_STRING "agent" -#define PHYSICAL_TYPE_STRING "physical" -#define VIRTUAL_TYPE_STRING "virtual" -#define XEN_LL_TYPE_STRING "xenll" -#define GATEWAY_TYPE_STRING "gateway" +#define VHOST_TYPE_STRING "vhost" +#define AGENT_TYPE_STRING "agent" +#define PHYSICAL_TYPE_STRING "physical" +#define VIRTUAL_TYPE_STRING "virtual" +#define XEN_LL_TYPE_STRING "xenll" +#define GATEWAY_TYPE_STRING "gateway" #define VIRTUAL_VLAN_TYPE_STRING "virtual-vlan" -#define STATS_TYPE_STRING "stats" -#define MONITORING_TYPE_STRING "monitoring" +#define STATS_TYPE_STRING "stats" +#define MONITORING_TYPE_STRING "monitoring" static struct nl_client *cl; static char flag_string[32], if_name[IFNAMSIZ]; @@ -62,11 +62,12 @@ static int if_xconnect_kindex = -1; static int if_vif_index = -1; static short vlan_id = -1; static int vr_ifflags; +static int core = -1; static int add_set, create_set, get_set, list_set; static int kindex_set, type_set, help_set, set_set, vlan_set, dhcp_set; static int vrf_set, mac_set, delete_set, policy_set, pmd_set, vindex_set, pci_set; -static int xconnect_set, vif_set, vhost_phys_set; +static int xconnect_set, vif_set, vhost_phys_set, core_set; static unsigned int vr_op, vr_if_type; static bool ignore_error = false, dump_pending = false; @@ -188,6 +189,13 @@ vr_interface_print_header(void) array_size = sizeof(flag_metadata) / sizeof(flag_metadata[0]); printf("Vrouter Interface Table\n\n"); + + if (core_set && core > -1) + printf("Statistics for core %d\n\n", core); + + if (core_set) + return; + printf("Flags: "); for (i = 0; i < array_size; i++) { @@ -324,6 +332,7 @@ vr_interface_req_process(void *s) printf("Vrf:%d Flags:%s MTU:%d Ref:%d\n", req->vifr_vrf, req->vifr_flags ? 
vr_if_flags(req->vifr_flags) : "NULL" , req->vifr_mtu, req->vifr_ref_cnt); + vr_interface_print_head_space(); printf("RX packets:%" PRId64 " bytes:%" PRId64 " errors:%" PRId64 "\n", req->vifr_ipackets, @@ -332,6 +341,29 @@ vr_interface_req_process(void *s) printf("TX packets:%" PRId64 " bytes:%" PRId64 " errors:%" PRId64 "\n", req->vifr_opackets, req->vifr_obytes, req->vifr_oerrors); + + /* Additional DPDK-specific statistics */ + if (platform == DPDK_PLATFORM) { + vr_interface_print_head_space(); + printf("Packets sent to vif: %" PRId64" Drops:%" PRId64 " \n", + req->vifr_ifenqpkts, req->vifr_ifenqdrops); + vr_interface_print_head_space(); + printf("Packets enqueued on TX ring: %" PRId64" Drops:%" PRId64 " \n", + req->vifr_iftxrngenqpkts, req->vifr_iftxrngenqdrops); + vr_interface_print_head_space(); + printf("Packets received on vif: %" PRId64" Drops:%" PRId64 " \n", + req->vifr_ifdeqpkts, req->vifr_ifdeqdrops); + /** + * TODO: when we hash MPLSoGRE packets to different lcores, it should + * apply to virtual as well as physical interfaces. + */ + if (req->vifr_type == VIF_TYPE_VIRTUAL + /* || req->vifr_type == VIF_TYPE_PHYSICAL */) { + vr_interface_print_head_space(); + printf("Packets enqueued on RX ring: %" PRId64" Drops:%" PRId64 " \n", + req->vifr_ifrxrngenqpkts, req->vifr_ifrxrngenqdrops); + } + } printf("\n"); if (list_set) @@ -595,6 +627,25 @@ vr_intf_op(unsigned int op) break; case SANDESH_OP_GET: + /** + * Implementation of getting per-core vif statistics is based on this + * little trick to avoid making changes in how agent makes requests for + * statistics. From vRouter's and agent's point of view, request for + * stats for 0th core means a request for stats summed up for all the + * cores. So cores are enumerated starting with 1. + * Meanwhile, from user's point of view they are enumerated starting + * with 0 (e.g. vif --list --core 0 means 'vif statistics for the very + * first (0th) core'). 
This is how Linux enumerates CPUs, so it should + * be more intuitive for the user. + * + * Agent is not aware of possibility of asking for per-core stats. Its + * requests have vifr_core implicitly set to 0. So we need to make a + * conversion between those enumerating systems. The vif utility + * increments by 1 the core number user asked for. Then it is + * decremented back in vRouter. + */ + intf_req.vifr_core = (unsigned int)(core + 1); + /* * this logic is slightly complicated. if --kernel option is set * for get or when if_kindex is set for add doing a get, we should @@ -611,6 +662,7 @@ vr_intf_op(unsigned int op) break; case SANDESH_OP_DUMP: + intf_req.vifr_core = (unsigned int)(core + 1); break; } @@ -647,9 +699,9 @@ Usage() printf("\t \t--vif ]\n"); printf( "[--id --pmd --pci]\n"); printf("\t [--delete ]\n"); - printf("\t [--get ][--kernel]\n"); + printf("\t [--get ][--kernel][--core ]\n"); printf("\t [--set --vlan --vrf ]\n"); - printf("\t [--list]\n"); + printf("\t [--list][--core ]\n"); printf("\t [--help]\n"); exit(0); @@ -677,6 +729,7 @@ enum if_opt_index { VHOST_PHYS_OPT_INDEX, HELP_OPT_INDEX, VINDEX_OPT_INDEX, + CORE_OPT_INDEX, MAX_OPT_INDEX }; @@ -700,7 +753,8 @@ static struct option long_options[] = { [VIF_OPT_INDEX] = {"vif", required_argument, &vif_set, 1}, [DHCP_OPT_INDEX] = {"dhcp-enable", no_argument, &dhcp_set, 1}, [HELP_OPT_INDEX] = {"help", no_argument, &help_set, 1}, - [VINDEX_OPT_INDEX] = {"id", required_argument, &vindex_set, 1}, + [VINDEX_OPT_INDEX] = {"id", required_argument, &vindex_set, 1}, + [CORE_OPT_INDEX] = {"core", required_argument, &core_set, 1}, [MAX_OPT_INDEX] = { NULL, 0, NULL, 0}, }; @@ -769,6 +823,12 @@ parse_long_opts(int option_index, char *opt_arg) vr_op = SANDESH_OP_DUMP; break; + case CORE_OPT_INDEX: + core = (int)strtol(opt_arg, NULL, 0); + if (core < 0) + core = 0; + break; + case TYPE_OPT_INDEX: vr_if_type = vr_get_if_type(optarg); if (vr_if_type == VIF_TYPE_HOST) @@ -855,17 +915,28 @@ validate_options(void) } 
if (get_set) { - if ((sum_opt > 1) && (sum_opt != 2 || !kindex_set)) + if (sum_opt != 1 + kindex_set + core_set) Usage(); return; } - if ((delete_set || list_set)) { + if (delete_set) { if (sum_opt > 1) Usage(); return; } + if (list_set) { + if (!core_set) { + if (sum_opt > 1) + Usage(); + } else { + if (sum_opt != 2) + Usage(); + } + return; + } + if (add_set) { if (get_set || list_set) Usage(); @@ -884,6 +955,15 @@ validate_options(void) return; } + /** + * Statistics per CPU core can only be requested as an additional parameter + * to --list or --get, so --core without either of them is a usage error. + */ + if (core_set) { + if (!list_set && !get_set) + Usage(); + } + return; } @@ -897,7 +977,7 @@ main(int argc, char *argv[]) */ unsigned int sock_proto = NETLINK_GENERIC; - while ((opt = getopt_long(argc, argv, "ba:c:d:g:klm:t:v:p:DPi:", + while ((opt = getopt_long(argc, argv, "ba:c:d:g:klm:t:v:p:C:DPi:", long_options, &option_index)) >= 0) { switch (opt) { case 'a': @@ -966,6 +1046,11 @@ main(int argc, char *argv[]) parse_long_opts(VINDEX_OPT_INDEX, NULL); break; + case 'C': + core_set = 1; + parse_long_opts(CORE_OPT_INDEX, optarg); + break; + case 0: parse_long_opts(option_index, optarg); break;