DPDK 21.11.1
rte_idxd_rawdev_fns.h
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2021 Intel Corporation
 */
#ifndef _RTE_IDXD_RAWDEV_FNS_H_
#define _RTE_IDXD_RAWDEV_FNS_H_

/*
 * Note: the public API for these functions is defined in rte_ioat_rawdev.h,
 * which is the header applications should include. This file is not meant
 * to be included directly and relies on that header for its remaining
 * dependencies.
 */
#include <stdint.h>

/*
 * Defines used in the data path for interacting with IDXD hardware.
 */
#define IDXD_CMD_OP_SHIFT 24
enum rte_idxd_ops {
	idxd_op_nop = 0,
	idxd_op_batch,
	idxd_op_drain,
	idxd_op_memmove,
	idxd_op_fill
};

#define IDXD_FLAG_FENCE                 (1 << 0)
#define IDXD_FLAG_COMPLETION_ADDR_VALID (1 << 2)
#define IDXD_FLAG_REQUEST_COMPLETION    (1 << 3)
#define IDXD_FLAG_CACHE_CONTROL         (1 << 8)

#define IOAT_COMP_UPDATE_SHIFT 3
#define IOAT_CMD_OP_SHIFT 24
enum rte_ioat_ops {
	ioat_op_copy = 0, /* Standard DMA Operation */
	ioat_op_fill      /* Block Fill */
};

/*
 * Hardware descriptor used by the DSA hardware, for both batch descriptors
 * and individual operations.
 */
struct rte_idxd_hw_desc {
	uint32_t pasid;
	uint32_t op_flags;
	rte_iova_t completion;

	RTE_STD_C11
	union {
		rte_iova_t src;      /* source address for copy ops etc. */
		rte_iova_t desc_addr; /* descriptor pointer for batch */
	};
	rte_iova_t dst;

	uint32_t size;    /* length of data for op, or batch size */

	uint16_t intr_handle; /* completion interrupt handle */

	/* remaining 26 bytes are reserved */
	uint16_t __reserved[13];
} __rte_aligned(64);

/*
 * Completion record structure written back by the DSA hardware.
 */
struct rte_idxd_completion {
	uint8_t status;
	uint8_t result;
	/* 16-bits pad here */
	uint32_t completed_size; /* data length, or descriptors for batch */

	rte_iova_t fault_address;
	uint32_t invalid_flags;
} __rte_aligned(32);

/*
 * Structure used to hold the "handles" provided by the user, to be
 * returned on job completion.
 */
struct rte_idxd_user_hdl {
	uint64_t src;
	uint64_t dst;
};

/*
 * Structure representing an instance of an IDXD (DSA) rawdev.
 */
struct rte_idxd_rawdev {
	enum rte_ioat_dev_type type;
	struct rte_ioat_xstats xstats;

	void *portal; /* address to write the batch descriptor */

	struct rte_ioat_rawdev_config cfg;
	rte_iova_t desc_iova; /* base address of desc ring, needed for completions */

	/* counters to track the batches */
	unsigned short max_batches;
	unsigned short batch_idx_read;
	unsigned short batch_idx_write;
	unsigned short *batch_idx_ring; /* store where each batch ends */

	/* track descriptors and handles */
	unsigned short desc_ring_mask;
	unsigned short hdls_avail;  /* handles for ops completed */
	unsigned short hdls_read;   /* the read pointer for hdls/desc rings */
	unsigned short batch_start; /* start+size == write pointer for hdls/desc */
	unsigned short batch_size;

	struct rte_idxd_hw_desc *desc_ring;
	struct rte_idxd_user_hdl *hdl_ring;
	/* flags to indicate handle validity. Kept separate from ring, to avoid
	 * using 8 bytes per flag. Upper 8 bits holds error code if any.
	 */
	uint16_t *hdl_ring_flags;
};
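
An illustrative sketch (not part of the DPDK header): the data path below assumes the descriptor and handle rings are a power of two in size, with desc_ring_mask equal to size minus one, and it treats batch_idx_ring as having max_batches + 1 slots so that read == write means "empty". The helper example_idxd_init_indexes() is hypothetical and only shows the index initialisation implied by those assumptions.

/* Hypothetical sketch, not part of this header: reset the ring indexes,
 * assuming the ring memory itself has already been allocated by the driver.
 */
static inline int
example_idxd_init_indexes(struct rte_idxd_rawdev *idxd, unsigned short ring_size)
{
	/* the mask arithmetic used in the data path needs a power-of-two size */
	if (ring_size == 0 || (ring_size & (ring_size - 1)) != 0)
		return -1;
	idxd->desc_ring_mask = ring_size - 1;
	idxd->batch_idx_read = 0;
	idxd->batch_idx_write = 0;
	idxd->batch_start = 0;
	idxd->batch_size = 0;
	idxd->hdls_read = 0;
	idxd->hdls_avail = 0;
	return 0;
}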

#define RTE_IDXD_HDL_NORMAL     0
#define RTE_IDXD_HDL_INVALID    (1 << 0) /* no handle stored for this element */
#define RTE_IDXD_HDL_OP_FAILED  (1 << 1) /* return failure for this one */
#define RTE_IDXD_HDL_OP_SKIPPED (1 << 2) /* this op was skipped */

static __rte_always_inline uint16_t
__idxd_burst_capacity(int dev_id)
{
	struct rte_idxd_rawdev *idxd =
			(struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private;
	uint16_t write_idx = idxd->batch_start + idxd->batch_size;
	uint16_t used_space, free_space;

	/* Check for space in the batch ring */
	if ((idxd->batch_idx_read == 0 && idxd->batch_idx_write == idxd->max_batches) ||
			idxd->batch_idx_write + 1 == idxd->batch_idx_read)
		return 0;

	/* for descriptors, check for wrap-around on write but not read */
	if (idxd->hdls_read > write_idx)
		write_idx += idxd->desc_ring_mask + 1;
	used_space = write_idx - idxd->hdls_read;

	/* Return amount of free space in the descriptor ring,
	 * subtracting 1 for the batch descriptor and 1 for a possible null desc.
	 */
	free_space = idxd->desc_ring_mask - used_space;
	if (free_space < 2)
		return 0;
	return free_space - 2;
}
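
A brief hedged example (the helper name example_have_room() is hypothetical, not part of the header): because two descriptor slots are always reserved for the batch descriptor and a possible null descriptor, __idxd_burst_capacity() can be consulted up front to decide whether a burst of a given size will currently fit.

/* Hypothetical sketch: non-zero if at least nb_ops more descriptors fit now. */
static inline int
example_have_room(int dev_id, uint16_t nb_ops)
{
	return __idxd_burst_capacity(dev_id) >= nb_ops;
}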

static __rte_always_inline rte_iova_t
__desc_idx_to_iova(struct rte_idxd_rawdev *idxd, uint16_t n)
{
	return idxd->desc_iova + (n * sizeof(struct rte_idxd_hw_desc));
}

static __rte_always_inline int
__idxd_write_desc(int dev_id,
		const uint32_t op_flags,
		const rte_iova_t src,
		const rte_iova_t dst,
		const uint32_t size,
		const struct rte_idxd_user_hdl *hdl)
{
	struct rte_idxd_rawdev *idxd =
			(struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private;
	uint16_t write_idx = idxd->batch_start + idxd->batch_size;
	uint16_t mask = idxd->desc_ring_mask;

	/* first check batch ring space then desc ring space */
	if ((idxd->batch_idx_read == 0 && idxd->batch_idx_write == idxd->max_batches) ||
			idxd->batch_idx_write + 1 == idxd->batch_idx_read)
		goto failed;
	/* for descriptor ring, we always need a slot for batch completion */
	if (((write_idx + 2) & mask) == idxd->hdls_read ||
			((write_idx + 1) & mask) == idxd->hdls_read)
		goto failed;

	/* write desc and handle. Note, descriptors don't wrap */
	idxd->desc_ring[write_idx].pasid = 0;
	idxd->desc_ring[write_idx].op_flags = op_flags | IDXD_FLAG_COMPLETION_ADDR_VALID;
	idxd->desc_ring[write_idx].completion = __desc_idx_to_iova(idxd, write_idx & mask);
	idxd->desc_ring[write_idx].src = src;
	idxd->desc_ring[write_idx].dst = dst;
	idxd->desc_ring[write_idx].size = size;

	if (hdl == NULL)
		idxd->hdl_ring_flags[write_idx & mask] = RTE_IDXD_HDL_INVALID;
	else
		idxd->hdl_ring[write_idx & mask] = *hdl;
	idxd->batch_size++;

	idxd->xstats.enqueued++;

	rte_prefetch0_write(&idxd->desc_ring[write_idx + 1]);
	return 1;

failed:
	idxd->xstats.enqueue_failed++;
	rte_errno = ENOSPC;
	return 0;
}

static __rte_always_inline int
__idxd_enqueue_fill(int dev_id, uint64_t pattern, rte_iova_t dst,
		unsigned int length, uintptr_t dst_hdl)
{
	const struct rte_idxd_user_hdl hdl = {
		.dst = dst_hdl
	};
	return __idxd_write_desc(dev_id,
			(idxd_op_fill << IDXD_CMD_OP_SHIFT) | IDXD_FLAG_CACHE_CONTROL,
			pattern, dst, length, &hdl);
}

static __rte_always_inline int
__idxd_enqueue_copy(int dev_id, rte_iova_t src, rte_iova_t dst,
		unsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl)
{
	const struct rte_idxd_user_hdl hdl = {
		.src = src_hdl,
		.dst = dst_hdl
	};
	return __idxd_write_desc(dev_id,
			(idxd_op_memmove << IDXD_CMD_OP_SHIFT) | IDXD_FLAG_CACHE_CONTROL,
			src, dst, length, &hdl);
}

static __rte_always_inline int
__idxd_enqueue_nop(int dev_id)
{
	/* only op field needs filling - zero src, dst and length */
	return __idxd_write_desc(dev_id, idxd_op_nop << IDXD_CMD_OP_SHIFT,
			0, 0, 0, NULL);
}

static __rte_always_inline int
__idxd_fence(int dev_id)
{
	/* only op field needs filling - zero src, dst and length */
	return __idxd_write_desc(dev_id, IDXD_FLAG_FENCE, 0, 0, 0, NULL);
}

static __rte_always_inline void
__idxd_movdir64b(volatile void *dst, const struct rte_idxd_hw_desc *src)
{
	/* Raw encoding of MOVDIR64B (66 0F 38 F8 /r): a 64-byte direct store of
	 * the descriptor at (%rdx) to the device portal held in (%rax), used so
	 * that toolchains without assembler support for the mnemonic still build.
	 */
	asm volatile (".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
			:
			: "a" (dst), "d" (src)
			: "memory");
}

static __rte_always_inline int
__idxd_perform_ops(int dev_id)
{
	struct rte_idxd_rawdev *idxd =
			(struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private;

	if (!idxd->cfg.no_prefetch_completions)
		rte_prefetch1(&idxd->desc_ring[idxd->batch_idx_ring[idxd->batch_idx_read]]);

	if (idxd->batch_size == 0)
		return 0;

	if (idxd->batch_size == 1)
		/* use a NOP as a null descriptor, so batch_size >= 2 */
		if (__idxd_enqueue_nop(dev_id) != 1)
			return -1;

	/* write completion beyond last desc in the batch */
	uint16_t comp_idx = (idxd->batch_start + idxd->batch_size) & idxd->desc_ring_mask;
	*((uint64_t *)&idxd->desc_ring[comp_idx]) = 0; /* zero start of desc */
	idxd->hdl_ring_flags[comp_idx] = RTE_IDXD_HDL_INVALID;

	const struct rte_idxd_hw_desc batch_desc = {
			.op_flags = (idxd_op_batch << IDXD_CMD_OP_SHIFT) |
				IDXD_FLAG_COMPLETION_ADDR_VALID |
				IDXD_FLAG_REQUEST_COMPLETION,
			.desc_addr = __desc_idx_to_iova(idxd, idxd->batch_start),
			.completion = __desc_idx_to_iova(idxd, comp_idx),
			.size = idxd->batch_size,
	};

	_mm_sfence(); /* fence before writing desc to device */
	__idxd_movdir64b(idxd->portal, &batch_desc);
	idxd->xstats.started += idxd->batch_size;

	idxd->batch_start += idxd->batch_size + 1;
	idxd->batch_start &= idxd->desc_ring_mask;
	idxd->batch_size = 0;

	idxd->batch_idx_ring[idxd->batch_idx_write++] = comp_idx;
	if (idxd->batch_idx_write > idxd->max_batches)
		idxd->batch_idx_write = 0;

	return 0;
}
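
A minimal submission sketch (the helper example_copy_burst() is hypothetical, not part of the header), assuming equal-length copies: descriptors accumulate in the ring via __idxd_enqueue_copy(), and a single __idxd_perform_ops() call then wraps them in one batch descriptor, so the MOVDIR64B write to the portal is amortised over the whole burst.

/* Hypothetical sketch: enqueue up to nb copies, then submit them as one batch. */
static inline uint16_t
example_copy_burst(int dev_id, const rte_iova_t *srcs, const rte_iova_t *dsts,
		uint32_t len, uint16_t nb)
{
	uint16_t i;

	for (i = 0; i < nb; i++)
		if (__idxd_enqueue_copy(dev_id, srcs[i], dsts[i], len,
				(uintptr_t)srcs[i], (uintptr_t)dsts[i]) != 1)
			break; /* ring full - rte_errno is set to ENOSPC */
	if (i > 0 && __idxd_perform_ops(dev_id) < 0)
		return 0;
	return i;
}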

static __rte_always_inline int
__idxd_completed_ops(int dev_id, uint8_t max_ops, uint32_t *status, uint8_t *num_unsuccessful,
		uintptr_t *src_hdls, uintptr_t *dst_hdls)
{
	struct rte_idxd_rawdev *idxd =
			(struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private;
	unsigned short n, h_idx;

	while (idxd->batch_idx_read != idxd->batch_idx_write) {
		uint16_t idx_to_chk = idxd->batch_idx_ring[idxd->batch_idx_read];
		volatile struct rte_idxd_completion *comp_to_chk =
				(struct rte_idxd_completion *)&idxd->desc_ring[idx_to_chk];
		uint8_t batch_status = comp_to_chk->status;
		if (batch_status == 0)
			break;
		comp_to_chk->status = 0;
		if (unlikely(batch_status > 1)) {
			/* error occurred somewhere in batch, start where last checked */
			uint16_t desc_count = comp_to_chk->completed_size;
			uint16_t batch_start = idxd->hdls_avail;
			uint16_t batch_end = idx_to_chk;

			if (batch_start > batch_end)
				batch_end += idxd->desc_ring_mask + 1;
			/* go through each batch entry and see status */
			for (n = 0; n < desc_count; n++) {
				uint16_t idx = (batch_start + n) & idxd->desc_ring_mask;
				volatile struct rte_idxd_completion *comp =
						(struct rte_idxd_completion *)&idxd->desc_ring[idx];
				if (comp->status != 0 &&
						idxd->hdl_ring_flags[idx] == RTE_IDXD_HDL_NORMAL) {
					idxd->hdl_ring_flags[idx] = RTE_IDXD_HDL_OP_FAILED;
					idxd->hdl_ring_flags[idx] |= (comp->status << 8);
					comp->status = 0; /* clear error for next time */
				}
			}
			/* if batch is incomplete, mark rest as skipped */
			for ( ; n < batch_end - batch_start; n++) {
				uint16_t idx = (batch_start + n) & idxd->desc_ring_mask;
				if (idxd->hdl_ring_flags[idx] == RTE_IDXD_HDL_NORMAL)
					idxd->hdl_ring_flags[idx] = RTE_IDXD_HDL_OP_SKIPPED;
			}
		}
		/* avail points to one after the last one written */
		idxd->hdls_avail = (idx_to_chk + 1) & idxd->desc_ring_mask;
		idxd->batch_idx_read++;
		if (idxd->batch_idx_read > idxd->max_batches)
			idxd->batch_idx_read = 0;
	}

	n = 0;
	h_idx = idxd->hdls_read;
	while (h_idx != idxd->hdls_avail) {
		uint16_t flag = idxd->hdl_ring_flags[h_idx];
		if (flag != RTE_IDXD_HDL_INVALID) {
			if (!idxd->cfg.hdls_disable) {
				src_hdls[n] = idxd->hdl_ring[h_idx].src;
				dst_hdls[n] = idxd->hdl_ring[h_idx].dst;
			}
			if (unlikely(flag != RTE_IDXD_HDL_NORMAL)) {
				if (status != NULL)
					status[n] = flag == RTE_IDXD_HDL_OP_SKIPPED ?
							RTE_IOAT_OP_SKIPPED :
							/* failure case, return err code */
							idxd->hdl_ring_flags[h_idx] >> 8;
				if (num_unsuccessful != NULL)
					*num_unsuccessful += 1;
			}
			n++;
		}
		idxd->hdl_ring_flags[h_idx] = RTE_IDXD_HDL_NORMAL;
		if (++h_idx > idxd->desc_ring_mask)
			h_idx = 0;
		if (n >= max_ops)
			break;
	}

	/* skip over any remaining blank elements, e.g. batch completion */
	while (idxd->hdl_ring_flags[h_idx] == RTE_IDXD_HDL_INVALID && h_idx != idxd->hdls_avail) {
		idxd->hdl_ring_flags[h_idx] = RTE_IDXD_HDL_NORMAL;
		if (++h_idx > idxd->desc_ring_mask)
			h_idx = 0;
	}
	idxd->hdls_read = h_idx;

	idxd->xstats.completed += n;
	return n;
}
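
A hedged polling sketch to close the loop (the helper name example_poll_completions() is hypothetical, and applications would normally use the public rte_ioat_* wrappers from rte_ioat_rawdev.h rather than these internal helpers): note that __idxd_completed_ops() only increments *num_unsuccessful and only writes status[] entries for skipped or failed ops, so the caller zeroes the counter itself.

/* Hypothetical sketch: drain up to 32 completions, return the success count. */
static inline int
example_poll_completions(int dev_id)
{
	uint32_t status[32];
	uintptr_t src_hdls[32], dst_hdls[32];
	uint8_t fails = 0; /* __idxd_completed_ops() increments, never resets */
	int n = __idxd_completed_ops(dev_id, 32, status, &fails,
			src_hdls, dst_hdls);

	/* status[i] is RTE_IOAT_OP_SKIPPED for skipped ops, or the raw error
	 * code (hdl_ring_flags >> 8) for failed ops; entries for successful
	 * ops are left untouched.
	 */
	return n - fails;
}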

#endif