|
3 | 3 | #include "simsycl/detail/check.hh" |
4 | 4 | #include "simsycl/sycl/device.hh" |
5 | 5 | #include "simsycl/sycl/platform.hh" |
| 6 | +#include "simsycl/sycl/vec.hh" |
6 | 7 |
|
| 8 | +#include <bit> |
7 | 9 | #include <cassert> |
8 | 10 | #include <iostream> |
9 | 11 | #include <limits> |
@@ -303,15 +305,16 @@ const platform_config builtin_platform{ |
303 | 305 | .extensions = {}, |
304 | 306 | }; |
305 | 307 |
|
306 | | -const device_config builtin_device{ |
307 | | - .device_type = sycl::info::device_type::gpu, |
308 | | - .vendor_id = 0, |
309 | | - .max_compute_units = 16, |
310 | | - .max_work_item_dimensions = 3, |
311 | | - .max_work_item_sizes_1 = {1024}, |
312 | | - .max_work_item_sizes_2 = {1024, 1024}, |
313 | | - .max_work_item_sizes_3 = {64, 1024, 1024}, |
314 | | - .max_work_group_size = 1024, |
| 308 | +// clang-format off |
| 309 | +const device_config builtin_device { |
| 310 | + .device_type = sycl::info::device_type::gpu, // |
| 311 | + .vendor_id = 0, // |
| 312 | + .max_compute_units = 16, // |
| 313 | + .max_work_item_dimensions = 3, // |
| 314 | + .max_work_item_sizes_1 = {1024}, // |
| 315 | + .max_work_item_sizes_2 = {1024, 1024}, // |
| 316 | + .max_work_item_sizes_3 = {1024, 1024, 1024}, // |
| 317 | + .max_work_group_size = 1024, // |
315 | 318 | .max_num_sub_groups = 32, |
316 | 319 | .sub_group_sizes = {32}, |
317 | 320 | .preferred_vector_width_char = 4, |
@@ -342,56 +345,75 @@ const device_config builtin_device{ |
342 | 345 | .image_max_buffer_size = 0, |
343 | 346 | .max_samplers = 0, |
344 | 347 | .max_parameter_size = std::numeric_limits<std::size_t>::max(), |
345 | | - .mem_base_addr_align = 8, |
| 348 | + .mem_base_addr_align = 8 * sizeof(sycl::long16), |
346 | 349 | .half_fp_config |
347 | | - = {sycl::info::fp_config::denorm, sycl::info::fp_config::inf_nan, sycl::info::fp_config::round_to_nearest, |
348 | | - sycl::info::fp_config::round_to_zero, sycl::info::fp_config::round_to_inf, sycl::info::fp_config::fma, |
349 | | - sycl::info::fp_config::correctly_rounded_divide_sqrt}, |
350 | | - .single_fp_config |
351 | | - = {sycl::info::fp_config::denorm, sycl::info::fp_config::inf_nan, sycl::info::fp_config::round_to_nearest, |
352 | | - sycl::info::fp_config::round_to_zero, sycl::info::fp_config::round_to_inf, sycl::info::fp_config::fma, |
353 | | - sycl::info::fp_config::correctly_rounded_divide_sqrt}, |
354 | | - .double_fp_config |
355 | | - = {sycl::info::fp_config::denorm, sycl::info::fp_config::inf_nan, sycl::info::fp_config::round_to_nearest, |
356 | | - sycl::info::fp_config::round_to_zero, sycl::info::fp_config::round_to_inf, sycl::info::fp_config::fma, |
357 | | - sycl::info::fp_config::correctly_rounded_divide_sqrt}, |
| 350 | +#if SIMSYCL_FEATURE_HALF_TYPE |
| 351 | + = {sycl::info::fp_config::denorm, sycl::info::fp_config::inf_nan, |
| 352 | + sycl::info::fp_config::fma, sycl::info::fp_config::correctly_rounded_divide_sqrt}, |
| 353 | +#else |
| 354 | + = {}, |
| 355 | +#endif |
| 356 | + .single_fp_config = {sycl::info::fp_config::denorm, sycl::info::fp_config::inf_nan, |
| 357 | + sycl::info::fp_config::fma, sycl::info::fp_config::correctly_rounded_divide_sqrt}, |
| 358 | + .double_fp_config = {sycl::info::fp_config::denorm, sycl::info::fp_config::inf_nan, |
| 359 | + sycl::info::fp_config::fma, sycl::info::fp_config::correctly_rounded_divide_sqrt}, |
358 | 360 | .global_mem_cache_type = sycl::info::global_mem_cache_type::read_write, |
359 | | - .global_mem_cache_line_size = 128, |
| 361 | + .global_mem_cache_line_size = 64, |
360 | 362 | .global_mem_cache_size = 16 << 20, |
361 | 363 | .global_mem_size = std::numeric_limits<std::size_t>::max(), |
362 | 364 | .max_constant_buffer_size = 1 << 16, |
363 | 365 | .max_constant_args = std::numeric_limits<uint32_t>::max(), |
364 | 366 | .local_mem_type = sycl::info::local_mem_type::local, |
365 | 367 | .local_mem_size = 64 << 10, |
366 | 368 | .error_correction_support = false, |
367 | | - .host_unified_memory = false, |
| 369 | + .host_unified_memory = true, |
| 370 | + .atomic_memory_order_capabilities = {sycl::memory_order::relaxed, sycl::memory_order::acquire, |
| 371 | + sycl::memory_order::release, sycl::memory_order::acq_rel, sycl::memory_order::seq_cst}, |
| 372 | + .atomic_fence_order_capabilities = {sycl::memory_order::relaxed, sycl::memory_order::acquire, |
| 373 | + sycl::memory_order::release, sycl::memory_order::acq_rel, sycl::memory_order::seq_cst}, |
| 374 | + .atomic_memory_scope_capabilities = {sycl::memory_scope::work_item, |
| 375 | + sycl::memory_scope::sub_group, sycl::memory_scope::work_group, sycl::memory_scope::device, |
| 376 | + sycl::memory_scope::system }, |
| 377 | + .atomic_fence_scope_capabilities = {sycl::memory_scope::work_item, |
| 378 | + sycl::memory_scope::sub_group, sycl::memory_scope::work_group, sycl::memory_scope::device, |
| 379 | + sycl::memory_scope::system }, |
368 | 380 | .profiling_timer_resolution = 1, |
369 | | - .is_endian_little = true, |
| 381 | + .is_endian_little = std::endian::native == std::endian::little, |
370 | 382 | .is_available = true, |
371 | | - .is_compiler_available = true, |
372 | | - .is_linker_available = true, |
| 383 | + .is_compiler_available = false, |
| 384 | + .is_linker_available = false, |
373 | 385 | .execution_capabilities = {sycl::info::execution_capability::exec_kernel}, |
374 | 386 | .queue_profiling = true, |
375 | 387 | .built_in_kernels = {}, |
376 | 388 | .platform_id = "SimSYCL", |
377 | 389 | .name = "SimSYCL virtual GPU", |
378 | 390 | .vendor = "SimSYCL", |
379 | 391 | .driver_version = "0.1", |
380 | | - .profile = "FULL_PROFILE", |
381 | 392 | .version = "0.1", |
382 | | - .aspects |
383 | | - = { sycl::aspect::gpu, sycl::aspect::accelerator, sycl::aspect::fp64, sycl::aspect::atomic64, |
384 | | - sycl::aspect::queue_profiling, sycl::aspect::usm_device_allocations, sycl::aspect::usm_host_allocations, |
385 | | - sycl::aspect::usm_shared_allocations, }, |
386 | | - .extensions = {}, |
| 393 | + .aspects = { sycl::aspect::gpu, sycl::aspect::accelerator, sycl::aspect::emulated, |
| 394 | + sycl::aspect::host_debuggable, |
| 395 | +#if SIMSYCL_FEATURE_HALF_TYPE |
| 396 | + sycl::aspect::fp16, |
| 397 | +#endif |
| 398 | + sycl::aspect::fp64, sycl::aspect::atomic64, sycl::aspect::queue_profiling, |
| 399 | + sycl::aspect::usm_device_allocations, sycl::aspect::usm_host_allocations, |
| 400 | + sycl::aspect::usm_atomic_host_allocations, sycl::aspect::usm_shared_allocations, |
| 401 | + sycl::aspect::usm_atomic_shared_allocations, sycl::aspect::usm_system_allocations }, |
| 402 | + .extensions = { |
| 403 | + "cl_khr_int64_base_atomics", |
| 404 | + "cl_khr_int64_extended_atomics", |
| 405 | +#if SIMSYCL_FEATURE_HALF_TYPE |
| 406 | + "cl_khr_fp16", |
| 407 | +#endif |
| 408 | + }, |
387 | 409 | .printf_buffer_size = std::numeric_limits<std::size_t>::max(), |
388 | | - .preferred_interop_user_sync = true, |
389 | 410 | .partition_max_sub_devices = 0, |
390 | 411 | .partition_properties = {}, |
391 | 412 | .partition_affinity_domains = {sycl::info::partition_affinity_domain::not_applicable}, |
392 | 413 | .partition_type_property = sycl::info::partition_property::no_partition, |
393 | 414 | .partition_type_affinity_domain = sycl::info::partition_affinity_domain::not_applicable, |
394 | 415 | }; |
| 416 | +// clang-format off |
395 | 417 |
|
396 | 418 | const system_config builtin_system{ |
397 | 419 | .platforms = {{"SimSYCL", builtin_platform}}, |
|
0 commit comments