fixes for modern clang on linux

This commit is contained in:
Stefano Sinigardi 2019-07-30 17:39:18 +02:00
parent 6e7c3a53c7
commit 769f76ef62
2 changed files with 22 additions and 3 deletions

View File

@ -115,7 +115,7 @@ matrix:
- additional_defines=""
- USE_VCPKG=true
- VCPKG_DEFINES="-DCMAKE_TOOLCHAIN_FILE=$HOME/vcpkg/scripts/buildsystems/vcpkg.cmake"
- MATRIX_EVAL="export CC=gcc-8 && export CXX=g++-8"
- MATRIX_EVAL=""
- os: linux
compiler: clang
@ -189,7 +189,7 @@ matrix:
- CUDA_PATH=/usr/local/cuda-10.0
- CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-10.0
- LD_LIBRARY_PATH="/usr/local/cuda-10.0/lib64:/usr/local/cuda-10.0/lib64/stubs:${LD_LIBRARY_PATH}"
- MATRIX_EVAL="export CC=gcc-8 && export CXX=g++-8 && wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/cuda-repo-ubuntu1404_10.0.130-1_amd64.deb && sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/7fa2af80.pub && sudo dpkg -i cuda-repo-ubuntu1404_10.0.130-1_amd64.deb && wget http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1404/x86_64/nvidia-machine-learning-repo-ubuntu1404_4.0-2_amd64.deb && sudo dpkg -i nvidia-machine-learning-repo-ubuntu1404_4.0-2_amd64.deb && sudo apt-get -y update && sudo apt-get install -y --no-install-recommends cuda-compiler-10-0 cuda-libraries-dev-10-0 cuda-driver-dev-10-0 cuda-cudart-dev-10-0 cuda-cublas-dev-10-0 cuda-curand-dev-10-0 && sudo apt-get install -y --no-install-recommends libcudnn7-dev && sudo ln -s /usr/local/cuda-10.0/lib64/stubs/libcuda.so /usr/local/cuda-10.0/lib64/stubs/libcuda.so.1"
- MATRIX_EVAL="wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/cuda-repo-ubuntu1404_10.0.130-1_amd64.deb && sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/7fa2af80.pub && sudo dpkg -i cuda-repo-ubuntu1404_10.0.130-1_amd64.deb && wget http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1404/x86_64/nvidia-machine-learning-repo-ubuntu1404_4.0-2_amd64.deb && sudo dpkg -i nvidia-machine-learning-repo-ubuntu1404_4.0-2_amd64.deb && sudo apt-get -y update && sudo apt-get install -y --no-install-recommends cuda-compiler-10-0 cuda-libraries-dev-10-0 cuda-driver-dev-10-0 cuda-cudart-dev-10-0 cuda-cublas-dev-10-0 cuda-curand-dev-10-0 && sudo apt-get install -y --no-install-recommends libcudnn7-dev && sudo ln -s /usr/local/cuda-10.0/lib64/stubs/libcuda.so /usr/local/cuda-10.0/lib64/stubs/libcuda.so.1"
# allow_failures:
# - name: macOS - vcpkg

View File

@ -550,7 +550,26 @@ static inline float _castu32_f32(uint32_t a) {
}
/*
 * Return lane `index` of `a` as a float.
 *
 * The lane selector handed to _mm256_extract_epi32 is expected to be an
 * integer constant expression, so the run-time `index` is dispatched through
 * a switch in which every intrinsic call receives a literal lane number.
 *
 * a      source vector
 * index  lane to extract, 0..7; any other value yields lane 0
 *
 * The extracted 32-bit integer is converted to float by _castu32_f32.
 */
static inline float _mm256_extract_float32(__m256 a, const int index) {
    switch(index) {
    case 0:
        return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 0));
    case 1:
        return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 1));
    case 2:
        return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 2));
    case 3:
        return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 3));
    case 4:
        return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 4));
    case 5:
        return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 5));
    case 6:
        return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 6));
    case 7:
        return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 7));
    default:
        /* Out-of-range selector: fall back to lane 0. */
        return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 0));
    }
}
void asm_cpuid(uint32_t* abcd, uint32_t eax)