diff --git a/.travis.yml b/.travis.yml
index 060f36ca..63c7b2c4 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -115,7 +115,7 @@ matrix:
         - additional_defines=""
         - USE_VCPKG=true
         - VCPKG_DEFINES="-DCMAKE_TOOLCHAIN_FILE=$HOME/vcpkg/scripts/buildsystems/vcpkg.cmake"
-        - MATRIX_EVAL="export CC=gcc-8 && export CXX=g++-8"
+        - MATRIX_EVAL=""
 
     - os: linux
       compiler: clang
@@ -189,7 +189,7 @@ matrix:
         - CUDA_PATH=/usr/local/cuda-10.0
         - CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-10.0
         - LD_LIBRARY_PATH="/usr/local/cuda-10.0/lib64:/usr/local/cuda-10.0/lib64/stubs:${LD_LIBRARY_PATH}"
-        - MATRIX_EVAL="export CC=gcc-8 && export CXX=g++-8 && wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/cuda-repo-ubuntu1404_10.0.130-1_amd64.deb && sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/7fa2af80.pub && sudo dpkg -i cuda-repo-ubuntu1404_10.0.130-1_amd64.deb && wget http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1404/x86_64/nvidia-machine-learning-repo-ubuntu1404_4.0-2_amd64.deb && sudo dpkg -i nvidia-machine-learning-repo-ubuntu1404_4.0-2_amd64.deb && sudo apt-get -y update && sudo apt-get install -y --no-install-recommends cuda-compiler-10-0 cuda-libraries-dev-10-0 cuda-driver-dev-10-0 cuda-cudart-dev-10-0 cuda-cublas-dev-10-0 cuda-curand-dev-10-0 && sudo apt-get install -y --no-install-recommends libcudnn7-dev && sudo ln -s /usr/local/cuda-10.0/lib64/stubs/libcuda.so /usr/local/cuda-10.0/lib64/stubs/libcuda.so.1"
+        - MATRIX_EVAL="wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/cuda-repo-ubuntu1404_10.0.130-1_amd64.deb && sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/7fa2af80.pub && sudo dpkg -i cuda-repo-ubuntu1404_10.0.130-1_amd64.deb && wget http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1404/x86_64/nvidia-machine-learning-repo-ubuntu1404_4.0-2_amd64.deb && sudo dpkg -i nvidia-machine-learning-repo-ubuntu1404_4.0-2_amd64.deb && sudo apt-get -y update && sudo apt-get install -y --no-install-recommends cuda-compiler-10-0 cuda-libraries-dev-10-0 cuda-driver-dev-10-0 cuda-cudart-dev-10-0 cuda-cublas-dev-10-0 cuda-curand-dev-10-0 && sudo apt-get install -y --no-install-recommends libcudnn7-dev && sudo ln -s /usr/local/cuda-10.0/lib64/stubs/libcuda.so /usr/local/cuda-10.0/lib64/stubs/libcuda.so.1"
 
 # allow_failures:
 # - name: macOS - vcpkg
diff --git a/src/gemm.c b/src/gemm.c
index 0e1bd5ea..15138828 100644
--- a/src/gemm.c
+++ b/src/gemm.c
@@ -550,7 +550,26 @@ static inline float _castu32_f32(uint32_t a) {
 }
 
 static inline float _mm256_extract_float32(__m256 a, const int index) {
-    return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), index));
+    switch(index) {
+    case 0:
+        return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 0));
+    case 1:
+        return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 1));
+    case 2:
+        return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 2));
+    case 3:
+        return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 3));
+    case 4:
+        return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 4));
+    case 5:
+        return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 5));
+    case 6:
+        return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 6));
+    case 7:
+        return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 7));
+    default:
+        return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 0));
+    }
 }
 
 void asm_cpuid(uint32_t* abcd, uint32_t eax)