False sharing
Overview
거짓 공유는 캐싱 메커니즘에 의해 관리되는 가장 작은 리소스 블록 크기의 분산되고 일관된 캐시가 있는 시스템에서 발생할 수 있는 성능 저하 사용 패턴이다.
- 두 프로세서들이 각기 다른 주소에 write를 하려고 하나, 이 주소들이 같은 캐시 라인에 매핑된 조건을 말한다.
- 프로세서들의 캐시 사이에서 캐시 라인을 서로 쓰는 상황이 발생하게 되면, cache coherence protocol로 인해 상당한 양의 통신을 발생시킨다.
Example
#include <pthread.h>
#include <chrono>
#include <cstddef>
#include <cstdio>
#include <new>
// Benchmark tunables.
//
// CACHE_LINE_SIZE  - byte distance used to keep per-thread counters apart.
//                    Taken from the standard library when it advertises
//                    std::hardware_destructive_interference_size (declared in
//                    <new>); otherwise falls back to 64 bytes.
// MAX_THREADS      - capacity of the fixed-size thread/counter arrays.
// MANY_ITERATIONS  - number of increments each worker performs.
#if defined(__cpp_lib_hardware_interference_size)
constexpr std::size_t CACHE_LINE_SIZE = std::hardware_destructive_interference_size;
#else
constexpr std::size_t CACHE_LINE_SIZE = 64;
#endif
constexpr std::size_t MAX_THREADS = 8;
constexpr std::size_t MANY_ITERATIONS = 1000000000;
// Thread entry point: bumps the int that `arg` points to MANY_ITERATIONS
// times and returns a null result.
//
// The target is accessed through a volatile pointer so that every pass
// performs its own read and write of the counter instead of being folded
// into a single addition by the optimizer.
void* worker(void* arg) {
    volatile int* const slot = static_cast<int*>(arg);
    for (auto left = MANY_ITERATIONS; left != 0; --left) {
        *slot = *slot + 1;
    }
    return nullptr;
}
// Baseline benchmark: starts up to `num_threads` workers, each incrementing
// its own element of a plain int array, waits for all of them, and prints the
// elapsed wall-clock time in seconds.
//
// The counters are adjacent ints, so several of them can sit on the same
// cache line; this is the layout the padded variant is compared against.
//
// num_threads: requested worker count; values above MAX_THREADS are clamped
//              to MAX_THREADS, values <= 0 start no workers.
void test1(int num_threads) {
    // The arrays below have exactly MAX_THREADS slots; never index past them.
    if (num_threads > static_cast<int>(MAX_THREADS))
        num_threads = static_cast<int>(MAX_THREADS);

    auto begin = std::chrono::high_resolution_clock::now();
    pthread_t threads[MAX_THREADS];
    // Zero-initialised because worker() reads each counter before writing it.
    int counter[MAX_THREADS] = {};

    // Only threads that were actually created may be joined.
    int started = 0;
    for (; started < num_threads; started++) {
        const int rc =
            pthread_create(&threads[started], nullptr, &worker, &counter[started]);
        if (rc != 0) {
            fprintf(stderr, "pthread_create failed (error %d)\n", rc);
            break;
        }
    }
    for (int i = 0; i < started; i++)
        pthread_join(threads[i], nullptr);

    auto end = std::chrono::high_resolution_clock::now();
    auto elapsed =
        std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin);
    printf("Time measured: %.3f seconds.\n", elapsed.count() * 1e-9);
}
struct padded_t
{
int counter;
char padding[CACHE_LINE_SIZE - sizeof(int)];
};
// Padded benchmark: starts up to `num_threads` workers, each incrementing the
// `counter` member of its own padded_t element, waits for all of them, and
// prints the elapsed wall-clock time in seconds.
//
// Consecutive padded_t elements are sizeof(padded_t) bytes apart, which keeps
// each worker's counter away from the others.
//
// num_threads: requested worker count; values above MAX_THREADS are clamped
//              to MAX_THREADS, values <= 0 start no workers.
void test2(int num_threads) {
    // The arrays below have exactly MAX_THREADS slots; never index past them.
    if (num_threads > static_cast<int>(MAX_THREADS))
        num_threads = static_cast<int>(MAX_THREADS);

    auto begin = std::chrono::high_resolution_clock::now();
    pthread_t threads[MAX_THREADS];
    // Zero-initialised because worker() reads each counter before writing it.
    padded_t counter[MAX_THREADS] = {};

    // Only threads that were actually created may be joined.
    int started = 0;
    for (; started < num_threads; started++) {
        const int rc = pthread_create(&threads[started], nullptr, &worker,
                                      &(counter[started].counter));
        if (rc != 0) {
            fprintf(stderr, "pthread_create failed (error %d)\n", rc);
            break;
        }
    }
    for (int i = 0; i < started; i++)
        pthread_join(threads[i], nullptr);

    auto end = std::chrono::high_resolution_clock::now();
    auto elapsed =
        std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin);
    printf("Time measured: %.3f seconds.\n", elapsed.count() * 1e-9);
}
// Runs the unpadded benchmark and then the padded one, each with eight
// worker threads, so their printed timings can be compared.
int main()
{
    constexpr int kWorkerCount = 8;
    test1(kWorkerCount);
    test2(kWorkerCount);
    return 0;
}
위 코드를 실행했을 때, 아래와 같은 결과를 얻을 수 있다.
Time measured: 2.946 seconds.
Time measured: 2.533 seconds.