// Fast solution to the challenge at: // https://www.hackerrank.com/challenges/acm-icpc-team // // By Daniel L. Taylor // Copyright (C) 2017-2024 Taylor Design. All Rights Reserved. // Discussed on my blog at: // http://www.taylordesign.net/code-optimization/of-bits-and-strings/ // // In this case speed is achieved by encoding the data properly // and letting the CPU do what it was designed to do. // // Speed could potentially be improved further by using 128-bit or 256-bit // vector unit (MMX or SSE) instructions for the bitwise OR and // compare operations. #include #include #include #include #include #include #include #include #include typedef unsigned long int uli; const int kMaxTopics = 500; void stringToBits(char *s, uli *bits, int rowSize); static inline int countTeamTopics(uli *bits, int rowSize, int iTeamA, int iTeamB); int main() { // The code below sets a file to stdin and times the results for testing. // If left in it will cause the solution to fail at HackerRank. // char test_file_path[] = ""; // freopen(test_file_path, "r", stdin); // Start the clock. // static double time_consumed = 0; // clock_t start, end; // start = clock(); // Get parameters. int pc, tc; scanf("%d %d", &pc, &tc); // Allocate bits. This solution is going to convert every 0 and 1 // in the input string to a single bit in memory. int cellSize = sizeof(uli) * 8; int rowSize = (tc/cellSize) + 1; uli *bits = calloc(rowSize * pc, sizeof(uli)); // Scan each string into the array. char *s = calloc(kMaxTopics + 1, sizeof(char)); for (int pi = 0; pi < pc; pi++){ // In any real production code scanf should always be constrained. // Example: scanf("%500s", s); scanf("%s", s); stringToBits(s, &bits[rowSize*pi], rowSize); } // Determine the max topic count and the teams that // know the maximum number of topics. int c = pc-1; int maxTopics = 0; int maxTeams = 0; for (int i = 0; i < c; i++){ for (int j = i+1; j < pc; j++){ int tc = countTeamTopics(bits, rowSize, i, j); if (tc > maxTopics){ maxTopics = tc; maxTeams = 1; } else if (tc == maxTopics){ maxTeams++; } } } // Output the results. printf("%d\n%d\n", maxTopics, maxTeams); // Cleanup. free(bits); free(s); // Print the time. // end = clock(); // time_consumed += (double)(end - start); // printf("%f\n", time_consumed / CLOCKS_PER_SEC); return 0; } static inline int countTeamTopics(uli *bits, int rowSize, int iTeamA, int iTeamB) { // The multiple steps are so that the code is easier to read // and so that there are intermediate variables to inspect while in the debugger. // One of the keys to efficiency here is that the CPU is comparing the topics // in the most natural way possible short of using the vector unit. // // Note that in testing inline improved performance slightly. int tc = 0; for (int i = 0; i < rowSize; i++){ uli a = bits[(rowSize*iTeamA)+i]; uli b = bits[(rowSize*iTeamB)+i]; uli v = a | b; // In Xcode this builtin was much faster than a hand coded 8-bit lookup table. // Since counting the set bits is relatively expensive this cut the time in half. // But that may not be the case on every compiler, and this builtin // may not be available every where. It is on HackerRank. tc += __builtin_popcountl(v); } return tc; } void stringToBits(char *s, uli *bits, int rowSize) { // strtoul is fast but will keep consuming characters past the bit length of uli. // It returns ULONG_MAX when it does. // So you have to insert/remove nulls at the appropriate positions to stop it. // I left the last segment to process outside the loop because there's no need // to set a null in that case, and doing so slightly increases the risk of a // buffer overrun if someone played with the definitions or code above. // // Note that in testing using an inline on this function did not help and often // made times slightly worse. int nullOffset = sizeof(uli) * 8; int i, c = rowSize-1; uli v; for (i = 0; i < c; i++){ char temp = s[nullOffset]; s[nullOffset] = 0; v = strtoul(s, &s, 2); bits[i] = v; s[0] = temp; // sp now points directly to the null. } // Last segment. v = strtoul(s, &s, 2); bits[i] = v; }