#include #include #include #include #include #include #include #include /* The state of the parser to be preserved between parsing different tokens. */ struct parser_state { /* * Pointer to the start of the previous token if it is "model". * NULL if previous token is not "model". */ char* context_model; /* * Pointer to the start of the previous token if it is a single-uppercase-letter token. * NULL if previous token is anything different. */ char* context_upper_letter; /* * Pointer to the start of the previous token if it is "Dual". * NULL if previous token is not "Dual". */ char* context_dual; /* * Pointer to the start of the previous token if it is "Core", "Dual-Core", "QuadCore", etc. * NULL if previous token is anything different. */ char* context_core; /* * Pointer to the start of the previous token if it is "Eng" or "Engineering", etc. * NULL if previous token is anything different. */ char* context_engineering; /* * Pointer to the '@' symbol in the brand string (separates frequency specification). * NULL if there is no '@' symbol. */ char* frequency_separator; /* Indicates whether the brand string (after transformations) contains frequency. */ bool frequency_token; /* Indicates whether the processor is of Xeon family (contains "Xeon" substring). */ bool xeon; /* Indicates whether the processor model number was already parsed. */ bool parsed_model_number; /* Indicates whether the processor is an engineering sample (contains "Engineering Sample" or "Eng Sample" substrings). */ bool engineering_sample; }; /** @brief Resets information about the previous token. Keeps all other state information. */ static void reset_context(struct parser_state* state) { state->context_model = NULL; state->context_upper_letter = NULL; state->context_dual = NULL; state->context_core = NULL; } /** * @brief Overwrites the supplied string with space characters if it exactly matches the given string. * @param string The string to be compared against other string, and erased in case of matching. * @param length The length of the two string to be compared against each other. * @param target The string to compare against. * @retval true If the two strings match and the first supplied string was erased (overwritten with space characters). * @retval false If the two strings are different and the first supplied string remained unchanged. */ static inline bool erase_matching(char* string, size_t length, const char* target) { const bool match = memcmp(string, target, length) == 0; if (match) { memset(string, ' ', length); } return match; } /** * @brief Checks if the supplied ASCII character is an uppercase latin letter. * @param character The character to analyse. * @retval true If the supplied character is an uppercase latin letter ('A' to 'Z'). * @retval false If the supplied character is anything different. */ static inline bool is_upper_letter(char character) { return (uint32_t) (character - 'A') <= (uint32_t)('Z' - 'A'); } /** * @brief Checks if the supplied ASCII character is a digit. * @param character The character to analyse. * @retval true If the supplied character is a digit ('0' to '9'). * @retval false If the supplied character is anything different. */ static inline bool is_digit(char character) { return (uint32_t) (character - '0') < UINT32_C(10); } static inline bool is_zero_number(const char* token_start, const char* token_end) { for (const char* char_ptr = token_start; char_ptr != token_end; char_ptr++) { if (*char_ptr != '0') { return false; } } return true; } static inline bool is_space(const char* token_start, const char* token_end) { for (const char* char_ptr = token_start; char_ptr != token_end; char_ptr++) { if (*char_ptr != ' ') { return false; } } return true; } static inline bool is_number(const char* token_start, const char* token_end) { for (const char* char_ptr = token_start; char_ptr != token_end; char_ptr++) { if (!is_digit(*char_ptr)) { return false; } } return true; } static inline bool is_model_number(const char* token_start, const char* token_end) { for (const char* char_ptr = token_start + 1; char_ptr < token_end; char_ptr++) { if (is_digit(char_ptr[-1]) && is_digit(char_ptr[0])) { return true; } } return false; } static inline bool is_frequency(const char* token_start, const char* token_end) { const size_t token_length = (size_t) (token_end - token_start); if (token_length > 3 && token_end[-2] == 'H' && token_end[-1] == 'z') { switch (token_end[-3]) { case 'K': case 'M': case 'G': return true; } } return false; } /** * @warning Input and output tokens can overlap */ static inline char* move_token(const char* token_start, const char* token_end, char* output_ptr) { const size_t token_length = (size_t) (token_end - token_start); memmove(output_ptr, token_start, token_length); return output_ptr + token_length; } static bool transform_token(char* token_start, char* token_end, struct parser_state* state) { const struct parser_state previousState = *state; reset_context(state); size_t token_length = (size_t) (token_end - token_start); if (state->frequency_separator != NULL) { if (token_start > state->frequency_separator) { if (state->parsed_model_number) { memset(token_start, ' ', token_length); } } } /* Early AMD and Cyrix processors have "tm" suffix for trademark, e.g. * "AMD-K6tm w/ multimedia extensions" * "Cyrix MediaGXtm MMXtm Enhanced" */ if (token_length > 2) { const char context_char = token_end[-3]; if (is_digit(context_char) || is_upper_letter(context_char)) { if (erase_matching(token_end - 2, 2, "tm")) { token_end -= 2; token_length -= 2; } } } if (token_length > 4) { /* Some early AMD CPUs have "AMD-" at the beginning, e.g. * "AMD-K5(tm) Processor" * "AMD-K6tm w/ multimedia extensions" * "AMD-K6(tm) 3D+ Processor" * "AMD-K6(tm)-III Processor" */ if (erase_matching(token_start, 4, "AMD-")) { token_start += 4; token_length -= 4; } } switch (token_length) { case 1: /* * On some Intel processors there is a space between the first letter of * the name and the number after it, e.g. * "Intel(R) Core(TM) i7 CPU X 990 @ 3.47GHz" * "Intel(R) Core(TM) CPU Q 820 @ 1.73GHz" * We want to merge these parts together, in reverse order, i.e. "X 990" -> "990X", "820" -> "820Q" */ if (is_upper_letter(token_start[0])) { state->context_upper_letter = token_start; return true; } break; case 2: /* Erase everything after "w/" in "AMD-K6tm w/ multimedia extensions" */ if (erase_matching(token_start, token_length, "w/")) { return false; } /* * Intel Xeon processors since Ivy Bridge use versions, e.g. * "Intel Xeon E3-1230 v2" * Some processor branch strings report them as "V", others report as "v". * Normalize the former (upper-case) to the latter (lower-case) version */ if (token_start[0] == 'V' && is_digit(token_start[1])) { token_start[0] = 'v'; return true; } break; case 3: /* * Erase "CPU" in brand string on Intel processors, e.g. * "Intel(R) Core(TM) i5 CPU 650 @ 3.20GHz" * "Intel(R) Xeon(R) CPU X3210 @ 2.13GHz" * "Intel(R) Atom(TM) CPU Z2760 @ 1.80GHz" */ if (erase_matching(token_start, token_length, "CPU")) { return true; } /* * Erase everything after "SOC" on AMD System-on-Chips, e.g. * "AMD GX-212JC SOC with Radeon(TM) R2E Graphics \0" */ if (erase_matching(token_start, token_length, "SOC")) { return false; } /* * Erase "AMD" in brand string on AMD processors, e.g. * "AMD Athlon(tm) Processor" * "AMD Engineering Sample" * "Quad-Core AMD Opteron(tm) Processor 2344 HE" */ if (erase_matching(token_start, token_length, "AMD")) { return true; } /* * Erase "VIA" in brand string on VIA processors, e.g. * "VIA C3 Ezra" * "VIA C7-M Processor 1200MHz" * "VIA Nano L3050@1800MHz" */ if (erase_matching(token_start, token_length, "VIA")) { return true; } /* Erase "IDT" in brand string on early Centaur processors, e.g. "IDT WinChip 2-3D" */ if (erase_matching(token_start, token_length, "IDT")) { return true; } /* * Erase everything starting with "MMX" in * "Cyrix MediaGXtm MMXtm Enhanced" ("tm" suffix is removed by this point) */ if (erase_matching(token_start, token_length, "MMX")) { return false; } /* * Erase everything starting with "APU" on AMD processors, e.g. * "AMD A10-4600M APU with Radeon(tm) HD Graphics" * "AMD A10-7850K APU with Radeon(TM) R7 Graphics" * "AMD A6-6310 APU with AMD Radeon R4 Graphics" */ if (erase_matching(token_start, token_length, "APU")) { return false; } /* * Remember to discard string if it contains "Eng Sample", * e.g. "Eng Sample, ZD302046W4K43_36/30/20_2/8_A" */ if (memcmp(token_start, "Eng", token_length) == 0) { state->context_engineering = token_start; } break; case 4: /* Remember to erase "Dual Core" in "AMD Athlon(tm) 64 X2 Dual Core Processor 3800+" */ if (memcmp(token_start, "Dual", token_length) == 0) { state->context_dual = token_start; } /* Remember if the processor is on Xeon family */ if (memcmp(token_start, "Xeon", token_length) == 0) { state->xeon = true; } /* Erase "Dual Core" in "AMD Athlon(tm) 64 X2 Dual Core Processor 3800+" */ if (previousState.context_dual != NULL) { if (memcmp(token_start, "Core", token_length) == 0) { memset(previousState.context_dual, ' ', (size_t) (token_end - previousState.context_dual)); state->context_core = token_end; return true; } } break; case 5: /* * Erase "Intel" in brand string on Intel processors, e.g. * "Intel(R) Xeon(R) CPU X3210 @ 2.13GHz" * "Intel(R) Atom(TM) CPU D2700 @ 2.13GHz" * "Genuine Intel(R) processor 800MHz" */ if (erase_matching(token_start, token_length, "Intel")) { return true; } /* * Erase "Cyrix" in brand string on Cyrix processors, e.g. * "Cyrix MediaGXtm MMXtm Enhanced" */ if (erase_matching(token_start, token_length, "Cyrix")) { return true; } /* * Erase everything following "Geode" (but not "Geode" token itself) on Geode processors, e.g. * "Geode(TM) Integrated Processor by AMD PCS" * "Geode(TM) Integrated Processor by National Semi" */ if (memcmp(token_start, "Geode", token_length) == 0) { return false; } /* Remember to erase "model unknown" in "AMD Processor model unknown" */ if (memcmp(token_start, "model", token_length) == 0) { state->context_model = token_start; return true; } break; case 6: /* * Erase everything starting with "Radeon" or "RADEON" on AMD APUs, e.g. * "A8-7670K Radeon R7, 10 Compute Cores 4C+6G" * "FX-8800P Radeon R7, 12 Compute Cores 4C+8G" * "A12-9800 RADEON R7, 12 COMPUTE CORES 4C+8G" * "A9-9410 RADEON R5, 5 COMPUTE CORES 2C+3G" */ if (erase_matching(token_start, token_length, "Radeon") || erase_matching(token_start, token_length, "RADEON")) { return false; } /* * Erase "Mobile" when it is not part of the processor name, * e.g. in "AMD Turion(tm) X2 Ultra Dual-Core Mobile ZM-82" */ if (previousState.context_core != NULL) { if (erase_matching(token_start, token_length, "Mobile")) { return true; } } /* Erase "family" in "Intel(R) Pentium(R) III CPU family 1266MHz" */ if (erase_matching(token_start, token_length, "family")) { return true; } /* Discard the string if it contains "Engineering Sample" */ if (previousState.context_engineering != NULL) { if (memcmp(token_start, "Sample", token_length) == 0) { state->engineering_sample = true; return false; } } break; case 7: /* * Erase "Geniune" in brand string on Intel engineering samples, e.g. * "Genuine Intel(R) processor 800MHz" * "Genuine Intel(R) CPU @ 2.13GHz" * "Genuine Intel(R) CPU 0000 @ 1.73GHz" */ if (erase_matching(token_start, token_length, "Genuine")) { return true; } /* * Erase "12-core" in brand string on AMD Threadripper, e.g. * "AMD Ryzen Threadripper 1920X 12-Core Processor" */ if (erase_matching(token_start, token_length, "12-Core")) { return true; } /* * Erase "16-core" in brand string on AMD Threadripper, e.g. * "AMD Ryzen Threadripper 1950X 16-Core Processor" */ if (erase_matching(token_start, token_length, "16-Core")) { return true; } /* Erase "model unknown" in "AMD Processor model unknown" */ if (previousState.context_model != NULL) { if (memcmp(token_start, "unknown", token_length) == 0) { memset(previousState.context_model, ' ', token_end - previousState.context_model); return true; } } /* * Discard the string if it contains "Eng Sample:" or "Eng Sample," e.g. * "AMD Eng Sample, ZD302046W4K43_36/30/20_2/8_A" * "AMD Eng Sample: 2D3151A2M88E4_35/31_N" */ if (previousState.context_engineering != NULL) { if (memcmp(token_start, "Sample,", token_length) == 0 || memcmp(token_start, "Sample:", token_length) == 0) { state->engineering_sample = true; return false; } } break; case 8: /* Erase "QuadCore" in "VIA QuadCore L4700 @ 1.2+ GHz" */ if (erase_matching(token_start, token_length, "QuadCore")) { state->context_core = token_end; return true; } /* Erase "Six-Core" in "AMD FX(tm)-6100 Six-Core Processor" */ if (erase_matching(token_start, token_length, "Six-Core")) { state->context_core = token_end; return true; } break; case 9: if (erase_matching(token_start, token_length, "Processor")) { return true; } if (erase_matching(token_start, token_length, "processor")) { return true; } /* Erase "Dual-Core" in "Pentium(R) Dual-Core CPU T4200 @ 2.00GHz" */ if (erase_matching(token_start, token_length, "Dual-Core")) { state->context_core = token_end; return true; } /* Erase "Quad-Core" in AMD processors, e.g. * "Quad-Core AMD Opteron(tm) Processor 2347 HE" * "AMD FX(tm)-4170 Quad-Core Processor" */ if (erase_matching(token_start, token_length, "Quad-Core")) { state->context_core = token_end; return true; } /* Erase "Transmeta" in brand string on Transmeta processors, e.g. * "Transmeta(tm) Crusoe(tm) Processor TM5800" * "Transmeta Efficeon(tm) Processor TM8000" */ if (erase_matching(token_start, token_length, "Transmeta")) { return true; } break; case 10: /* * Erase "Eight-Core" in AMD processors, e.g. * "AMD FX(tm)-8150 Eight-Core Processor" */ if (erase_matching(token_start, token_length, "Eight-Core")) { state->context_core = token_end; return true; } break; case 11: /* * Erase "Triple-Core" in AMD processors, e.g. * "AMD Phenom(tm) II N830 Triple-Core Processor" * "AMD Phenom(tm) 8650 Triple-Core Processor" */ if (erase_matching(token_start, token_length, "Triple-Core")) { state->context_core = token_end; return true; } /* * Remember to discard string if it contains "Engineering Sample", * e.g. "AMD Engineering Sample" */ if (memcmp(token_start, "Engineering", token_length) == 0) { state->context_engineering = token_start; return true; } break; } if (is_zero_number(token_start, token_end)) { memset(token_start, ' ', token_length); return true; } /* On some Intel processors the last letter of the name is put before the number, * and an additional space it added, e.g. * "Intel(R) Core(TM) i7 CPU X 990 @ 3.47GHz" * "Intel(R) Core(TM) CPU Q 820 @ 1.73GHz" * "Intel(R) Core(TM) i5 CPU M 480 @ 2.67GHz" * We fix this issue, i.e. "X 990" -> "990X", "Q 820" -> "820Q" */ if (previousState.context_upper_letter != 0) { /* A single letter token followed by 2-to-5 digit letter is merged together */ switch (token_length) { case 2: case 3: case 4: case 5: if (is_number(token_start, token_end)) { /* Load the previous single-letter token */ const char letter = *previousState.context_upper_letter; /* Erase the previous single-letter token */ *previousState.context_upper_letter = ' '; /* Move the current token one position to the left */ move_token(token_start, token_end, token_start - 1); token_start -= 1; /* * Add the letter on the end * Note: accessing token_start[-1] is safe because this is not the first token */ token_end[-1] = letter; } } } if (state->frequency_separator != NULL) { if (is_model_number(token_start, token_end)) { state->parsed_model_number = true; } } if (is_frequency(token_start, token_end)) { state->frequency_token = true; } return true; } uint32_t cpuinfo_x86_normalize_brand_string( const char raw_name[48], char normalized_name[48]) { normalized_name[0] = '\0'; char name[48]; memcpy(name, raw_name, sizeof(name)); /* * First find the end of the string * Start search from the end because some brand strings contain zeroes in the middle */ char* name_end = &name[48]; while (name_end[-1] == '\0') { /* * Adject name_end by 1 position and check that we didn't reach the start of the brand string. * This is possible if all characters are zero. */ if (--name_end == name) { /* All characters are zeros */ return 0; } } struct parser_state parser_state = { 0 }; /* Now unify all whitespace characters: replace tabs and '\0' with spaces */ { bool inside_parentheses = false; for (char* char_ptr = name; char_ptr != name_end; char_ptr++) { switch (*char_ptr) { case '(': inside_parentheses = true; *char_ptr = ' '; break; case ')': inside_parentheses = false; *char_ptr = ' '; break; case '@': parser_state.frequency_separator = char_ptr; case '\0': case '\t': *char_ptr = ' '; break; default: if (inside_parentheses) { *char_ptr = ' '; } } } } /* Iterate through all tokens and erase redundant parts */ { bool is_token = false; char* token_start = NULL; for (char* char_ptr = name; char_ptr != name_end; char_ptr++) { if (*char_ptr == ' ') { if (is_token) { is_token = false; if (!transform_token(token_start, char_ptr, &parser_state)) { name_end = char_ptr; break; } } } else { if (!is_token) { is_token = true; token_start = char_ptr; } } } if (is_token) { transform_token(token_start, name_end, &parser_state); } } /* If this is an engineering sample, return empty string */ if (parser_state.engineering_sample) { return 0; } /* Check if there is some string before the frequency separator. */ if (parser_state.frequency_separator != NULL) { if (is_space(name, parser_state.frequency_separator)) { /* If only frequency is available, return empty string */ return 0; } } /* Compact tokens: collapse multiple spacing into one */ { char* output_ptr = normalized_name; char* token_start = NULL; bool is_token = false; bool previous_token_ends_with_dash = true; bool current_token_starts_with_dash = false; uint32_t token_count = 1; for (char* char_ptr = name; char_ptr != name_end; char_ptr++) { const char character = *char_ptr; if (character == ' ') { if (is_token) { is_token = false; if (!current_token_starts_with_dash && !previous_token_ends_with_dash) { token_count += 1; *output_ptr++ = ' '; } output_ptr = move_token(token_start, char_ptr, output_ptr); /* Note: char_ptr[-1] exists because there is a token before this space */ previous_token_ends_with_dash = (char_ptr[-1] == '-'); } } else { if (!is_token) { is_token = true; token_start = char_ptr; current_token_starts_with_dash = (character == '-'); } } } if (is_token) { if (!current_token_starts_with_dash && !previous_token_ends_with_dash) { token_count += 1; *output_ptr++ = ' '; } output_ptr = move_token(token_start, name_end, output_ptr); } if (parser_state.frequency_token && token_count <= 1) { /* The only remaining part is frequency */ normalized_name[0] = '\0'; return 0; } if (output_ptr < &normalized_name[48]) { *output_ptr = '\0'; } else { normalized_name[47] = '\0'; } return (uint32_t) (output_ptr - normalized_name); } } static const char* vendor_string_map[] = { [cpuinfo_vendor_intel] = "Intel", [cpuinfo_vendor_amd] = "AMD", [cpuinfo_vendor_via] = "VIA", [cpuinfo_vendor_hygon] = "Hygon", [cpuinfo_vendor_rdc] = "RDC", [cpuinfo_vendor_dmp] = "DM&P", [cpuinfo_vendor_transmeta] = "Transmeta", [cpuinfo_vendor_cyrix] = "Cyrix", [cpuinfo_vendor_rise] = "Rise", [cpuinfo_vendor_nsc] = "NSC", [cpuinfo_vendor_sis] = "SiS", [cpuinfo_vendor_nexgen] = "NexGen", [cpuinfo_vendor_umc] = "UMC", }; uint32_t cpuinfo_x86_format_package_name( enum cpuinfo_vendor vendor, const char normalized_brand_string[48], char package_name[CPUINFO_PACKAGE_NAME_MAX]) { if (normalized_brand_string[0] == '\0') { package_name[0] = '\0'; return 0; } const char* vendor_string = NULL; if ((uint32_t) vendor < (uint32_t) CPUINFO_COUNT_OF(vendor_string_map)) { vendor_string = vendor_string_map[(uint32_t) vendor]; } if (vendor_string == NULL) { strncpy(package_name, normalized_brand_string, CPUINFO_PACKAGE_NAME_MAX); package_name[CPUINFO_PACKAGE_NAME_MAX - 1] = '\0'; return 0; } else { snprintf(package_name, CPUINFO_PACKAGE_NAME_MAX, "%s %s", vendor_string, normalized_brand_string); return (uint32_t) strlen(vendor_string) + 1; } }