Comme on le sait, l'analyse lexicale, également connue sous le nom de « scanner », est la première phase du compilateur. Elle convertit le programme d'entrée en une séquence de jetons.
Un programme C se compose de divers jetons et un jeton est soit un mot-clé, un identifiant, une constante, une chaîne littérale ou un symbole.
Par exemple:
- Mots-clés : 'for', 'while', 'if', etc.
- Identifiant : nom de la variable, nom de la fonction, etc.
- Opérateurs : '+', '++', '-', etc.
- Séparateurs : ',', ';', etc.
Exemple : pour l'entrée 'int a = b + 1c;', il identifiera 'int' comme mot-clé, 'a' comme identifiant, '=' comme opérateur, etc.
Approche :
- L'idée est de diviser la chaîne d'entrée (un code C) en jetons tels que des mots-clés, des identifiants, des opérateurs, des nombres entiers et des nombres réels.
- Il vérifie chaque jeton à l'aide de fonctions d'assistance (isKeyword, isInteger, validIdentifier, etc.) et affiche sa catégorie.
Ci-dessous se trouve un programme qui affiche tous les mots-clés, littéraux, identifiants valides, identifiants invalides, nombres entiers et nombres réels d'un code C donné :
C++
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Character- and token-classification helpers for a minimal lexical
// analyzer of C source text. The string parameters are const-qualified
// because none of these helpers writes through them; callers passing a
// plain char* keep working unchanged.

// Returns 'true' if the character is a DELIMITER, i.e. a character that
// ends the token being scanned: a space, a comma, a semicolon, an operator
// character or a bracket.
bool isDelimiter(char ch)
{
    // strchr() also matches the terminating NUL of the set it searches, so
    // '\0' must be rejected explicitly.
    return ch != '\0' && strchr(" +-*/,;><=()[]{}", ch) != NULL;
}

// Returns 'true' if the character is an OPERATOR
// (one of + - * / > < =).
bool isOperator(char ch)
{
    return ch != '\0' && strchr("+-*/><=", ch) != NULL;
}

// Returns 'true' if the string is a VALID IDENTIFIER: a letter or an
// underscore followed by any number of letters, digits or underscores.
// NULL and the empty string are not valid identifiers.
bool validIdentifier(const char* str)
{
    if (str == NULL || str[0] == '\0')
        return false;

    // <ctype.h> functions require a value representable as unsigned char.
    if (!(isalpha((unsigned char)str[0]) || str[0] == '_'))
        return false;

    for (size_t i = 1; str[i] != '\0'; i++) {
        if (!(isalnum((unsigned char)str[i]) || str[i] == '_'))
            return false;
    }
    return true;
}

// Returns 'true' if the string is a KEYWORD (one of the 32 keywords of
// classic C listed in the table below).
bool isKeyword(const char* str)
{
    static const char* const keywords[] = {
        "auto",     "break",   "case",   "char",     "const",  "continue",
        "default",  "do",      "double", "else",     "enum",   "extern",
        "float",    "for",     "goto",   "if",       "int",    "long",
        "register", "return",  "short",  "signed",   "sizeof", "static",
        "struct",   "switch",  "typedef", "union",   "unsigned", "void",
        "volatile", "while"
    };

    if (str == NULL)
        return false;

    for (size_t i = 0; i < sizeof keywords / sizeof keywords[0]; i++) {
        if (strcmp(str, keywords[i]) == 0)
            return true;
    }
    return false;
}

// Returns 'true' if the string is an INTEGER: an optional leading '-'
// followed by at least one decimal digit and nothing else.
bool isInteger(const char* str)
{
    if (str == NULL)
        return false;

    // Skip the sign; a lone "-" (or an empty string) has no digits.
    size_t i = (str[0] == '-') ? 1 : 0;
    if (str[i] == '\0')
        return false;

    for (; str[i] != '\0'; i++) {
        if (!isdigit((unsigned char)str[i]))
            return false;
    }
    return true;
}
bool isRealNumber(char* str) { int i len = strlen(str); bool hasDecimal = false; if (len == 0) return (false); for (i = 0; i < len; i++) { if (str[i] != '0' && str[i] != '1' && str[i] != '2' && str[i] != '3' && str[i] != '4' && str[i] != '5' && str[i] != '6' && str[i] != '7' && str[i] != '8' && str[i] != '9' && str[i] != '.' || (str[i] == '-' && i > 0)) return (false); if (str[i] == '.') hasDecimal = true; } return (hasDecimal); } // Extracts the SUBSTRING. char* subString(char* str int left int right) { int i; char* subStr = (char*)malloc( sizeof(char) * (right - left + 2)); for (i = left; i <= right; i++) subStr[i - left] = str[i]; subStr[right - left + 1] = ' '; return (subStr); } // Parsing the input STRING. void parse(char* str) { int left = 0 right = 0; int len = strlen(str); while (right <= len && left <= right) { if (isDelimiter(str[right]) == false) right++; if (isDelimiter(str[right]) == true && left == right) { if (isOperator(str[right]) == true) printf(''%c' IS AN OPERATORn' str[right]); right++; left = right; } else if (isDelimiter(str[right]) == true && left != right || (right == len && left != right)) { char* subStr = subString(str left right - 1); if (isKeyword(subStr) == true) printf(''%s' IS A KEYWORDn' subStr); else if (isInteger(subStr) == true) printf(''%s' IS AN INTEGERn' subStr); else if (isRealNumber(subStr) == true) printf(''%s' IS A REAL NUMBERn' subStr); else if (validIdentifier(subStr) == true && isDelimiter(str[right - 1]) == false) printf(''%s' IS A VALID IDENTIFIERn' subStr); else if (validIdentifier(subStr) == false && isDelimiter(str[right - 1]) == false) printf(''%s' IS NOT A VALID IDENTIFIERn' subStr); left = right; } } return; } // DRIVER FUNCTION int main() { // maximum length of string is 100 here char str[100] = 'int a = b + 1c; '; parse(str); // calling the parse function return (0); }
/* C */
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Character- and token-classification helpers for a minimal lexical
// analyzer of C source text. The string parameters are const-qualified
// because none of these helpers writes through them; callers passing a
// plain char * keep working unchanged.

// Returns 'true' if the character is a DELIMITER, i.e. a character that
// ends the token being scanned: a space, a comma, a semicolon, an operator
// character or a bracket.
bool isDelimiter(char ch)
{
    // strchr() also matches the terminating NUL of the set it searches, so
    // '\0' must be rejected explicitly.
    return ch != '\0' && strchr(" +-*/,;><=()[]{}", ch) != NULL;
}

// Returns 'true' if the character is an OPERATOR
// (one of + - * / > < =).
bool isOperator(char ch)
{
    return ch != '\0' && strchr("+-*/><=", ch) != NULL;
}

// Returns 'true' if the string is a VALID IDENTIFIER: a letter or an
// underscore followed by any number of letters, digits or underscores.
// NULL and the empty string are not valid identifiers.
bool validIdentifier(const char *str)
{
    if (str == NULL || str[0] == '\0')
        return false;

    // <ctype.h> functions require a value representable as unsigned char.
    if (!(isalpha((unsigned char)str[0]) || str[0] == '_'))
        return false;

    for (size_t i = 1; str[i] != '\0'; i++) {
        if (!(isalnum((unsigned char)str[i]) || str[i] == '_'))
            return false;
    }
    return true;
}

// Returns 'true' if the string is a KEYWORD (one of the 32 keywords of
// classic C listed in the table below).
bool isKeyword(const char *str)
{
    static const char *const keywords[] = {
        "auto",     "break",   "case",   "char",     "const",  "continue",
        "default",  "do",      "double", "else",     "enum",   "extern",
        "float",    "for",     "goto",   "if",       "int",    "long",
        "register", "return",  "short",  "signed",   "sizeof", "static",
        "struct",   "switch",  "typedef", "union",   "unsigned", "void",
        "volatile", "while"
    };

    if (str == NULL)
        return false;

    for (size_t i = 0; i < sizeof keywords / sizeof keywords[0]; i++) {
        if (strcmp(str, keywords[i]) == 0)
            return true;
    }
    return false;
}

// Returns 'true' if the string is an INTEGER: an optional leading '-'
// followed by at least one decimal digit and nothing else.
bool isInteger(const char *str)
{
    if (str == NULL)
        return false;

    // Skip the sign; a lone "-" (or an empty string) has no digits.
    size_t i = (str[0] == '-') ? 1 : 0;
    if (str[i] == '\0')
        return false;

    for (; str[i] != '\0'; i++) {
        if (!isdigit((unsigned char)str[i]))
            return false;
    }
    return true;
}
// Returns 'true' if the string is a REAL NUMBER: an optional leading '-'
// followed by decimal digits containing exactly one '.', with at least one
// digit overall ("3.14", "-0.5", ".5" and "5." qualify; "." and "1.2.3"
// do not).
bool isRealNumber(const char *str)
{
    if (str == NULL)
        return false;

    bool hasDecimal = false;
    bool hasDigit = false;

    for (size_t i = (str[0] == '-') ? 1 : 0; str[i] != '\0'; i++) {
        if (str[i] == '.') {
            if (hasDecimal)
                return false; // a second decimal point
            hasDecimal = true;
        } else if (str[i] >= '0' && str[i] <= '9') {
            hasDigit = true;
        } else {
            return false;
        }
    }
    return hasDecimal && hasDigit;
}

// Extracts the SUBSTRING str[left..right] (both bounds inclusive) into a
// newly allocated, NUL-terminated buffer. An empty string is produced when
// right < left. Returns NULL if the allocation fails. The caller owns the
// result and must free() it.
char *subString(const char *str, int left, int right)
{
    size_t count = (right >= left) ? (size_t)(right - left) + 1 : 0;

    char *subStr = malloc(count + 1);
    if (subStr == NULL)
        return NULL;

    if (count > 0)
        memcpy(subStr, str + left, count);
    subStr[count] = '\0';
    return subStr;
}

// Parsing the input STRING: splits it at delimiter characters and prints
// one line per operator character and per token, naming its category.
// Delimiters that are not operators (space, comma, semicolon, brackets)
// are skipped silently.
void parse(const char *str)
{
    if (str == NULL)
        return;

    int len = (int)strlen(str);
    int left = 0;

    while (left < len) {
        // A delimiter is a one-character lexeme of its own.
        if (isDelimiter(str[left])) {
            if (isOperator(str[left]))
                printf("'%c' IS AN OPERATOR\n", str[left]);
            left++;
            continue;
        }

        // Otherwise the token runs up to the next delimiter or to the end
        // of the string; 'right' never moves past the terminator.
        int right = left;
        while (right < len && !isDelimiter(str[right]))
            right++;

        char *subStr = subString(str, left, right - 1);
        if (subStr == NULL) {
            fprintf(stderr, "parse: out of memory\n");
            return;
        }

        if (isKeyword(subStr))
            printf("'%s' IS A KEYWORD\n", subStr);
        else if (isInteger(subStr))
            printf("'%s' IS AN INTEGER\n", subStr);
        else if (isRealNumber(subStr))
            printf("'%s' IS A REAL NUMBER\n", subStr);
        else if (validIdentifier(subStr))
            printf("'%s' IS A VALID IDENTIFIER\n", subStr);
        else
            printf("'%s' IS NOT A VALID IDENTIFIER\n", subStr);

        free(subStr); // the token copy is only needed for classification
        left = right;
    }
}

// DRIVER FUNCTION
int main(void)
{
    // maximum length of string is 100 here
    char str[100] = "int a = b + 1c; ";

    parse(str); // calling the parse function

    return 0;
}
// Java
import java.util.Arrays;

/**
 * Minimal lexical analyzer for C source text: splits a string at delimiter
 * characters and prints the category of every operator and token.
 */
public class Parser {

    /** Characters that end the token being scanned. */
    private static final String DELIMITERS = " +-*/,;><=()[]{}";

    /** The delimiters that are reported as operators. */
    private static final String OPERATORS = "+-*/><=";

    /** The 32 keywords of classic C. */
    private static final String[] KEYWORDS = {
        "auto", "break", "case", "char", "const", "continue", "default",
        "do", "double", "else", "enum", "extern", "float", "for", "goto",
        "if", "int", "long", "register", "return", "short", "signed",
        "sizeof", "static", "struct", "switch", "typedef", "union",
        "unsigned", "void", "volatile", "while"
    };

    /** Returns true if {@code ch} is a delimiter. */
    public static boolean isDelimiter(char ch) {
        return DELIMITERS.indexOf(ch) != -1;
    }

    /** Returns true if {@code ch} is an operator character. */
    public static boolean isOperator(char ch) {
        return OPERATORS.indexOf(ch) != -1;
    }

    /**
     * Returns true if {@code str} is a valid identifier: a letter or an
     * underscore followed by any number of letters, digits or underscores.
     * {@code null} and the empty string are not valid.
     */
    public static boolean validIdentifier(String str) {
        return str != null && str.matches("[A-Za-z_][A-Za-z0-9_]*");
    }

    /** Returns true if {@code str} is one of the C keywords. */
    public static boolean isKeyword(String str) {
        return Arrays.asList(KEYWORDS).contains(str);
    }

    /**
     * Returns true if {@code str} is an integer: an optional leading '-'
     * followed by at least one ASCII decimal digit.
     */
    public static boolean isInteger(String str) {
        return str != null && str.matches("-?[0-9]+");
    }

    /**
     * Returns true if {@code str} is a real number: an optional leading '-'
     * followed by ASCII digits containing exactly one '.', with at least one
     * digit overall ("3.14", ".5" and "5." qualify; "." and "1.2.3" do not).
     */
    public static boolean isRealNumber(String str) {
        return str != null && str.matches("-?([0-9]+\\.[0-9]*|\\.[0-9]+)");
    }

    /** Returns the characters of {@code str} from index left to right, both inclusive. */
    public static String subString(String str, int left, int right) {
        return str.substring(left, right + 1);
    }

    /**
     * Splits {@code str} at delimiters and prints one line per operator
     * character and per token, naming its category. Delimiters that are not
     * operators are skipped silently.
     */
    public static void parse(String str) {
        int len = str.length();
        int left = 0;

        while (left < len) {
            char ch = str.charAt(left);

            // A delimiter is a one-character lexeme of its own.
            if (isDelimiter(ch)) {
                if (isOperator(ch))
                    System.out.println("'" + ch + "' IS AN OPERATOR");
                left++;
                continue;
            }

            // Otherwise the token runs up to the next delimiter or to the
            // end of the string; charAt() is never called with index len.
            int right = left;
            while (right < len && !isDelimiter(str.charAt(right)))
                right++;

            String subStr = subString(str, left, right - 1);

            if (isKeyword(subStr))
                System.out.println("'" + subStr + "' IS A KEYWORD");
            else if (isInteger(subStr))
                System.out.println("'" + subStr + "' IS AN INTEGER");
            else if (isRealNumber(subStr))
                System.out.println("'" + subStr + "' IS A REAL NUMBER");
            else if (validIdentifier(subStr))
                System.out.println("'" + subStr + "' IS A VALID IDENTIFIER");
            else
                System.out.println("'" + subStr + "' IS NOT A VALID IDENTIFIER");

            left = right;
        }
    }

    /** Runs the analyzer on a sample statement. */
    public static void main(String[] args) {
        String str = "int a = b + 1c; ";
        parse(str);
    }
}
# Python
"""Minimal lexical analyzer for C source text.

Splits a string at delimiter characters and prints the category of every
operator and token.
"""
import re

# Characters that end the token being scanned.
_DELIMITERS = " +-*/,;><=()[]{}"
# The delimiters that are reported as operators.
_OPERATORS = "+-*/><="
# The 32 keywords of classic C.
_KEYWORDS = frozenset([
    "auto", "break", "case", "char", "const", "continue", "default", "do",
    "double", "else", "enum", "extern", "float", "for", "goto", "if", "int",
    "long", "register", "return", "short", "signed", "sizeof", "static",
    "struct", "switch", "typedef", "union", "unsigned", "void", "volatile",
    "while",
])

_IDENTIFIER_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")
_INTEGER_RE = re.compile(r"-?[0-9]+")
_REAL_RE = re.compile(r"-?(?:[0-9]+\.[0-9]*|\.[0-9]+)")


def is_delimiter(ch):
    """Return True if the single character ch is a delimiter."""
    # The length check matters because '' is "in" every string.
    return len(ch) == 1 and ch in _DELIMITERS


def is_operator(ch):
    """Return True if the single character ch is an operator character."""
    return len(ch) == 1 and ch in _OPERATORS


def valid_identifier(str):
    """Return True if str is a valid identifier.

    A valid identifier is a letter or an underscore followed by any number
    of letters, digits or underscores; the empty string is not valid.
    """
    return bool(str) and _IDENTIFIER_RE.fullmatch(str) is not None


def is_keyword(str):
    """Return True if str is one of the C keywords."""
    return str in _KEYWORDS


def is_integer(str):
    """Return True if str is an optional leading '-' and at least one ASCII digit."""
    return bool(str) and _INTEGER_RE.fullmatch(str) is not None


def is_real_number(str):
    """Return True if str is a real number.

    That is an optional leading '-' followed by ASCII digits containing
    exactly one '.', with at least one digit overall ('3.14', '.5' and '5.'
    qualify; '.' and '1.2.3' do not).
    """
    return bool(str) and _REAL_RE.fullmatch(str) is not None


def sub_string(str, left, right):
    """Return the characters of str from index left to right, both inclusive."""
    return str[left:right + 1]


def parse(str):
    """Split str at delimiters and print the category of each lexeme.

    One line is printed per operator character and per token. Delimiters
    that are not operators are skipped silently.
    """
    length = len(str)
    left = 0

    while left < length:
        ch = str[left]

        # A delimiter is a one-character lexeme of its own.
        if is_delimiter(ch):
            if is_operator(ch):
                print(f"'{ch}' IS AN OPERATOR")
            left += 1
            continue

        # Otherwise the token runs up to the next delimiter or to the end
        # of the string; str[length] is never indexed.
        right = left
        while right < length and not is_delimiter(str[right]):
            right += 1

        sub_str = sub_string(str, left, right - 1)

        if is_keyword(sub_str):
            print(f"'{sub_str}' IS A KEYWORD")
        elif is_integer(sub_str):
            print(f"'{sub_str}' IS AN INTEGER")
        elif is_real_number(sub_str):
            print(f"'{sub_str}' IS A REAL NUMBER")
        elif valid_identifier(sub_str):
            print(f"'{sub_str}' IS A VALID IDENTIFIER")
        else:
            print(f"'{sub_str}' IS NOT A VALID IDENTIFIER")

        left = right


if __name__ == '__main__':
    source = 'int a = b + 1c; '
    parse(source)
Sortie :
'int' IS A KEYWORD
'a' IS A VALID IDENTIFIER
'=' IS AN OPERATOR
'b' IS A VALID IDENTIFIER
'+' IS AN OPERATOR
'1c' IS NOT A VALID IDENTIFIER