-> Count the number of characters, words, and lines in a file.

FILE_PATH = "./text.txt"

def main() -> None:
    c, l, w = 0, 0, 0
    with open(FILE_PATH, "r") as f:
        for line in f.readlines():
            c += len(line)
            w += len(line.split())
            l += 1
    print(f"Number of char: {c}, line: {l} and word: {w}")

if __name__ == "__main__":
    exit(main() or 0)
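
For a quick sanity check, here is the run this script would produce on one illustrative input; the contents of text.txt are an assumption for the example, not part of the original:

    >>> # with ./text.txt holding the two lines "hello world" and "foo bar baz"
    >>> main()
    Number of char: 24, line: 2 and word: 5

Each newline counts as a character here, which is how two 11-character lines add up to 24 characters.
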
-> Identify different tokens and token types from a file.

from enum import Enum

FILE_PATH = "./test.txt"

class TokType(Enum):
    String = 1
    Constant = 2
    Special = 3

def tokens(data: list):
    toks = []
    cl = 0
    for line in data:
        cl += 1
        for tok in line.split():
            try:
                int(tok)
                toks.append((cl, tok, TokType.Constant))
            except ValueError:
                if tok.isalpha():
                    toks.append((cl, tok, TokType.String))
                else:
                    toks.append((cl, tok, TokType.Special))
    return toks

def main() -> None:
    data = []
    with open(FILE_PATH, "r") as f:
        data = f.readlines()
    toks = tokens(data)
    print(f"Total token(s): {len(toks)}")
    print("Tokens:-")
    for tok in toks:
        print(f"line: {tok[0]}\t\"{tok[1]}\"\tis\t{tok[2]}")

if __name__ == "__main__":
    exit(main() or 0)
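
To illustrate the classification rule, a direct call on one made-up line (the input is hypothetical; tokens() is the function defined above):

    >>> tokens(["count = 42 ;"])
    [(1, 'count', <TokType.String: 1>), (1, '=', <TokType.Special: 3>), (1, '42', <TokType.Constant: 2>), (1, ';', <TokType.Special: 3>)]

Note that a mixed token such as x1 is neither an integer nor purely alphabetic, so it would fall through to TokType.Special.
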
-> To implement a lexical analyzer without using the lex tool.

from enum import Enum

FILE_PATH = "./hello.c"
K_WORDS = ["int", "float", "char", "return", "double", "break", "continue",
           "if", "else", "for", "while", "do", "include"]

class TkType(Enum):
    Ident = 0
    Literal = 1
    Symbol = 2
    Const = 3
    Keyword = 4

def toks(data: str):
    a = data
    tks = []
    buf = []
    n = len(data)
    i = 0
    l, c = 1, 1  # current line and column (1-based)
    while i < n:
        if a[i] == ' ' or a[i] == '\t':
            c += 1
            i += 1
            continue
        if a[i] == '\n':
            l += 1
            c = 1
            i += 1
            continue
        buf.clear()
        if a[i].isalpha():
            # run of letters: keyword or identifier
            while i < n and a[i].isalpha():
                buf.append(a[i])
                i += 1
                c += 1
            i -= 1  # step back one char; the shared advance below re-consumes it
            c -= 1
            if "".join(buf) in K_WORDS:
                tks.append(((l, c - len(buf) + 1), "".join(buf), TkType.Keyword))
            else:
                tks.append(((l, c - len(buf) + 1), "".join(buf), TkType.Ident))
        elif a[i].isdigit():
            # run of digits: integer constant
            while i < n and a[i].isdigit():
                buf.append(a[i])
                i += 1
                c += 1
            i -= 1
            c -= 1
            tks.append(((l, c - len(buf) + 1), "".join(buf), TkType.Const))
        elif a[i] == '"':
            # string literal: consume up to the closing quote
            i += 1
            c += 1
            while i < n and a[i] != '"':
                buf.append(a[i])
                i += 1
                c += 1
            tks.append(((l, c - len(buf) - 1), '"' + "".join(buf) + '"', TkType.Literal))
        else:
            # any other single character is reported as a symbol
            tks.append(((l, c), a[i], TkType.Symbol))
        i += 1
        c += 1
    return tks

def main() -> None:
    data = ""
    with open(FILE_PATH, "r") as f:
        data = f.read()
    tks = toks(data)
    print("TokenType\tline:col:Token")
    for tk in tks:
        print(f"{tk[2]}\t{tk[0][0]}:{tk[0][1]}:{tk[1]}")

if __name__ == "__main__":
    exit(main() or 0)
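
To make the line/column bookkeeping concrete, here is the token stream for one small hand-traced input; the C fragment is illustrative, not a file from the original:

    >>> toks("int x = 42;\n")
    [((1, 1), 'int', <TkType.Keyword: 4>), ((1, 5), 'x', <TkType.Ident: 0>), ((1, 7), '=', <TkType.Symbol: 2>), ((1, 9), '42', <TkType.Const: 3>), ((1, 11), ';', <TkType.Symbol: 2>)]

String literals are returned with their surrounding quotes, and a literal's reported column is that of its opening quote.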