# Script lexique de mots
#
# From Mondothèque

#!/usr/bin/env python

import string

# remove punctuation

def remove_punct(f):
    """Join the lines of *f* into one lowercase string without punctuation.

    Args:
        f: Iterable of text lines, for example an open text file.

    Returns:
        The lines joined by single spaces, lowercased, with every character
        listed in ``string.punctuation`` removed. Only ASCII punctuation is
        covered; typographic characters such as guillemets or curly
        apostrophes are left in place.
    """
    # Drop the newline characters, then glue the lines together with spaces.
    tokens = " ".join(line.replace("\n", "") for line in f).lower()
    # A single translate() pass deletes all punctuation characters at once.
    return tokens.translate(str.maketrans("", "", string.punctuation))

# add the words of the text to a set (a collection of unique items)

def lexicon(tokens):
    """Return the set of unique words contained in *tokens*.

    Args:
        tokens: String of whitespace-separated words.

    Returns:
        A new set holding each distinct word once. Runs of whitespace
        (left behind, for instance, where punctuation was removed) do not
        produce empty-string entries.
    """
    # split() without an argument collapses consecutive whitespace, so no
    # empty "word" ends up in the lexicon.
    return set(tokens.split())

# sort words alphabetically & write words to file

def publish(wordset, out=None):
    """Write the words of *wordset* in sorted order, one word per line.

    Args:
        wordset: Iterable of word strings.
        out: Writable text stream that receives the words. When omitted,
            the module-level ``words`` output file is used.
    """
    if out is None:
        # Fall back to the output file opened at module level by the script.
        out = words
    # sorted() orders strings by Unicode code point; pass reverse=True to
    # sorted() to get the list in reverse order instead.
    out.writelines(word + "\n" for word in sorted(wordset))

# define & open input/output file

# Set of unique words; starts out empty and is replaced by lexicon()'s result.
wordset = set()

# Open the input text and the output word list. The with-statement closes
# both files when the block ends, even if one of the steps below raises.
with open("1_notion.txt", "rt", encoding="utf-8") as f, \
        open("mots.txt", "wt", encoding="utf-8") as words:
    # execute functions
    tokens = remove_punct(f)
    wordset = lexicon(tokens)
    publish(wordset)