/*
 * base.c - read "test.txt", split it into space-separated words and build
 * one graph node per distinct word.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

enum {
    READ_CHUNK = 4096,      /* initial size of the file buffer, in bytes */
    WORD_SEPARATOR = ' '    /* the only character that separates words   */
};

/*
 * NOTE(review): the original file used `Node` without declaring it. This
 * definition is inferred from how the fields were used (`content` receives a
 * word, `weights` is cleared as an array of `int`, one entry per distinct
 * word). Replace it with the project's real declaration if one exists.
 */
typedef struct Node {
    char *content;  /* NUL-terminated word; points into the file buffer */
    int *weights;   /* one zero-initialised int per distinct word       */
} Node;

/*
 * Read the whole of `path` into a freshly allocated, NUL-terminated buffer.
 *
 * On success returns the buffer (caller frees it) and stores the number of
 * bytes read, excluding the terminator, in `*out_size`.
 * Returns NULL if the file cannot be opened or read, or on allocation
 * failure.
 */
static char *read_file(const char *path, size_t *out_size)
{
    FILE *fp = fopen(path, "r");
    if (fp == NULL) {
        return NULL;
    }

    size_t cap = READ_CHUNK;
    size_t len = 0;
    char *buf = malloc(cap);

    while (buf != NULL) {
        /* Always keep one byte spare for the terminating NUL. */
        len += fread(buf + len, 1, cap - 1 - len, fp);
        if (len < cap - 1) {
            break;  /* short read: end of file or read error */
        }
        if (cap > SIZE_MAX / 2) {
            free(buf);
            buf = NULL;
            break;
        }
        cap *= 2;
        /* Keep the old pointer until realloc is known to have succeeded. */
        char *grown = realloc(buf, cap);
        if (grown == NULL) {
            free(buf);
        }
        buf = grown;
    }

    if (buf != NULL && ferror(fp)) {
        free(buf);
        buf = NULL;
    }
    fclose(fp);

    if (buf == NULL) {
        return NULL;
    }
    buf[len] = '\0';
    *out_size = len;
    return buf;
}

/*
 * Walk `text[0..size)` and find every maximal run of non-separator bytes.
 *
 * With `words == NULL` the text is left untouched and only the number of
 * words is returned. Otherwise `words[k]` is set to the start of the k-th
 * word and every separator is overwritten with '\0', so each word becomes a
 * C string. `text[size]` must already be '\0' so the last word is terminated.
 */
static size_t split_words(char *text, size_t size, char **words)
{
    size_t count = 0;
    int in_word = 0;

    for (size_t i = 0; i < size; i++) {
        if (text[i] == WORD_SEPARATOR) {
            in_word = 0;
            if (words != NULL) {
                text[i] = '\0';
            }
        } else if (!in_word) {
            in_word = 1;
            if (words != NULL) {
                words[count] = &text[i];
            }
            count++;
        }
    }
    return count;
}

/*
 * Give every word a 0-based id such that equal words share an id and ids are
 * handed out in order of first appearance. Writes `ids[0..nb_words)` and
 * returns the number of distinct words.
 */
static size_t assign_word_ids(char *const *words, size_t nb_words, size_t *ids)
{
    size_t nb_unique = 0;

    for (size_t i = 0; i < nb_words; i++) {
        /* Look for an earlier occurrence of the same word. */
        size_t j = 0;
        while (j < i && strcmp(words[j], words[i]) != 0) {
            j++;
        }
        ids[i] = (j < i) ? ids[j] : nb_unique++;
    }
    return nb_unique;
}

/*
 * Entry point: load "test.txt", split it into words, number the distinct
 * words and create one Node per distinct word with a zeroed weight row.
 *
 * Returns EXIT_SUCCESS (0) on success, including for a file with no words,
 * and EXIT_FAILURE if the file cannot be read or memory runs out.
 */
int main(void)
{
    static const char input_path[] = "test.txt";
    int rc = EXIT_FAILURE;
    char **words = NULL;
    size_t *word_ids = NULL;
    Node *nodes = NULL;
    size_t nb_words = 0;
    size_t nb_unique = 0;
    size_t size = 0;

    char *raw_file = read_file(input_path, &size);
    if (raw_file == NULL) {
        fprintf(stderr, "cannot read %s\n", input_path);
        return EXIT_FAILURE;
    }

    /* First pass only counts, so the word table can be sized exactly. */
    nb_words = split_words(raw_file, size, NULL);
    if (nb_words == 0) {
        rc = EXIT_SUCCESS;  /* no words: nothing to build */
        goto out;
    }

    words = calloc(nb_words, sizeof *words);
    word_ids = calloc(nb_words, sizeof *word_ids);
    if (words == NULL || word_ids == NULL) {
        goto oom;
    }

    /* Second pass records the word starts and terminates each word. */
    split_words(raw_file, size, words);
    nb_unique = assign_word_ids(words, nb_words, word_ids);

    nodes = calloc(nb_unique, sizeof *nodes);
    if (nodes == NULL) {
        goto oom;
    }
    for (size_t k = 0; k < nb_unique; k++) {
        nodes[k].weights = calloc(nb_unique, sizeof *nodes[k].weights);
        if (nodes[k].weights == NULL) {
            goto oom;
        }
    }
    for (size_t i = 0; i < nb_words; i++) {
        Node *node = &nodes[word_ids[i]];
        if (node->content == NULL) {
            node->content = words[i];  /* first occurrence names the node */
        }
    }

    rc = EXIT_SUCCESS;
    goto out;

oom:
    fprintf(stderr, "out of memory\n");
out:
    if (nodes != NULL) {
        for (size_t k = 0; k < nb_unique; k++) {
            free(nodes[k].weights);
        }
    }
    free(nodes);
    free(word_ids);
    free(words);
    free(raw_file);
    return rc;
}