From dc22f5355440ddc904f28338a56ba44f64fc9edb Mon Sep 17 00:00:00 2001
From: Artus
Date: Mon, 13 Apr 2020 15:07:15 +0200
Subject: [PATCH] working for couasnon files

---
Review notes (placed after the three-dash line, so they are not part of the
commit message):

 * xerus/data.py adds two sheet extractors (Authion: stub, Couasnon:
   implemented), a workbook finder driven by a filename regex, and
   extract_all(), which yields one NoteInfo per non-empty Matin/Soir/Nuit
   cell of every sheet except the first one of each workbook.
 * Notes.load() compares its argument with "testdata" using `==`; identity
   comparison against a str literal depends on interning.
 * The CONFIG file patterns are raw strings and escape the dot before "ods",
   so `\d` / `\w` are regex classes and the "." is literal. re.match() only
   anchors the start, so trailing characters after ".ods" still match.
 * authion_extractor() is still a stub; it returns an empty tuple so that
   extract_all() can iterate over its result.
 * merge.rows_merged() is a stub; its `conf` default is None instead of a
   shared mutable dict.
 * merge.fill_value() writes the carried value back into each row in place.
 * NOTE(review): Notes.__init__() ignores its `source` argument and always
   loads Notes.CONFIG, so the "testdata" branch of load() cannot be reached
   through the constructor.
 * NOTE(review): Notes.__repr__() returns an empty string - confirm intended.
 * NOTE(review): CONFIG hard-codes absolute paths below /home/artus/.
 * The `index` blob ids below have not been recomputed.

 .gitignore     |   7 +++
 xerus/data.py  | 140 +++++++++++++++++++++++++++++++++++--------------
 xerus/gui.py   |   1 -
 xerus/merge.py |  24 +++++++++
 4 files changed, 131 insertions(+), 41 deletions(-)
 create mode 100644 xerus/merge.py

diff --git a/.gitignore b/.gitignore
index 7f7cccc..87dd858 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,10 @@
+# Test data files
+test/
+*.ods
+*.xlsx
+
+run.sh
+
 # ---> Python
 # Byte-compiled / optimized / DLL files
 __pycache__/
diff --git a/xerus/data.py b/xerus/data.py
index 6dfd3bf..e2a9392 100644
--- a/xerus/data.py
+++ b/xerus/data.py
@@ -1,44 +1,10 @@
 # -*- coding:utf-8 -*-
 import random
+import os
+import re
+import pyexcel
 from collections import namedtuple
 
-def test_data():
-    return { "Henri" :
-            { "Santé": { "23 mars 2020": [] },
-              "Comportement": { "23 mars 2020": [] },
-              "Quotidien": {
-                  "23 mars 2020": [
-                      ( "Matin", "Bien mangé ce matin. Sorti faire un tour pour voir les poules" ),
-                      ( "Après-midi", "Rien de particulier" ),
-                      ],
-                  "24 mars 2020": [
-                      ( "Matin", "Bien mangé ce matin. Sorti faire un tour pour voir les poules", ),
-                      ( "Après-midi", "Rien de particulier" ),
-                      ],
-                  "25 mars 2020": [
-                      ( "Matin", "Bien mangé ce matin. Sorti faire un tour pour voir les poules" ),
-                      ( "Après-midi", "Rien de particulier" ),
-                      ],
-                  },
-              "Infos": { "23 mars 2020": [] },
-              "Activité": { "23 mars 2020": [] },
-              },
-            "Étienne":
-            { "Santé": { "23 mars 2020": [] },
-              "Comportement": { "23 mars 2020": [] },
-              "Quotidien": { "23 mars 2020": [] },
-              "Infos": { "23 mars 2020": [] },
-              "Activité": { "23 mars 2020": [] },
-              },
-            "Julia" :
-            { "Santé": { "23 mars 2020": [] },
-              "Comportement": { "23 mars 2020": [] },
-              "Quotidien": { "23 mars 2020": [] },
-              "Infos": { "23 mars 2020": [] },
-              "Activité": { "23 mars 2020": [] },
-              },
-            }
-
 def extract_from_file(path):
     """ Extract and yields notes from the file at path """
     pass
@@ -56,20 +22,114 @@ def extract_test():
                     yield NoteInfo(person, category, date, time, author, text)
 
 
+def authion_extractor(sheet):
+    """ Extract data from Authion format """
+    return ()
+
+def sanitize_category(name):
+    return name.strip().capitalize()
+
+def couasnon_extractor(sheet):
+    """ Extract data from Couasnon format """
+    from . import merge
+
+    def sanitize_row(row):
+        (day, cat, matin, soir, nuit, *rest) = row
+        if not day or not cat:
+            raise ValueError
+        return (day, sanitize_category(cat), str(matin).strip(), str(soir).strip(), str(nuit).strip())
+
+    for row in tuple(merge.fill_value(0, sheet.rows())):
+        try:
+            (day, category, matin, soir, nuit) = sanitize_row(row)
+            if matin:
+                yield (day, category, "Matin", None, matin)
+            if soir:
+                yield (day, category, "Soir", None, soir)
+            if nuit:
+                yield (day, category, "Nuit", None, nuit)
+        except ValueError:
+            print(f"Invalid row (Couasnon): {row}")
+
+
+def get_valid_books(source, pattern):
+    """ Returns all valid paths of data files """
+    print(f"Loading {pattern} from {source}")
+    m = re.compile(pattern)
+    for (root, _, filenames) in os.walk(source):
+        for filename in filenames:
+            matches = m.match(filename)
+            if matches:
+                print(f"Found {filename}")
+                yield (pyexcel.load_book(file_name=os.path.join(root, filename)),
+                        matches.group(1),
+                        matches.group(2),
+                        )
+
+
+def sanitize_date(day, month, year):
+    month = month.strip().capitalize()
+    year = year.strip()
+    return f"{day} {month} {year}"
+
+def extract_all(sources):
+    """ Main function to extract all notes """
+    for config in sources:
+        for (workbook, month, year) in get_valid_books(config["source"], config["file_pattern"]):
+            print(f"Loading data from {month} {year}")
+            # Iter on sheets
+            sheets = workbook.sheet_names()[1:]
+            print(f"Found {len(sheets)} persons")
+            # Parse every sheet
+            for person in sheets:
+                for (day, category, time, author, text) in config["extractor"](
+                        workbook.sheet_by_name(person)
+                        ):
+                    yield NoteInfo(
+                            person,
+                            category,
+                            sanitize_date(day, month, year),
+                            time,
+                            author,
+                            text
+                            )
+
+
 class Notes(dict):
     """ Store of notes grouped by subject and category """
 
+    CONFIG = (
+            { "source": "/home/artus/Projets/python/xerus/test/",
+              "extractor": authion_extractor,
+              "file_pattern": r"(\d) (\w) (\d)\.ods",
+              },
+            { "source": "/home/artus/Projets/python/xerus/test/",
+              "extractor": couasnon_extractor,
+              "file_pattern": r"(\w*) (\d{4})\.ods",
+              },
+            )
+
     def __init__(self, source):
         super().__init__()
         self.categories = []
 
-        self.load(source)
+        self.load(self.CONFIG)
+        print(self)
 
-    def load(self, source):
-        if source is "testdata":
+    def __repr__(self):
+        return f""
+
+    def load(self, config):
+        if config == "testdata":
             for note in extract_test():
                 self.insert(note.person, note.category, note.date
                         , time=note.time, text=note.text)
+        else:
+            for note in extract_all(config):
+                if not note.category:
+                    raise ValueError(f"Note without category: {note}")
+                self.insert(note.person, note.category, note.date,
+                        time=note.time, text=note.text)
 
     def insert(self, person, category, date, time=None, author=None, text=None):
         """ Inserts a new note """
diff --git a/xerus/gui.py b/xerus/gui.py
index 22bff0e..b470665 100644
--- a/xerus/gui.py
+++ b/xerus/gui.py
@@ -63,7 +63,6 @@ class NoteViewer(tk.Frame):
         self.refresh_text(None)
 
     def refresh_text(self, event):
-        print("Refresh text :", event)
         self.text.config(state=tk.NORMAL)
         self.text.delete(1.0, tk.END)
         try:
diff --git a/xerus/merge.py b/xerus/merge.py
new file mode 100644
index 0000000..bf20589
--- /dev/null
+++ b/xerus/merge.py
@@ -0,0 +1,24 @@
+# -*- coding:utf-8 -*-
+
+import pyexcel
+
+"""
+
+Merger module
+
+Utilities to merge data vertically.
+
+"""
+
+def rows_merged(rows, conf=None):
+    """ Returns an iterable over rows with cell values merged (vertically) """
+    pass
+
+
+def fill_value(idx, iterable):
+    """ Fill the value at index with the last found """
+    value = None
+    for row in iterable:
+        value = row[idx] or value
+        row[idx] = value
+        yield row