diff --git a/xerus/data.py b/xerus/data.py index e2a9392..bbc70bc 100644 --- a/xerus/data.py +++ b/xerus/data.py @@ -5,6 +5,8 @@ import re import pyexcel from collections import namedtuple +from . import merge + def extract_from_file(path): """ Extract and yields notes from the file at path """ pass @@ -21,25 +23,39 @@ def extract_test(): date = f"{day} mars 2020" yield NoteInfo(person, category, date, time, author, text) +def sanitize_text(text): + return text.strip().capitalize() + + def authion_extractor(sheet): """ Extract data from Authion format """ - pass + def sanitize_row(row): + return row -def sanitize_category(name): - return name.strip().capitalize() + def process_rows(iterable): + return merge.fill_with_sentinel(1, 0, + merge.fill_with_master(1, 2, + merge.fill_empty_cells(1, iterable))) + + for row in process_rows(sheet.rows()): + try: + (day, time, author, category, text) = row + if text: + yield (day, sanitize_text(category), sanitize_text(time), author, text) + except ValueError: + print(f"Invalid row (Authion): {row}") def couasnon_extractor(sheet): """ Extract data from Couasnon format """ - from . import merge def sanitize_row(row): (day, cat, matin, soir, nuit, *rest) = row if not day or not cat: raise ValueError - return (day, sanitize_category(cat), str(matin).strip(), str(soir).strip(), str(nuit).strip()) + return (day, sanitize_text(cat), str(matin).strip(), str(soir).strip(), str(nuit).strip()) - for row in tuple(merge.fill_value(0, sheet.rows())): + for row in tuple(merge.fill_empty_cells(0, sheet.rows())): try: (day, category, matin, soir, nuit) = sanitize_row(row) if matin: @@ -102,7 +118,7 @@ class Notes(dict): CONFIG = ( { "source": "/home/artus/Projets/python/xerus/test/", "extractor": authion_extractor, - "file_pattern": "(\d) (\w) (\d).ods", + "file_pattern": "\d{2} (\w+) (\d{4}).ods", }, { "source": "/home/artus/Projets/python/xerus/test/", "extractor": couasnon_extractor, diff --git a/xerus/merge.py b/xerus/merge.py index bf20589..4a6bd6c 100644 --- a/xerus/merge.py +++ b/xerus/merge.py @@ -15,10 +15,68 @@ def rows_merged(rows, conf={}): pass -def fill_value(idx, iterable): +def fill_empty_cells(idx, iterable): """ Fill the value at index with the last found """ value = None for row in iterable: value = row[idx] or value row[idx] = value yield row + + +def fill_with_sentinel(sentinel, target, iterable): + """ Find a cycle in sentinel column, and fill the target column with unique value """ + buff = [] + + value = None + seen = set() + last_seen = None + for row in iterable: + if row[sentinel] != last_seen: + last_seen = row[sentinel] + if last_seen in seen: + # Flush the buffer + for r in buff: + r[target] = value + yield r + buff.clear() + seen.clear() + seen.add(last_seen) + + value = row[target] or value + buff.append(row) + + for r in buff: + r[target] = value + yield r + + +def fill_with_master(master, target, iterable): + """ Fill the target column cells with unique value while master column is unchanged """ + buff = [] + target_value = None + last_in_master = None + for row in iterable: + # Search for target value + if last_in_master and row[master] != last_in_master: + if not target_value: + print("Warning: No target value ->", + list(map(lambda i: (i[master], i[target]), buff))) + # Fill target and yield buffer + for buff_row in buff: + buff_row[target] = target_value + yield buff_row + buff.clear() + target_value = None + + target_value = row[target] or target_value + last_in_master = row[master] + buff.append(row) + + for buff_row in buff: + buff_row[target] = target_value + yield buff_row + + + +