added authion extractor

This commit is contained in:
2020-04-13 23:16:22 +02:00
parent dc22f53554
commit a4b594b111
2 changed files with 82 additions and 8 deletions

View File

@@ -5,6 +5,8 @@ import re
import pyexcel import pyexcel
from collections import namedtuple from collections import namedtuple
from . import merge
def extract_from_file(path): def extract_from_file(path):
""" Extract and yields notes from the file at path """ """ Extract and yields notes from the file at path """
pass pass
@@ -21,25 +23,39 @@ def extract_test():
date = f"{day} mars 2020" date = f"{day} mars 2020"
yield NoteInfo(person, category, date, time, author, text) yield NoteInfo(person, category, date, time, author, text)
def sanitize_text(text):
    """Return ``text`` without surrounding whitespace and capitalized.

    ``str.capitalize`` upper-cases the first character and lower-cases
    every other character of the stripped string.
    """
    trimmed = text.strip()
    return trimmed.capitalize()
def authion_extractor(sheet):
    """Extract data from Authion format.

    The rows of ``sheet.rows()`` are run through three ``merge`` helpers,
    innermost first: ``fill_empty_cells(1, ...)``,
    ``fill_with_master(1, 2, ...)`` and ``fill_with_sentinel(1, 0, ...)``.

    Yields:
        ``(day, category, time, author, text)`` tuples for each row whose
        text cell is truthy. ``category`` and ``time`` are passed through
        ``sanitize_text``. A row that does not unpack into exactly five
        cells is reported on stdout and skipped.
    """
    filled_rows = merge.fill_with_sentinel(
        1, 0,
        merge.fill_with_master(
            1, 2,
            merge.fill_empty_cells(1, sheet.rows())))

    for row in filled_rows:
        # Only the unpacking can raise the ValueError we want to report
        # (wrong number of cells), so keep the try body that small.
        try:
            (day, time, author, category, text) = row
        except ValueError:
            print(f"Invalid row (Authion): {row}")
            continue
        if text:
            yield (day, sanitize_text(category), sanitize_text(time), author, text)
def couasnon_extractor(sheet): def couasnon_extractor(sheet):
""" Extract data from Couasnon format """ """ Extract data from Couasnon format """
from . import merge
def sanitize_row(row): def sanitize_row(row):
(day, cat, matin, soir, nuit, *rest) = row (day, cat, matin, soir, nuit, *rest) = row
if not day or not cat: if not day or not cat:
raise ValueError raise ValueError
return (day, sanitize_category(cat), str(matin).strip(), str(soir).strip(), str(nuit).strip()) return (day, sanitize_text(cat), str(matin).strip(), str(soir).strip(), str(nuit).strip())
for row in tuple(merge.fill_value(0, sheet.rows())): for row in tuple(merge.fill_empty_cells(0, sheet.rows())):
try: try:
(day, category, matin, soir, nuit) = sanitize_row(row) (day, category, matin, soir, nuit) = sanitize_row(row)
if matin: if matin:
@@ -102,7 +118,7 @@ class Notes(dict):
CONFIG = ( CONFIG = (
{ "source": "/home/artus/Projets/python/xerus/test/", { "source": "/home/artus/Projets/python/xerus/test/",
"extractor": authion_extractor, "extractor": authion_extractor,
"file_pattern": "(\d) (\w) (\d).ods", "file_pattern": "\d{2} (\w+) (\d{4}).ods",
}, },
{ "source": "/home/artus/Projets/python/xerus/test/", { "source": "/home/artus/Projets/python/xerus/test/",
"extractor": couasnon_extractor, "extractor": couasnon_extractor,

View File

@@ -15,10 +15,68 @@ def rows_merged(rows, conf={}):
pass pass
def fill_empty_cells(idx, iterable):
    """Fill falsy cells of column ``idx`` with the last truthy value seen.

    Rows are modified in place and yielded lazily, one by one. Until a
    truthy value has been found in the column, falsy cells are set to
    ``None``.
    """
    previous = None
    for current in iterable:
        cell = current[idx]
        if cell:
            previous = cell
        current[idx] = previous
        yield current
def fill_with_sentinel(sentinel, target, iterable):
    """Fill the target column with one value per cycle of the sentinel column.

    Rows are buffered while the sentinel column runs through values not yet
    seen in the current cycle. When the sentinel changes to a value already
    seen, the buffered rows are written with the last truthy target value
    found so far and yielded, and a new cycle starts. Remaining rows are
    flushed the same way once the input is exhausted.

    Rows are modified in place. The fill value is not reset between cycles,
    so a cycle without any truthy target cell reuses the previous one.
    """
    def flush(rows, fill):
        # Stamp the fill value on each buffered row as it is handed out.
        for buffered in rows:
            buffered[target] = fill
            yield buffered

    pending = []
    fill = None
    cycle_values = set()
    previous = None
    for current in iterable:
        marker = current[sentinel]
        if marker != previous:
            previous = marker
            if marker in cycle_values:
                # The sentinel repeats: the buffered cycle is complete.
                yield from flush(pending, fill)
                pending.clear()
                cycle_values.clear()
            cycle_values.add(marker)
        fill = current[target] or fill
        pending.append(current)
    yield from flush(pending, fill)
def fill_with_master(master, target, iterable):
    """Fill the target column with one value per run of the master column.

    Rows are buffered while the master column keeps the same value. When it
    changes (and the previous master value is truthy), every buffered row
    gets the last truthy target value found in that run and is yielded; the
    target value is then reset for the next run. Remaining rows are flushed
    the same way once the input is exhausted.

    Rows are modified in place. If a run holds no truthy target value, a
    warning listing its ``(master, target)`` pairs is printed and the rows
    are filled with ``None``. This also applies to the final run.
    """
    def flush(group, fill):
        # Warn before overwriting, so the message shows the original cells.
        if group and not fill:
            print("Warning: No target value ->",
                  [(item[master], item[target]) for item in group])
        for buff_row in group:
            buff_row[target] = fill
            yield buff_row

    buff = []
    target_value = None
    last_in_master = None
    for row in iterable:
        # A change in the master column closes the current run.
        if last_in_master and row[master] != last_in_master:
            yield from flush(buff, target_value)
            buff.clear()
            target_value = None
        target_value = row[target] or target_value
        last_in_master = row[master]
        buff.append(row)
    # Flush the trailing run with the same warning rule as the others.
    yield from flush(buff, target_value)