added authion extractor

This commit is contained in:
2020-04-13 23:16:22 +02:00
parent dc22f53554
commit a4b594b111
2 changed files with 82 additions and 8 deletions

View File

@@ -5,6 +5,8 @@ import re
import pyexcel
from collections import namedtuple
from . import merge
def extract_from_file(path):
    """ Extract and yield notes from the file at path """
    # NOTE(review): only a placeholder body is visible here, so calling this
    # currently returns None -- confirm against the full file.
    pass
@@ -21,25 +23,39 @@ def extract_test():
date = f"{day} mars 2020"
yield NoteInfo(person, category, date, time, author, text)
def sanitize_text(text):
    """ Return text without surrounding whitespace, first letter upper-cased

    str.capitalize() also lower-cases every character after the first one.
    """
    return text.strip().capitalize()
def sanitize_category(name):
    """ Return name without surrounding whitespace and capitalized """
    # Kept at module level: couasnon_extractor's row sanitizer calls it.
    return name.strip().capitalize()


def authion_extractor(sheet):
    """ Extract data from Authion format

    Yields one (day, category, time, author, text) tuple for every row whose
    text cell is not empty. After the merge helpers have filled the blanks,
    each row must unpack into exactly five cells, read in the order
    day, time, author, category, text. Rows of any other length are reported
    on stdout and skipped.
    """
    def process_rows(iterable):
        # The innermost call runs first: column 1 is filled downwards, then
        # column 2 is filled per run of identical column-1 values, and finally
        # column 0 is filled per cycle detected in column 1.
        return merge.fill_with_sentinel(1, 0,
            merge.fill_with_master(1, 2,
                merge.fill_empty_cells(1, iterable)))

    for row in process_rows(sheet.rows()):
        # Only the unpacking can raise ValueError, so keep the try minimal.
        try:
            (day, time, author, category, text) = row
        except ValueError:
            print(f"Invalid row (Authion): {row}")
            continue
        if text:
            yield (day, sanitize_text(category), sanitize_text(time), author, text)
def couasnon_extractor(sheet):
""" Extract data from Couasnon format """
from . import merge
def sanitize_row(row):
(day, cat, matin, soir, nuit, *rest) = row
if not day or not cat:
raise ValueError
return (day, sanitize_category(cat), str(matin).strip(), str(soir).strip(), str(nuit).strip())
return (day, sanitize_text(cat), str(matin).strip(), str(soir).strip(), str(nuit).strip())
for row in tuple(merge.fill_value(0, sheet.rows())):
for row in tuple(merge.fill_empty_cells(0, sheet.rows())):
try:
(day, category, matin, soir, nuit) = sanitize_row(row)
if matin:
@@ -102,7 +118,7 @@ class Notes(dict):
CONFIG = (
{ "source": "/home/artus/Projets/python/xerus/test/",
"extractor": authion_extractor,
"file_pattern": "(\d) (\w) (\d).ods",
"file_pattern": "\d{2} (\w+) (\d{4}).ods",
},
{ "source": "/home/artus/Projets/python/xerus/test/",
"extractor": couasnon_extractor,

View File

@@ -15,10 +15,68 @@ def rows_merged(rows, conf={}):
pass
def fill_empty_cells(idx, iterable):
    """ Fill the value at index with the last found

    Every row is modified in place and then yielded. A cell counts as empty
    when it is falsy (None, "", 0, ...); it then receives the most recent
    non-empty value seen at idx. Rows preceding the first non-empty value get
    None.
    """
    value = None
    for row in iterable:
        # Keep the previous value whenever the current cell is empty.
        value = row[idx] or value
        row[idx] = value
        yield row


# Former name of fill_empty_cells, kept so existing callers keep working.
fill_value = fill_empty_cells
def fill_with_sentinel(sentinel, target, iterable):
    """ Find a cycle in sentinel column, and fill the target column with unique value

    Rows are buffered until the sentinel column changes to a value that was
    already met since the last flush; this marks the start of a new cycle.
    The buffered rows then all receive, in place, the last non-empty target
    value read so far and are yielded in their original order. Remaining rows
    are flushed the same way once the input is exhausted.

    The remembered target value is not reset between cycles, so a cycle
    without any target value inherits the one of the previous cycle.
    """
    buff = []
    value = None
    seen = set()
    last_seen = None

    def flush():
        # Stamp the buffered rows with the current value and hand them out.
        for r in buff:
            r[target] = value
            yield r
        buff.clear()

    for row in iterable:
        if row[sentinel] != last_seen:
            last_seen = row[sentinel]
            if last_seen in seen:
                # The sentinel came back to a known value: the cycle is over.
                yield from flush()
                seen.clear()
            # NOTE(review): nesting rebuilt from flattened source -- confirm
            # that this belongs to the "sentinel changed" branch.
            seen.add(last_seen)
        value = row[target] or value
        buff.append(row)

    yield from flush()
def fill_with_master(master, target, iterable):
    """ Fill the target column cells with unique value while master column is unchanged

    Rows are buffered for as long as the master column keeps the same value.
    When it changes, every buffered row receives, in place, the last
    non-empty target value found in that group and is yielded in order. A
    warning listing the (master, target) pairs of the group is printed when
    the group has no target value; its rows then get None.

    A change is only detected once a truthy master value has been seen, so
    leading rows with an empty master cell join the first group. The final
    group is flushed without the warning check.
    """
    buff = []
    target_value = None
    last_in_master = None
    for row in iterable:
        # A different master value closes the group gathered so far.
        if last_in_master and row[master] != last_in_master:
            if not target_value:
                print("Warning: No target value ->",
                      [(i[master], i[target]) for i in buff])
            # Fill target and yield buffer
            for buff_row in buff:
                buff_row[target] = target_value
                yield buff_row
            buff.clear()
            target_value = None
        # Remember the latest non-empty target value of the current group.
        target_value = row[target] or target_value
        last_in_master = row[master]
        buff.append(row)
    for buff_row in buff:
        buff_row[target] = target_value
        yield buff_row