added authion extractor
This commit is contained in:
@@ -5,6 +5,8 @@ import re
|
||||
import pyexcel
|
||||
from collections import namedtuple
|
||||
|
||||
from . import merge
|
||||
|
||||
def extract_from_file(path):
|
||||
""" Extract and yield notes from the file at path (stub: the body is only `pass`, nothing is implemented yet) """
|
||||
pass
|
||||
@@ -21,25 +23,39 @@ def extract_test():
|
||||
date = f"{day} mars 2020"
|
||||
yield NoteInfo(person, category, date, time, author, text)
|
||||
|
||||
def sanitize_text(text):
    """ Trim surrounding whitespace, then capitalize the text """
    trimmed = text.strip()
    return trimmed.capitalize()
|
||||
|
||||
|
||||
|
||||
def authion_extractor(sheet):
|
||||
""" Extract data from Authion format: yield (day, category, time, author, text) tuples for rows whose text cell is truthy, category and time being passed through sanitize_text """
|
||||
# NOTE(review): this listing carries no +/- markers; the `pass` and the two helpers below may be lines removed by the commit -- confirm against the repository
pass
|
||||
def sanitize_row(row):
|
||||
return row
|
||||
|
||||
def sanitize_category(name):
|
||||
return name.strip().capitalize()
|
||||
# Chain the three merge helpers around column 1: fill its empty cells, then fill column 2 per run of equal column-1 values, then fill column 0 per cycle of column-1 values
def process_rows(iterable):
|
||||
return merge.fill_with_sentinel(1, 0,
|
||||
merge.fill_with_master(1, 2,
|
||||
merge.fill_empty_cells(1, iterable)))
|
||||
|
||||
for row in process_rows(sheet.rows()):
|
||||
try:
|
||||
# Unpacking raises ValueError unless the row holds exactly five items
(day, time, author, category, text) = row
|
||||
# Rows with a falsy text cell are dropped without any message
if text:
|
||||
yield (day, sanitize_text(category), sanitize_text(time), author, text)
|
||||
except ValueError:
|
||||
print(f"Invalid row (Authion): {row}")
|
||||
|
||||
def couasnon_extractor(sheet):
|
||||
""" Extract data from Couasnon format """
|
||||
from . import merge
|
||||
|
||||
def sanitize_row(row):
|
||||
(day, cat, matin, soir, nuit, *rest) = row
|
||||
if not day or not cat:
|
||||
raise ValueError
|
||||
return (day, sanitize_category(cat), str(matin).strip(), str(soir).strip(), str(nuit).strip())
|
||||
return (day, sanitize_text(cat), str(matin).strip(), str(soir).strip(), str(nuit).strip())
|
||||
|
||||
for row in tuple(merge.fill_value(0, sheet.rows())):
|
||||
for row in tuple(merge.fill_empty_cells(0, sheet.rows())):
|
||||
try:
|
||||
(day, category, matin, soir, nuit) = sanitize_row(row)
|
||||
if matin:
|
||||
@@ -102,7 +118,7 @@ class Notes(dict):
|
||||
CONFIG = (
|
||||
{ "source": "/home/artus/Projets/python/xerus/test/",
|
||||
"extractor": authion_extractor,
|
||||
"file_pattern": "(\d) (\w) (\d).ods",
|
||||
"file_pattern": "\d{2} (\w+) (\d{4}).ods",
|
||||
},
|
||||
{ "source": "/home/artus/Projets/python/xerus/test/",
|
||||
"extractor": couasnon_extractor,
|
||||
|
||||
@@ -15,10 +15,68 @@ def rows_merged(rows, conf={}):
|
||||
pass
|
||||
|
||||
|
||||
def fill_value(idx, iterable):
|
||||
def fill_empty_cells(idx, iterable):
    """ Replace every falsy cell at position idx with the last truthy one met

    Rows are modified in place and yielded in their original order. Until a
    truthy cell has been met, the cell is set to None.
    """
    carried = None
    for current in iterable:
        cell = current[idx]
        if cell:
            # Remember the most recent non-empty cell
            carried = cell
        current[idx] = carried
        yield current
|
||||
|
||||
|
||||
def fill_with_sentinel(sentinel, target, iterable):
    """ Detect cycles in the sentinel column and give each cycle one target value

    Rows are buffered until the sentinel cell changes to a value already met
    in the current cycle. The buffered rows then get, in their target column,
    the last truthy target cell met so far, and are yielded in order. Rows
    are modified in place.

    The fill value is not reset when a cycle ends: a cycle without any truthy
    target cell inherits the value of the previous cycle.
    """
    def drain(rows, fill):
        # Write the fill value into every buffered row and hand the rows out
        for pending in rows:
            pending[target] = fill
            yield pending

    pending_rows = []
    fill = None
    cycle_marks = set()
    previous_mark = None

    for row in iterable:
        mark = row[sentinel]
        if mark != previous_mark:
            previous_mark = mark
            if mark in cycle_marks:
                # The sentinel is back on a known value: the cycle is over
                yield from drain(pending_rows, fill)
                pending_rows.clear()
                cycle_marks.clear()
            cycle_marks.add(mark)

        cell = row[target]
        if cell:
            fill = cell
        pending_rows.append(row)

    # Whatever is left forms the last (possibly partial) cycle
    yield from drain(pending_rows, fill)
|
||||
|
||||
|
||||
def fill_with_master(master, target, iterable):
    """ Fill the target column cells with unique value while master column is unchanged

    Consecutive rows sharing the same master cell form a group. Every row of
    a group gets, in its target column, the last truthy target cell met in
    that group (None when there is none). Rows are modified in place and
    yielded in their original order, one group at a time.

    A warning is printed for every non-empty group without a target value,
    the last group included.
    """
    def flush(group, fill):
        """ Warn if the group has no target value, then fill and yield its rows """
        if group and not fill:
            print("Warning: No target value ->",
                  [(item[master], item[target]) for item in group])
        for item in group:
            item[target] = fill
            yield item

    buff = []
    target_value = None
    last_in_master = None
    for row in iterable:
        # The boundary test is skipped while the previous master cell is
        # falsy, so rows without a master value are grouped with the rows
        # that follow them
        if last_in_master and row[master] != last_in_master:
            yield from flush(buff, target_value)
            buff.clear()
            target_value = None

        target_value = row[target] or target_value
        last_in_master = row[master]
        buff.append(row)

    # The last group goes through the same check as the others
    yield from flush(buff, target_value)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user