Added Authion extractor
This commit is contained in:
@@ -5,6 +5,8 @@ import re
|
|||||||
import pyexcel
|
import pyexcel
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
|
|
||||||
|
from . import merge
|
||||||
|
|
||||||
def extract_from_file(path):
|
def extract_from_file(path):
|
||||||
""" Extract and yields notes from the file at path """
|
""" Extract and yields notes from the file at path """
|
||||||
pass
|
pass
|
||||||
@@ -21,25 +23,39 @@ def extract_test():
|
|||||||
date = f"{day} mars 2020"
|
date = f"{day} mars 2020"
|
||||||
yield NoteInfo(person, category, date, time, author, text)
|
yield NoteInfo(person, category, date, time, author, text)
|
||||||
|
|
||||||
|
def sanitize_text(text):
    """ Trim surrounding whitespace from text, then capitalize the result """
    trimmed = text.strip()
    return trimmed.capitalize()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def authion_extractor(sheet):
|
def authion_extractor(sheet):
|
||||||
""" Extract data from Authion format """
|
""" Extract data from Authion format """
|
||||||
pass
|
def sanitize_row(row):
|
||||||
|
return row
|
||||||
|
|
||||||
def sanitize_category(name):
|
def process_rows(iterable):
|
||||||
return name.strip().capitalize()
|
return merge.fill_with_sentinel(1, 0,
|
||||||
|
merge.fill_with_master(1, 2,
|
||||||
|
merge.fill_empty_cells(1, iterable)))
|
||||||
|
|
||||||
|
for row in process_rows(sheet.rows()):
|
||||||
|
try:
|
||||||
|
(day, time, author, category, text) = row
|
||||||
|
if text:
|
||||||
|
yield (day, sanitize_text(category), sanitize_text(time), author, text)
|
||||||
|
except ValueError:
|
||||||
|
print(f"Invalid row (Authion): {row}")
|
||||||
|
|
||||||
def couasnon_extractor(sheet):
|
def couasnon_extractor(sheet):
|
||||||
""" Extract data from Couasnon format """
|
""" Extract data from Couasnon format """
|
||||||
from . import merge
|
|
||||||
|
|
||||||
def sanitize_row(row):
|
def sanitize_row(row):
|
||||||
(day, cat, matin, soir, nuit, *rest) = row
|
(day, cat, matin, soir, nuit, *rest) = row
|
||||||
if not day or not cat:
|
if not day or not cat:
|
||||||
raise ValueError
|
raise ValueError
|
||||||
return (day, sanitize_category(cat), str(matin).strip(), str(soir).strip(), str(nuit).strip())
|
return (day, sanitize_text(cat), str(matin).strip(), str(soir).strip(), str(nuit).strip())
|
||||||
|
|
||||||
for row in tuple(merge.fill_value(0, sheet.rows())):
|
for row in tuple(merge.fill_empty_cells(0, sheet.rows())):
|
||||||
try:
|
try:
|
||||||
(day, category, matin, soir, nuit) = sanitize_row(row)
|
(day, category, matin, soir, nuit) = sanitize_row(row)
|
||||||
if matin:
|
if matin:
|
||||||
@@ -102,7 +118,7 @@ class Notes(dict):
|
|||||||
CONFIG = (
|
CONFIG = (
|
||||||
{ "source": "/home/artus/Projets/python/xerus/test/",
|
{ "source": "/home/artus/Projets/python/xerus/test/",
|
||||||
"extractor": authion_extractor,
|
"extractor": authion_extractor,
|
||||||
"file_pattern": "(\d) (\w) (\d).ods",
|
"file_pattern": "\d{2} (\w+) (\d{4}).ods",
|
||||||
},
|
},
|
||||||
{ "source": "/home/artus/Projets/python/xerus/test/",
|
{ "source": "/home/artus/Projets/python/xerus/test/",
|
||||||
"extractor": couasnon_extractor,
|
"extractor": couasnon_extractor,
|
||||||
|
|||||||
@@ -15,10 +15,68 @@ def rows_merged(rows, conf={}):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def fill_empty_cells(idx, iterable):
    """ Yield every row with a falsy cell at idx replaced by the last truthy one seen

    Rows are modified in place. Cells met before any truthy value has been
    seen are set to None.
    """
    previous = None
    for row in iterable:
        cell = row[idx]
        if cell:
            # A real value: remember it for the empty cells that follow.
            previous = cell
        row[idx] = previous
        yield row
|
||||||
|
|
||||||
|
|
||||||
|
def fill_with_sentinel(sentinel, target, iterable):
    """ Give every row of a sentinel cycle the same value in the target column

    Rows are buffered while the sentinel column runs through distinct values.
    When the sentinel column changes to a value already met in the current
    cycle, the cycle is over: the buffered rows get the fill value written
    into their target column (in place) and are yielded.

    The fill value is the last truthy target cell read so far. It is not
    reset between cycles, so a cycle with no truthy target cell reuses the
    value of the previous cycle.
    """
    pending = []
    fill = None
    visited = set()
    previous = None

    for row in iterable:
        marker = row[sentinel]
        if marker != previous:
            previous = marker
            if marker in visited:
                # The sentinel repeats: release the rows of the finished cycle.
                for held in pending:
                    held[target] = fill
                    yield held
                pending = []
                visited = set()
            visited.add(marker)

        cell = row[target]
        if cell:
            fill = cell
        pending.append(row)

    # Release whatever is left of the last (possibly partial) cycle.
    for held in pending:
        held[target] = fill
        yield held
|
||||||
|
|
||||||
|
|
||||||
|
def _flush_group(rows, master, target, value):
    """ Warn when a group has no target value, then fill and yield its rows """
    if rows and not value:
        # Report the cells before they are overwritten below.
        print("Warning: No target value ->",
              [(row[master], row[target]) for row in rows])
    for row in rows:
        row[target] = value
        yield row


def fill_with_master(master, target, iterable):
    """ Fill the target column cells with unique value while master column is unchanged

    Consecutive rows are grouped for as long as the master column keeps the
    same value. Each row of a group gets the group's last truthy target cell
    written into its target column (in place) before being yielded.

    A change of master value only closes a group when the previous master
    value is truthy, so rows with a falsy master cell stay in the group of
    the rows that follow them.

    A warning is printed for every group that has no truthy target cell,
    including the last one.
    """
    buff = []
    target_value = None
    last_in_master = None

    for row in iterable:
        if last_in_master and row[master] != last_in_master:
            # Master value changed: the buffered group is complete.
            yield from _flush_group(buff, master, target, target_value)
            buff = []
            target_value = None

        target_value = row[target] or target_value
        last_in_master = row[master]
        buff.append(row)

    # The trailing group goes through the same warn-and-fill path.
    yield from _flush_group(buff, master, target, target_value)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user