import csv
import sys
import re
# Headers of the two CSV columns whose free-text cells are scanned for
# leaf numbers (they are used as keys into each DictReader row below).
LEAF_ROW_1 = 'bad crop leaf and note'
LEAF_ROW_2 = 'Ok but could use improvement'
# Raw string so that ``\d`` reaches the regex engine untouched instead of
# being parsed as an (invalid) string escape; matches each run of digits.
LEAF_RE = r'\d+'

# One dict per CSV row, with the keys 'identifier', 'type', 'pages' and
# 'reason'.
reslist = []

# The CSV path is taken from the first command-line argument.  The file is
# opened with newline='' as the csv module documentation recommends, and the
# ``with`` block closes it once every row has been consumed.
with open(sys.argv[1], newline='') as csv_file:
    reader = csv.DictReader(csv_file)
    for row in reader:
        # Collect the digit runs from both note columns, first column first.
        leafs = []
        for note in (row[LEAF_ROW_1], row[LEAF_ROW_2]):
            leafs += re.findall(LEAF_RE, note)
        # Round-trip through int to normalise each number (e.g. '007' -> '7')
        # before joining them into a comma-separated string.
        pages = ','.join(str(int(leaf)) for leaf in leafs)
        reslist.append({
            'identifier': row['identifier'],
            # Every entry is tagged with the same fixed type.
            'type': 'train',
            'pages': pages,
            # The two note cells are concatenated as-is, with no separator.
            'reason': row[LEAF_ROW_1] + row[LEAF_ROW_2],
        })
import yaml
