Compare commits


18 Commits

Author SHA1 Message Date
344455e409 deprecated db.transaction 2022-03-28 17:40:43 +02:00
654f580fc5 new python code formatter 2022-03-28 17:05:30 +02:00
107e1241d7 typos 2021-09-13 13:52:56 +02:00
110f18a62f perf improvements on parco_monza_classico view 2021-09-13 13:51:08 +02:00
ddd2daf8cf monticello view 2021-09-13 13:48:23 +02:00
1b8674a6c0 removed unrelated comment 2021-09-13 11:25:54 +02:00
ca69ec961e parco_monza_classico and arcore classico views 2021-09-13 11:24:49 +02:00
d711766571 comment on what resolve_geo_location really is 2021-09-12 14:20:41 +02:00
cdd6418c79 merge training_duration and training_info into 1 view 2021-09-12 14:18:27 +02:00
364329dc0e more generous Trezzo sull'Adda radius 2021-09-12 11:51:25 +02:00
81551b4890 added more places in the resolve_geo_location lut 2021-09-12 11:51:03 +02:00
868dcb89f2 resolve_geo_location(1) immutable lut function 2021-09-12 11:26:28 +02:00
2d802b67d5 training_duration with the "distinct on" paradigm
This improves performance against the previous "partition by" approach
2021-09-12 11:25:30 +02:00
64c4938c88 btree 2021-09-12 09:50:03 +02:00
4fd00bc8fd hash index on training_data.training_id 2021-09-12 09:31:40 +02:00
2c45db029d don't print stats after execution 2021-09-11 17:19:16 +02:00
b9a575219a overall stats as training_info view 2021-03-02 09:00:57 +01:00
aa792ce33a training_info view 2021-03-02 09:00:13 +01:00
4 changed files with 201 additions and 99 deletions
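
Commit 2d802b67d5 rewrites the start/end point lookup around PostgreSQL's DISTINCT ON instead of a row_number() window over a partition. A minimal sketch of the two patterns against a hypothetical points(training_id, t) table (the table name is illustrative, not from this repository): the window form numbers every row of every partition and then filters, while DISTINCT ON picks one row per training_id directly from the sorted input.

-- window-function form: number every row, then keep row 1 per training
select training_id, t
from (select training_id, t,
             row_number() over (partition by training_id order by t asc) as row_n
      from points) numbered
where row_n = 1;

-- DISTINCT ON form: one row per training_id, chosen by the order by clause
select distinct on (training_id) training_id, t
from points
order by training_id, t asc;

Both queries return each training's earliest point; ordering by t desc instead yields the latest one, which is how the reworked training_info view below builds td_start and td_end.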

init.sql (146 changes)

@@ -20,31 +20,129 @@ create table if not exists training_data(
constraint fk_training_id foreign key(training_id) references training(id) on delete cascade
);
create index if not exists idx_training_data_t on training_data(t);
create index if not exists idx_training_data_training_id on training_data(training_id);
create index if not exists idx_training_data_geog on training_data using GIST(geog);
create or replace view training_duration as
(select td_start.id as id,
td_start.geog as start_location,
td_end.geog as end_location,
td_start.t as start_time,
td_end.t as end_time,
date_trunc('second', (td_end.t - td_start.t)) as duration
from (select td_row_n.t, td_row_n.id, td_row_n.geog
from (select *,
row_number() over (partition by td.training_id order by td.t asc) as row_n
from training_data td join training t on (t.id = td.training_id)
order by td.t asc) as td_row_n
where td_row_n.row_n = 1
order by td_row_n.t asc) td_start
join
(select td_row_n.t, td_row_n.id, td_row_n.geog
from (select *,
row_number() over (partition by td.training_id order by td.t desc) as row_n
from training_data td join training t on (t.id = td.training_id)
order by td.t asc) as td_row_n
where td_row_n.row_n = 1) td_end
-- poor man's reverse geocoding
create or replace function resolve_geo_location(geography) returns text as $$
begin
case when st_dwithin($1, st_point(45.516114, 9.216108), 1000) then
return 'Bicocca';
when st_dwithin($1, st_point(45.058302, 11.644477), 2000) then
return 'Villanova';
when st_dwithin($1, st_point(45.106055, 11.794563), 2000) then
return 'Boara';
when st_dwithin($1, st_point(45.610874, 9.522227), 2000) then
return 'Trezzo sull` Adda';
when st_dwithin($1, st_point(45.645741, 9.265780), 2000) then
return 'Sovico';
when st_dwithin($1, st_point(45.588173, 9.275549), 3000) then
return 'Monza';
else
return 'unknown';
end case;
end
$$ language plpgsql immutable;
on (td_start.id = td_end.id)
create or replace view training_info as
(
select
td_start.training_id as training_id,
td_start.t as start_time,
td_end.t as end_time,
resolve_geo_location(td_start.geog) AS start_location,
resolve_geo_location(td_end.geog) AS end_location,
date_trunc('second'::text, td_end.t - td_start.t) AS duration,
round(((t.moving_distance)/1000)::numeric, 2) as distance,
round(( (t.moving_distance / 1000) / (select extract(epoch from date_trunc('second'::text, td_end.t - td_start.t))/3600))::numeric, 1) as pace_kmh
from
(
select distinct on (td2.training_id) *
from training_data td2 order by td2.training_id, t asc
) td_start
join
(
select distinct on (td2.training_id) *
from training_data td2 order by td2.training_id, t desc
) td_end
on (td_start.training_id = td_end.training_id)
join training t on (t.id = td_start.training_id)
order by td_start.t asc
);
---------------------------------------------
-- find trainings using georeferencing --
---------------------------------------------
-- CONSTANTS
-- via mirabello (inside the park): st_point(45.607115, 9.283687)
-- Tamoil roundabout (for the northern route): st_point(45.622779, 9.276274)
-- Parco di Monza loop
create or replace view parco_monza_classico as
(
select duration,
distance,
pace_kmh,
start_time,
end_time,
start_location,
end_location,
training_id
from training_info ti
where ti.training_id in (
-- find training ids that definitely pass through the Parco di Monza
select td.training_id
from training_data td
join training_data td2 on (td.training_id = td2.training_id)
where
distance < 31 and
st_dwithin(td.geog, st_point(45.607115, 9.283687), 20) -- if I pass within 20 metres of via mirabello inside the park
and not st_dwithin(td2.geog, st_point(45.622779, 9.276274), 20) -- and if I do not pass by the Tamoil
)
);
-- Arcore loop
create or replace view arcore as
(
select duration,
distance,
pace_kmh,
start_time,
end_time,
start_location,
end_location,
training_id
from training_info ti
where ti.training_id in (
select td.training_id
from training_data td join training_data td2 on (td.training_id = td2.training_id)
where
distance < 42 and
end_location = 'Bicocca' and
st_dwithin(td.geog, st_point(45.622779, 9.276274), 20) -- if I pass within 20 metres of the Tamoil
and st_dwithin(td2.geog, st_point(45.631299, 9.308985), 30) -- and if I pass within 30 metres of the roundabout toward Arcore
)
);
create or replace view monticello as
(
select duration,
distance,
pace_kmh,
start_time,
end_time,
start_location,
end_location,
training_id
from training_info ti
where ti.training_id in (
select td.training_id
from training_data td join training_data td2 on (td.training_id = td2.training_id) join training_data td3 on (td.training_id = td3.training_id)
where
distance < 61 and
end_location = 'Bicocca' and
st_dwithin(td.geog, st_point(45.622779, 9.276274), 20) -- if I pass by the Tamoil
and st_dwithin(td2.geog, st_point(45.631299, 9.308985), 30) -- and if I pass within 30 metres of the roundabout toward Arcore
and st_dwithin(td3.geog, st_point(45.705275, 9.305889), 20) -- and if I pass within 20 metres of the Monticello drinking fountain
)
);
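
To read the new init.sql objects together: resolve_geo_location() is a small lookup function built on ST_DWithin, training_info derives one summary row per training from training_data and training, and the loop views (parco_monza_classico, arcore, monticello) filter training_info through self-joins on training_data that act as geofence checks. Illustrative queries, not part of the diff (the coordinate below is the 'Bicocca' point from the lookup table):

-- label a point; the function is declared immutable, so constant arguments can be pre-evaluated
select resolve_geo_location(st_point(45.516114, 9.216108)::geography);

-- most recent Parco di Monza loops
select start_time, duration, distance, pace_kmh
from parco_monza_classico
order by start_time desc
limit 10;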

main.py (131 changes)

@@ -7,33 +7,31 @@ from glob import glob
from email.message import Message
from sqlalchemy import create_engine, text
from sqlalchemy.engine import Connection
import gpxpy
from bs4 import BeautifulSoup
from stats import print_stats
config = configparser.ConfigParser()
config.read('config.ini')
config.read("config.ini")
db = create_engine(f"postgresql://{config['db']['username']}:{config['db']['password']}@{config['db']['host']}/{config['db']['database']}").connect()
db = create_engine(
f"postgresql://{config['db']['username']}:{config['db']['password']}@{config['db']['host']}/{config['db']['database']}"
).connect()
mail = IMAP4(host=config['mail']['host'])
fitotrack_msg_filter = 'ALL'
mail = IMAP4(host=config["mail"]["host"])
fitotrack_msg_filter = "ALL"
def init_database():
with open('init.sql') as f:
db.execute('\n'.join(f.readlines()))
with open("init.sql") as f:
db.execute("\n".join(f.readlines()))
def _get_sender(msg: Message) -> str:
sender: str = msg.get('from')
if ' ' in sender:
sender = sender.split(' ')
sender: str = msg.get("from")
if " " in sender:
sender = sender.split(" ")
for field in sender:
if '@' in field and '<' in field and '>' in field:
if "@" in field and "<" in field and ">" in field:
return field[1:-1]
return sender
@@ -41,88 +39,113 @@ def _get_sender(msg: Message) -> str:
def get_gpx_files_from_mail():
mail.starttls(ssl.create_default_context())
mail.login(config['mail']['username'], config['mail']['password'])
mail.login(config["mail"]["username"], config["mail"]["password"])
mail.select()
_, ids = mail.search(None, fitotrack_msg_filter)
ids = ids[0].split()
for i in ids:
_, fetched = mail.fetch(i, '(RFC822)')
_, fetched = mail.fetch(i, "(RFC822)")
email_message = email.message_from_bytes(fetched[0][1])
sender = _get_sender(email_message)
for part in email_message.walk():
if part.get_content_maintype() == 'multipart' or part.get_content_disposition() is None:
if (
part.get_content_maintype() == "multipart"
or part.get_content_disposition() is None
):
continue
filename = part.get_filename()
if filename:
filename = f'{sender}_{filename}'
if not os.path.exists(f'gpx_files/{filename}'):
with open(f'gpx_files/{filename}', 'wb') as f:
print(f'creating {filename}')
filename = f"{sender}_{filename}"
if not os.path.exists(f"gpx_files/{filename}"):
with open(f"gpx_files/{filename}", "wb") as f:
print(f"creating {filename}")
f.write(part.get_payload(decode=True))
mail.store(i, '+FLAGS', '\\Deleted')
mail.store(i, "+FLAGS", "\\Deleted")
mail.expunge()
mail.close()
mail.logout()
def process_gpx_files(tx: Connection):
for filepath in glob('gpx_files/*.gpx'):
owner = os.path.split(filepath)[-1].split('_workout-')[0]
def process_gpx_files():
for filepath in glob("gpx_files/*.gpx"):
owner = os.path.split(filepath)[-1].split("_workout-")[0]
filename = f'workout-{os.path.split(filepath)[-1].split("_workout-")[1]}'
print(f'Processing {filename}')
if list(tx.execute(text('select exists(select from training where owner = :owner and filename = :filename)'),
dict(owner=owner, filename=filename,),),)[0][0]:
print(f"Processing {filename}")
if list(
db.execute(
text(
"select exists(select from training where owner = :owner and filename = :filename)"
),
dict(
owner=owner,
filename=filename,
),
),
)[0][0]:
os.remove(filepath)
continue
with open(filepath) as f:
gpx_file = gpxpy.parse(f)
if gpx_file.creator != 'FitoTrack':
raise ValueError('gpx file not generated by the FitoTrack app')
training_id = list(tx.execute(
text("""
if gpx_file.creator != "FitoTrack":
raise ValueError("gpx file not generated by the FitoTrack app")
training_id = list(
db.execute(
text(
"""
insert into training(owner, filename, type, description, moving_time, stopped_time, moving_distance, stopped_distance) values
(:owner, :filename, :type, :description, :moving_time, :stopped_time, :moving_distance, :stopped_distance) returning id
"""),
dict(owner=owner,
filename=filename,
type='cycling', # TODO other training types
description=gpx_file.description,
moving_time=gpx_file.get_moving_data().moving_time,
stopped_time=gpx_file.get_moving_data().stopped_time,
moving_distance=gpx_file.get_moving_data().moving_distance,
stopped_distance=gpx_file.get_moving_data().stopped_distance,),
))[0][0]
"""
),
dict(
owner=owner,
filename=filename,
type="cycling", # TODO other training types
description=gpx_file.description,
moving_time=gpx_file.get_moving_data().moving_time,
stopped_time=gpx_file.get_moving_data().stopped_time,
moving_distance=gpx_file.get_moving_data().moving_distance,
stopped_distance=gpx_file.get_moving_data().stopped_distance,
),
)
)[0][0]
for track in gpx_file.tracks:
for segment in track.segments:
for point in segment.points:
tx.execute(text("""
db.execute(
text(
"""
insert into training_data(training_id, t, geog, speed, elevation)
values (:training_id, :t, :geog, :speed, :elevation)
"""),
dict(training_id=training_id,
t=point.time,
geog=f'POINT({point.latitude} {point.longitude})',
speed=point.speed,
elevation=point.elevation,),)
"""
),
dict(
training_id=training_id,
t=point.time,
geog=f"POINT({point.latitude} {point.longitude})",
speed=point.speed,
elevation=point.elevation,
),
)
os.remove(filepath)
def main():
try:
os.mkdir('gpx_files')
os.mkdir("gpx_files")
except FileExistsError:
pass
init_database()
get_gpx_files_from_mail()
db.transaction(process_gpx_files)
print_stats(db)
with db.begin():
process_gpx_files()
if __name__ == '__main__':
if __name__ == "__main__":
try:
main()
except (KeyboardInterrupt, EOFError):
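
Commit 344455e409 is visible at the bottom of main.py: the callable-style db.transaction(process_gpx_files) becomes an explicit with db.begin(): block, since recent SQLAlchemy releases deprecate Connection.transaction() in favour of the context-manager form. A minimal standalone sketch of that pattern, with a placeholder connection string and an illustrative statement rather than this repository's config:

from sqlalchemy import create_engine, text

# placeholder DSN, not the values from config.ini
conn = create_engine("postgresql://user:password@localhost/trainings").connect()

# commits when the block exits normally, rolls back if an exception escapes
with conn.begin():
    conn.execute(
        text("update training set type = :t where type is null"),
        dict(t="cycling"),
    )

conn.close()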

requirements.txt

@@ -3,4 +3,5 @@ gpxpy
psycopg2
bs4
sqlparse
tabulate
tabulate
black

stats.py

@@ -1,20 +0,0 @@
from sqlalchemy.engine import Connection
from tabulate import tabulate
def print_stats(db: Connection):
overall_stats = """
select distinct
td.t::date,
date_trunc('second', training_duration.start_time::time) as start_time,
date_trunc('second', training_duration.end_time::time) as end_time,
training_duration.duration as duration,
round(((t.moving_distance)/1000)::numeric, 2) as distance,
round(( (t.moving_distance / 1000) / (select extract(epoch from training_duration.duration)/3600))::numeric, 1) as pace_kmh
from training t
join training_data td on (t.id = td.training_id)
join training_duration on (t.id = training_duration.id)
order by td.t::date, date_trunc('second', training_duration.start_time::time) asc
"""
print(tabulate(list(db.execute(overall_stats)),
headers = ["date", "start_time", "end_time", "duration", "distance", "pace (km/h)"]))