From 88e5c6ec2c789939ddb30bd33bdfff3bb2847276 Mon Sep 17 00:00:00 2001 From: Francia Csaba Date: Fri, 9 Feb 2024 14:43:05 +0100 Subject: [PATCH] grfs export --- .../{gtfs_export => grfs_export}/__init__.py | 0 .../{gtfs_export => grfs_export}/gtfs.py | 6 +- .../gtfs_constants.py | 0 .../gtfs_export.py | 108 +++++++++++++++--- .../gtfs_generator.py | 16 +-- .../gtfsrt/__init__.py | 0 .../gtfsrt/gtfs_realtime_pb2.py | 0 .../gtfsrt/realtime_extension_pb2.py | 0 .../models/__init__.py | 0 .../models/gtfs.py | 8 +- .../{gtfs_export => grfs_export}/router.py | 22 ++-- pyproject.toml | 2 +- 12 files changed, 124 insertions(+), 38 deletions(-) rename amarillo/plugins/{gtfs_export => grfs_export}/__init__.py (100%) rename amarillo/plugins/{gtfs_export => grfs_export}/gtfs.py (95%) rename amarillo/plugins/{gtfs_export => grfs_export}/gtfs_constants.py (100%) rename amarillo/plugins/{gtfs_export => grfs_export}/gtfs_export.py (65%) rename amarillo/plugins/{gtfs_export => grfs_export}/gtfs_generator.py (80%) rename amarillo/plugins/{gtfs_export => grfs_export}/gtfsrt/__init__.py (100%) rename amarillo/plugins/{gtfs_export => grfs_export}/gtfsrt/gtfs_realtime_pb2.py (100%) rename amarillo/plugins/{gtfs_export => grfs_export}/gtfsrt/realtime_extension_pb2.py (100%) rename amarillo/plugins/{gtfs_export => grfs_export}/models/__init__.py (100%) rename amarillo/plugins/{gtfs_export => grfs_export}/models/gtfs.py (76%) rename amarillo/plugins/{gtfs_export => grfs_export}/router.py (76%) diff --git a/amarillo/plugins/gtfs_export/__init__.py b/amarillo/plugins/grfs_export/__init__.py similarity index 100% rename from amarillo/plugins/gtfs_export/__init__.py rename to amarillo/plugins/grfs_export/__init__.py diff --git a/amarillo/plugins/gtfs_export/gtfs.py b/amarillo/plugins/grfs_export/gtfs.py similarity index 95% rename from amarillo/plugins/gtfs_export/gtfs.py rename to amarillo/plugins/grfs_export/gtfs.py index 368d924..3e6e9c5 100644 --- a/amarillo/plugins/gtfs_export/gtfs.py +++ b/amarillo/plugins/grfs_export/gtfs.py @@ -1,6 +1,6 @@ -import amarillo.plugins.gtfs_export.gtfsrt.gtfs_realtime_pb2 as gtfs_realtime_pb2 -import amarillo.plugins.gtfs_export.gtfsrt.realtime_extension_pb2 as mfdzrte -from amarillo.plugins.gtfs_export.gtfs_constants import * +import amarillo.plugins.grfs_export.gtfsrt.gtfs_realtime_pb2 as gtfs_realtime_pb2 +import amarillo.plugins.grfs_export.gtfsrt.realtime_extension_pb2 as mfdzrte +from amarillo.plugins.grfs_export.gtfs_constants import * from google.protobuf.json_format import MessageToDict from google.protobuf.json_format import ParseDict from datetime import datetime, timedelta diff --git a/amarillo/plugins/gtfs_export/gtfs_constants.py b/amarillo/plugins/grfs_export/gtfs_constants.py similarity index 100% rename from amarillo/plugins/gtfs_export/gtfs_constants.py rename to amarillo/plugins/grfs_export/gtfs_constants.py diff --git a/amarillo/plugins/gtfs_export/gtfs_export.py b/amarillo/plugins/grfs_export/gtfs_export.py similarity index 65% rename from amarillo/plugins/gtfs_export/gtfs_export.py rename to amarillo/plugins/grfs_export/gtfs_export.py index ea0e8af..d0cee0e 100644 --- a/amarillo/plugins/gtfs_export/gtfs_export.py +++ b/amarillo/plugins/grfs_export/gtfs_export.py @@ -8,9 +8,12 @@ import logging import re from amarillo.utils.utils import assert_folder_exists -from amarillo.plugins.gtfs_export.models.gtfs import GtfsTimeDelta, GtfsFeedInfo, GtfsAgency, GtfsRoute, GtfsStop, GtfsStopTime, GtfsTrip, GtfsCalendar, GtfsCalendarDate, GtfsShape +from amarillo.plugins.grfs_export.models.gtfs import GtfsTimeDelta, GtfsFeedInfo, GtfsAgency, GtfsRoute, GtfsStop, GtfsStopTime, GtfsTrip, GtfsCalendar, GtfsCalendarDate, GtfsShape, GtfsDriver, GtfsAdditionalRidesharingInfo +from amarillo.models.Carpool import Driver, RidesharingInfo +from amarillo.utils.utils import geodesic_distance_in_m from amarillo.plugins.enhancer.services.stops import is_carpooling_stop -from amarillo.plugins.gtfs_export.gtfs_constants import * +from amarillo.plugins.enhancer.services.trips import Trip +from amarillo.plugins.grfs_export.gtfs_constants import * logger = logging.getLogger(__name__) @@ -19,7 +22,7 @@ class GtfsExport: stops_counter = 0 trips_counter = 0 - routes_counter = 0 + trip_counter = 0 stored_stops = {} @@ -32,6 +35,8 @@ class GtfsExport: self.stop_times = [] self.calendar = [] self.shapes = [] + self.drivers = {} #use a dictionary to avoid duplicate ids + self.additional_ridesharing_infos = [] self.agencies = agencies self.feed_info = feed_info self.localized_to = " nach " @@ -52,11 +57,14 @@ class GtfsExport: self._write_csvfile(gtfsfolder, 'stops.txt', self.stops.values()) self._write_csvfile(gtfsfolder, 'stop_times.txt', self.stop_times) self._write_csvfile(gtfsfolder, 'shapes.txt', self.shapes) + self._write_csvfile(gtfsfolder, 'driver.txt', self.drivers.values()) + self._write_csvfile(gtfsfolder, 'additional_ridesharing_info.txt', self.additional_ridesharing_infos) self._zip_files(gtfszip_filename, gtfsfolder) def _zip_files(self, gtfszip_filename, gtfsfolder): gtfsfiles = ['agency.txt', 'feed_info.txt', 'routes.txt', 'trips.txt', - 'calendar.txt', 'calendar_dates.txt', 'stops.txt', 'stop_times.txt', 'shapes.txt'] + 'calendar.txt', 'calendar_dates.txt', 'stops.txt', 'stop_times.txt', + 'shapes.txt', 'driver.txt', 'additional_ridesharing_info.txt'] with ZipFile(gtfszip_filename, 'w') as gtfszip: for gtfsfile in gtfsfiles: gtfszip.write(gtfsfolder+'/'+gtfsfile, gtfsfile) @@ -74,20 +82,93 @@ class GtfsExport: for stop in stopSet["stops"].itertuples(): self._load_stored_stop(stop) cloned_trips = dict(ridestore.trips) + groups, cloned_trips = self.group_trips_into_routes(cloned_trips) + for group in groups: + if self.bbox is None or any(trip.intersects(self.bbox) for trip in group.values()): + self.convert_route(group) for url, trip in cloned_trips.items(): + # TODO: convert ridesharing info and driver data if self.bbox is None or trip.intersects(self.bbox): self._convert_trip(trip) + + def group_trips_into_routes(self, trips: dict): + ungrouped_trips = dict(trips) + route_groups = list() + current_route_id = 1 + + while len(ungrouped_trips) > 0: + trip_id, current_trip = ungrouped_trips.popitem() + + current_group = {trip_id: current_trip} + current_trip.route_id = current_route_id + + for other_id, other_trip in list(ungrouped_trips.items()): + # if an ungrouped trip is close to any of the grouped trips, add it to the route group + if (any(self.trips_are_close(other_trip, grouped_trip) for grouped_trip in current_group.values())): + current_group[other_id] = ungrouped_trips.pop(other_id) + current_group[other_id].route_id = current_route_id + + + route_groups.append(current_group) + current_route_id += 1 + + return route_groups, trips - def _convert_trip(self, trip): - self.routes_counter += 1 - self.routes.append(self._create_route(trip)) + def trips_are_close(self, trip1, trip2): + trip1_start = trip1.path.coordinates[0] + trip1_end = trip1.path.coordinates[-1] + + trip2_start = trip2.path.coordinates[0] + trip2_end = trip2.path.coordinates[-1] + + res = self.within_range(trip1_start, trip2_start) and self.within_range(trip1_end, trip2_end) + return res + + def within_range(self, stop1, stop2): + MERGE_RANGE_M = 500 + return geodesic_distance_in_m(stop1, stop2) <= MERGE_RANGE_M + + def convert_route(self, route_group): + agency = "multiple" + + #if there is only one agency, use that + agencies = set(trip.agency for id, trip in route_group.items()) + if len(agencies) == 1: agency = agencies.pop() + trip = next(iter(route_group.values())) # grab any trip, relevant values should be the same + + self.routes.append(self._create_route(agency, trip.route_id, trip.route_name)) + + def _convert_trip(self, trip: Trip): + self.trip_counter += 1 self.calendar.append(self._create_calendar(trip)) if not trip.runs_regularly: self.calendar_dates.append(self._create_calendar_date(trip)) - self.trips.append(self._create_trip(trip, self.routes_counter)) + self.trips.append(self._create_trip(trip, self.trip_counter)) self._append_stops_and_stop_times(trip) - self._append_shapes(trip, self.routes_counter) + self._append_shapes(trip, self.trip_counter) + + if(trip.driver is not None): + self.drivers[trip.driver.driver_id] = self._convert_driver(trip.driver) + if(trip.additional_ridesharing_info is not None): + self.additional_ridesharing_infos.append( + self._convert_additional_ridesharing_info(trip.trip_id, trip.additional_ridesharing_info)) + def _convert_driver(self, driver: Driver): + return GtfsDriver(driver.driver_id, driver.profile_picture, driver.rating) + + def _convert_additional_ridesharing_info(self, trip_id, info: RidesharingInfo): + # if we don't specify .value, the enum will appear in the export as e.g. LuggageSize.large + # and missing optional values get None + def get_enum_value(enum): + return enum.value if enum is not None else None + + def format_date(date: datetime): + return date.strftime("%Y%m%d %H:%M:%S") + + return GtfsAdditionalRidesharingInfo( + trip_id, info.number_free_seats, get_enum_value(info.same_gender), get_enum_value(info.luggage_size), get_enum_value(info.animal_car), + info.car_model, info.car_brand, format_date(info.creation_date), get_enum_value(info.smoking), info.payment_method) + def _trip_headsign(self, destination): destination = destination.replace('(Deutschland)', '') destination = destination.replace(', Deutschland', '') @@ -111,8 +192,8 @@ class GtfsExport: logger.exception(ex) return destination - def _create_route(self, trip): - return GtfsRoute(trip.agency, trip.trip_id, trip.route_long_name(), RIDESHARING_ROUTE_TYPE, trip.url, "") + def _create_route(self, agency, route_id, long_name): + return GtfsRoute(agency, route_id, long_name, RIDESHARING_ROUTE_TYPE, "") def _create_calendar(self, trip): # TODO currently, calendar is not provided by Fahrgemeinschaft.de interface. @@ -132,8 +213,9 @@ class GtfsExport: def _create_calendar_date(self, trip): return GtfsCalendarDate(trip.trip_id, self._convert_stop_date(trip.start), CALENDAR_DATES_EXCEPTION_TYPE_ADDED) - def _create_trip(self, trip, shape_id): - return GtfsTrip(trip.trip_id, trip.trip_id, trip.trip_id, shape_id, trip.trip_headsign, NO_BIKES_ALLOWED) + def _create_trip(self, trip : Trip, shape_id): + driver_id = None if trip.driver is None else trip.driver.driver_id + return GtfsTrip(trip.route_id, trip.trip_id, driver_id, trip.trip_id, shape_id, trip.trip_headsign, NO_BIKES_ALLOWED, trip.url) def _convert_stop(self, stop): """ diff --git a/amarillo/plugins/gtfs_export/gtfs_generator.py b/amarillo/plugins/grfs_export/gtfs_generator.py similarity index 80% rename from amarillo/plugins/gtfs_export/gtfs_generator.py rename to amarillo/plugins/grfs_export/gtfs_generator.py index d9526e2..4ccf4a0 100644 --- a/amarillo/plugins/gtfs_export/gtfs_generator.py +++ b/amarillo/plugins/grfs_export/gtfs_generator.py @@ -1,11 +1,12 @@ from fastapi import FastAPI from amarillo.models.Carpool import Region -from amarillo.plugins.gtfs_export.gtfs_export import GtfsExport, GtfsFeedInfo, GtfsAgency -from amarillo.plugins.gtfs_export.gtfs import GtfsRtProducer +from amarillo.plugins.grfs_export.gtfs_export import GtfsExport, GtfsFeedInfo, GtfsAgency +from amarillo.plugins.grfs_export.gtfs import GtfsRtProducer from amarillo.utils.container import container -from amarillo.plugins.gtfs_export.router import router +from amarillo.plugins.grfs_export.router import router from amarillo.plugins.enhancer.configuration import configure_enhancer_services +from amarillo.utils.utils import assert_folder_exists from glob import glob import json import schedule @@ -46,7 +47,7 @@ def midnight(): generate_gtfs() def generate_gtfs(): - logger.info("Generate GTFS") + logger.info("Generate GRFS") for region in regions.values(): # TODO make feed producer infos configurable @@ -57,13 +58,13 @@ def generate_gtfs(): container['trips_store'], container['stops_store'], region.bbox) - exporter.export(f"data/gtfs/amarillo.{region.id}.gtfs.zip", "data/tmp/") + exporter.export(f"data/grfs/amarillo.{region.id}.gtfs.zip", "data/tmp/grfs") def generate_gtfs_rt(): - logger.info("Generate GTFS-RT") + logger.info("Generate GRFS-RT") producer = GtfsRtProducer(container['trips_store']) for region in regions.values(): - rt = producer.export_feed(time.time(), f"data/gtfs/amarillo.{region.id}.gtfsrt", bbox=region.bbox) + rt = producer.export_feed(time.time(), f"data/grfs/amarillo.{region.id}.gtfsrt", bbox=region.bbox) def start_schedule(): schedule.every().day.at("00:00").do(midnight) @@ -74,6 +75,7 @@ def start_schedule(): job_thread.start() def setup(app : FastAPI): + assert_folder_exists("data/grfs") configure_enhancer_services() app.include_router(router) start_schedule() \ No newline at end of file diff --git a/amarillo/plugins/gtfs_export/gtfsrt/__init__.py b/amarillo/plugins/grfs_export/gtfsrt/__init__.py similarity index 100% rename from amarillo/plugins/gtfs_export/gtfsrt/__init__.py rename to amarillo/plugins/grfs_export/gtfsrt/__init__.py diff --git a/amarillo/plugins/gtfs_export/gtfsrt/gtfs_realtime_pb2.py b/amarillo/plugins/grfs_export/gtfsrt/gtfs_realtime_pb2.py similarity index 100% rename from amarillo/plugins/gtfs_export/gtfsrt/gtfs_realtime_pb2.py rename to amarillo/plugins/grfs_export/gtfsrt/gtfs_realtime_pb2.py diff --git a/amarillo/plugins/gtfs_export/gtfsrt/realtime_extension_pb2.py b/amarillo/plugins/grfs_export/gtfsrt/realtime_extension_pb2.py similarity index 100% rename from amarillo/plugins/gtfs_export/gtfsrt/realtime_extension_pb2.py rename to amarillo/plugins/grfs_export/gtfsrt/realtime_extension_pb2.py diff --git a/amarillo/plugins/gtfs_export/models/__init__.py b/amarillo/plugins/grfs_export/models/__init__.py similarity index 100% rename from amarillo/plugins/gtfs_export/models/__init__.py rename to amarillo/plugins/grfs_export/models/__init__.py diff --git a/amarillo/plugins/gtfs_export/models/gtfs.py b/amarillo/plugins/grfs_export/models/gtfs.py similarity index 76% rename from amarillo/plugins/gtfs_export/models/gtfs.py rename to amarillo/plugins/grfs_export/models/gtfs.py index 33d38a0..77026b1 100644 --- a/amarillo/plugins/gtfs_export/models/gtfs.py +++ b/amarillo/plugins/grfs_export/models/gtfs.py @@ -1,16 +1,18 @@ -# TODO: move to enhancer from collections import namedtuple from datetime import timedelta + GtfsFeedInfo = namedtuple('GtfsFeedInfo', 'feed_id feed_publisher_name feed_publisher_url feed_lang feed_version') GtfsAgency = namedtuple('GtfsAgency', 'agency_id agency_name agency_url agency_timezone agency_lang agency_email') -GtfsRoute = namedtuple('GtfsRoute', 'agency_id route_id route_long_name route_type route_url route_short_name') +GtfsRoute = namedtuple('GtfsRoute', 'agency_id route_id route_long_name route_type route_short_name') GtfsStop = namedtuple('GtfsStop', 'stop_id stop_lat stop_lon stop_name') GtfsStopTime = namedtuple('GtfsStopTime', 'trip_id departure_time arrival_time stop_id stop_sequence pickup_type drop_off_type timepoint') -GtfsTrip = namedtuple('GtfsTrip', 'route_id trip_id service_id shape_id trip_headsign bikes_allowed') +GtfsTrip = namedtuple('GtfsTrip', 'route_id trip_id driver_id service_id shape_id trip_headsign bikes_allowed trip_url') GtfsCalendar = namedtuple('GtfsCalendar', 'service_id start_date end_date monday tuesday wednesday thursday friday saturday sunday') GtfsCalendarDate = namedtuple('GtfsCalendarDate', 'service_id date exception_type') GtfsShape = namedtuple('GtfsShape','shape_id shape_pt_lon shape_pt_lat shape_pt_sequence') +GtfsDriver = namedtuple('GtfsDriver','driver_id profile_picture rating') +GtfsAdditionalRidesharingInfo = namedtuple('GtfsAdditionalRidesharingInfo','trip_id number_free_seats same_gender luggage_size animal_car car_model car_brand creation_date smoking payment_method') # TODO Move to utils class GtfsTimeDelta(timedelta): diff --git a/amarillo/plugins/gtfs_export/router.py b/amarillo/plugins/grfs_export/router.py similarity index 76% rename from amarillo/plugins/gtfs_export/router.py rename to amarillo/plugins/grfs_export/router.py index 6055690..61ef0fd 100644 --- a/amarillo/plugins/gtfs_export/router.py +++ b/amarillo/plugins/grfs_export/router.py @@ -12,10 +12,10 @@ logger = logging.getLogger(__name__) router = APIRouter() -@router.post("/export") +@router.post("/export-grfs") async def post_agency_conf(admin_api_key: str = Depends(verify_admin_api_key)): #import is here to avoid circular import - from amarillo.plugins.gtfs_export.gtfs_generator import generate_gtfs + from amarillo.plugins.grfs_export.gtfs_generator import generate_gtfs generate_gtfs() #TODO: move to amarillo/utils? @@ -32,9 +32,9 @@ def _assert_region_exists(region_id: str) -> Region: return region -@router.get("region/{region_id}/gtfs", - summary="Return GTFS Feed for this region", - response_description="GTFS-Feed (zip-file)", +@router.get("region/{region_id}/grfs", + summary="Return GRFS Feed for this region", + response_description="GRFS-Feed (zip-file)", response_class=FileResponse, responses={ status.HTTP_404_NOT_FOUND: {"description": "Region not found"}, @@ -42,11 +42,11 @@ def _assert_region_exists(region_id: str) -> Region: ) async def get_file(region_id: str, user: str = Depends(verify_admin_api_key)): _assert_region_exists(region_id) - return FileResponse(f'data/gtfs/amarillo.{region_id}.gtfs.zip') + return FileResponse(f'data/grfs/amarillo.{region_id}.gtfs.zip') -@router.get("region/{region_id}/gtfs-rt", - summary="Return GTFS-RT Feed for this region", - response_description="GTFS-RT-Feed", +@router.get("region/{region_id}/grfs-rt", + summary="Return GRFS-RT Feed for this region", + response_description="GRFS-RT-Feed", response_class=FileResponse, responses={ status.HTTP_404_NOT_FOUND: {"description": "Region not found"}, @@ -56,9 +56,9 @@ async def get_file(region_id: str, user: str = Depends(verify_admin_api_key)): async def get_file(region_id: str, format: str = 'protobuf', user: str = Depends(verify_admin_api_key)): _assert_region_exists(region_id) if format == 'json': - return FileResponse(f'data/gtfs/amarillo.{region_id}.gtfsrt.json') + return FileResponse(f'data/grfs/amarillo.{region_id}.gtfsrt.json') elif format == 'protobuf': - return FileResponse(f'data/gtfs/amarillo.{region_id}.gtfsrt.pbf') + return FileResponse(f'data/grfs/amarillo.{region_id}.gtfsrt.pbf') else: message = "Specified format is not supported, i.e. neither protobuf nor json." raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=message) \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 873b92c..e6bb600 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [project] -name = "amarillo-gtfs-export" +name = "amarillo-grfs-export" version = "0.0.1" dependencies = [ "amarillo",