grfs export

This commit is contained in:
Csaba 2024-02-09 14:43:05 +01:00
parent 296a1cc386
commit 88e5c6ec2c
12 changed files with 124 additions and 38 deletions

View file

@ -1,6 +1,6 @@
import amarillo.plugins.gtfs_export.gtfsrt.gtfs_realtime_pb2 as gtfs_realtime_pb2 import amarillo.plugins.grfs_export.gtfsrt.gtfs_realtime_pb2 as gtfs_realtime_pb2
import amarillo.plugins.gtfs_export.gtfsrt.realtime_extension_pb2 as mfdzrte import amarillo.plugins.grfs_export.gtfsrt.realtime_extension_pb2 as mfdzrte
from amarillo.plugins.gtfs_export.gtfs_constants import * from amarillo.plugins.grfs_export.gtfs_constants import *
from google.protobuf.json_format import MessageToDict from google.protobuf.json_format import MessageToDict
from google.protobuf.json_format import ParseDict from google.protobuf.json_format import ParseDict
from datetime import datetime, timedelta from datetime import datetime, timedelta

View file

@ -8,9 +8,12 @@ import logging
import re import re
from amarillo.utils.utils import assert_folder_exists from amarillo.utils.utils import assert_folder_exists
from amarillo.plugins.gtfs_export.models.gtfs import GtfsTimeDelta, GtfsFeedInfo, GtfsAgency, GtfsRoute, GtfsStop, GtfsStopTime, GtfsTrip, GtfsCalendar, GtfsCalendarDate, GtfsShape from amarillo.plugins.grfs_export.models.gtfs import GtfsTimeDelta, GtfsFeedInfo, GtfsAgency, GtfsRoute, GtfsStop, GtfsStopTime, GtfsTrip, GtfsCalendar, GtfsCalendarDate, GtfsShape, GtfsDriver, GtfsAdditionalRidesharingInfo
from amarillo.models.Carpool import Driver, RidesharingInfo
from amarillo.utils.utils import geodesic_distance_in_m
from amarillo.plugins.enhancer.services.stops import is_carpooling_stop from amarillo.plugins.enhancer.services.stops import is_carpooling_stop
from amarillo.plugins.gtfs_export.gtfs_constants import * from amarillo.plugins.enhancer.services.trips import Trip
from amarillo.plugins.grfs_export.gtfs_constants import *
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -19,7 +22,7 @@ class GtfsExport:
stops_counter = 0 stops_counter = 0
trips_counter = 0 trips_counter = 0
routes_counter = 0 trip_counter = 0
stored_stops = {} stored_stops = {}
@ -32,6 +35,8 @@ class GtfsExport:
self.stop_times = [] self.stop_times = []
self.calendar = [] self.calendar = []
self.shapes = [] self.shapes = []
self.drivers = {} #use a dictionary to avoid duplicate ids
self.additional_ridesharing_infos = []
self.agencies = agencies self.agencies = agencies
self.feed_info = feed_info self.feed_info = feed_info
self.localized_to = " nach " self.localized_to = " nach "
@ -52,11 +57,14 @@ class GtfsExport:
self._write_csvfile(gtfsfolder, 'stops.txt', self.stops.values()) self._write_csvfile(gtfsfolder, 'stops.txt', self.stops.values())
self._write_csvfile(gtfsfolder, 'stop_times.txt', self.stop_times) self._write_csvfile(gtfsfolder, 'stop_times.txt', self.stop_times)
self._write_csvfile(gtfsfolder, 'shapes.txt', self.shapes) self._write_csvfile(gtfsfolder, 'shapes.txt', self.shapes)
self._write_csvfile(gtfsfolder, 'driver.txt', self.drivers.values())
self._write_csvfile(gtfsfolder, 'additional_ridesharing_info.txt', self.additional_ridesharing_infos)
self._zip_files(gtfszip_filename, gtfsfolder) self._zip_files(gtfszip_filename, gtfsfolder)
def _zip_files(self, gtfszip_filename, gtfsfolder): def _zip_files(self, gtfszip_filename, gtfsfolder):
gtfsfiles = ['agency.txt', 'feed_info.txt', 'routes.txt', 'trips.txt', gtfsfiles = ['agency.txt', 'feed_info.txt', 'routes.txt', 'trips.txt',
'calendar.txt', 'calendar_dates.txt', 'stops.txt', 'stop_times.txt', 'shapes.txt'] 'calendar.txt', 'calendar_dates.txt', 'stops.txt', 'stop_times.txt',
'shapes.txt', 'driver.txt', 'additional_ridesharing_info.txt']
with ZipFile(gtfszip_filename, 'w') as gtfszip: with ZipFile(gtfszip_filename, 'w') as gtfszip:
for gtfsfile in gtfsfiles: for gtfsfile in gtfsfiles:
gtfszip.write(gtfsfolder+'/'+gtfsfile, gtfsfile) gtfszip.write(gtfsfolder+'/'+gtfsfile, gtfsfile)
@ -74,20 +82,93 @@ class GtfsExport:
for stop in stopSet["stops"].itertuples(): for stop in stopSet["stops"].itertuples():
self._load_stored_stop(stop) self._load_stored_stop(stop)
cloned_trips = dict(ridestore.trips) cloned_trips = dict(ridestore.trips)
groups, cloned_trips = self.group_trips_into_routes(cloned_trips)
for group in groups:
if self.bbox is None or any(trip.intersects(self.bbox) for trip in group.values()):
self.convert_route(group)
for url, trip in cloned_trips.items(): for url, trip in cloned_trips.items():
# TODO: convert ridesharing info and driver data
if self.bbox is None or trip.intersects(self.bbox): if self.bbox is None or trip.intersects(self.bbox):
self._convert_trip(trip) self._convert_trip(trip)
def group_trips_into_routes(self, trips: dict):
    """Partition trips into route groups of trips that are close to each other.

    A group is seeded with an arbitrary ungrouped trip and then grown with
    every remaining trip that ``self.trips_are_close`` reports as close to
    at least one trip already in the group. Every trip of a group gets the
    same ``route_id`` attribute assigned; ids start at 1 and increase by one
    per group.

    Args:
        trips: mapping of trip id to trip object. The mapping itself is not
            modified, but each trip object receives a ``route_id``.

    Returns:
        A tuple ``(route_groups, trips)`` where ``route_groups`` is a list of
        ``{trip_id: trip}`` dicts and ``trips`` is the mapping passed in.
    """
    ungrouped_trips = dict(trips)
    route_groups = []
    current_route_id = 1

    while ungrouped_trips:
        trip_id, current_trip = ungrouped_trips.popitem()
        current_trip.route_id = current_route_id
        current_group = {trip_id: current_trip}

        # Keep re-scanning until a full pass adds nothing. A single pass
        # would miss a trip that only becomes reachable through a trip added
        # later in that same pass, making the grouping depend on dict order.
        group_grew = True
        while group_grew:
            group_grew = False
            # Iterate over a snapshot because matching trips are popped.
            for other_id, other_trip in list(ungrouped_trips.items()):
                if any(self.trips_are_close(other_trip, grouped_trip)
                       for grouped_trip in current_group.values()):
                    other_trip.route_id = current_route_id
                    current_group[other_id] = ungrouped_trips.pop(other_id)
                    group_grew = True

        route_groups.append(current_group)
        current_route_id += 1

    return route_groups, trips
def _convert_trip(self, trip): def trips_are_close(self, trip1, trip2):
self.routes_counter += 1 trip1_start = trip1.path.coordinates[0]
self.routes.append(self._create_route(trip)) trip1_end = trip1.path.coordinates[-1]
trip2_start = trip2.path.coordinates[0]
trip2_end = trip2.path.coordinates[-1]
res = self.within_range(trip1_start, trip2_start) and self.within_range(trip1_end, trip2_end)
return res
def within_range(self, stop1, stop2, merge_range_m=500):
    """Tell whether two coordinates are close enough to be merged.

    Args:
        stop1: first coordinate, passed unchanged to ``geodesic_distance_in_m``.
        stop2: second coordinate, passed unchanged to ``geodesic_distance_in_m``.
        merge_range_m: largest distance that still counts as "within range".
            Presumably metres, judging by the helper's name; the default of
            500 is the threshold that used to be hard-coded here.

    Returns:
        True when the distance reported by ``geodesic_distance_in_m`` is
        less than or equal to ``merge_range_m``.
    """
    return geodesic_distance_in_m(stop1, stop2) <= merge_range_m
def convert_route(self, route_group):
    """Append one route entry for a group of trips to ``self.routes``.

    Args:
        route_group: non-empty mapping of trip id to trip. Each trip is
            read for ``agency``; one of them is read for ``route_id`` and
            ``route_name``.

    The route's agency is the trips' common agency when all trips share
    one, otherwise the literal ``"multiple"``.
    """
    agencies = {trip.agency for trip in route_group.values()}
    # Only a group served by exactly one agency can name that agency.
    agency = agencies.pop() if len(agencies) == 1 else "multiple"

    # Any trip serves as the representative: the route-level values are
    # expected to be identical across the group.
    representative = next(iter(route_group.values()))
    self.routes.append(
        self._create_route(agency, representative.route_id, representative.route_name))
def _convert_trip(self, trip: Trip):
self.trip_counter += 1
self.calendar.append(self._create_calendar(trip)) self.calendar.append(self._create_calendar(trip))
if not trip.runs_regularly: if not trip.runs_regularly:
self.calendar_dates.append(self._create_calendar_date(trip)) self.calendar_dates.append(self._create_calendar_date(trip))
self.trips.append(self._create_trip(trip, self.routes_counter)) self.trips.append(self._create_trip(trip, self.trip_counter))
self._append_stops_and_stop_times(trip) self._append_stops_and_stop_times(trip)
self._append_shapes(trip, self.routes_counter) self._append_shapes(trip, self.trip_counter)
if(trip.driver is not None):
self.drivers[trip.driver.driver_id] = self._convert_driver(trip.driver)
if(trip.additional_ridesharing_info is not None):
self.additional_ridesharing_infos.append(
self._convert_additional_ridesharing_info(trip.trip_id, trip.additional_ridesharing_info))
def _convert_driver(self, driver: Driver):
    """Build the ``GtfsDriver`` export row for a carpool driver.

    Copies the driver's id, profile picture and rating into the row
    unchanged.
    """
    return GtfsDriver(
        driver_id=driver.driver_id,
        profile_picture=driver.profile_picture,
        rating=driver.rating,
    )
def _convert_additional_ridesharing_info(self, trip_id, info: RidesharingInfo):
    """Build the ``GtfsAdditionalRidesharingInfo`` export row for one trip.

    Args:
        trip_id: id of the trip the ridesharing info belongs to.
        info: ridesharing details of that trip.

    Enum-valued fields are exported by their ``.value`` and the creation
    date is rendered as ``YYYYMMDD HH:MM:SS``. Missing optional values are
    exported as None.
    """
    def get_enum_value(member):
        # Without .value the member itself would appear in the export,
        # e.g. as "LuggageSize.large".
        return member.value if member is not None else None

    def format_date(date):
        # ``date`` is a datetime or None. The None guard keeps a missing
        # creation date from raising, in line with the other optional fields.
        return date.strftime("%Y%m%d %H:%M:%S") if date is not None else None

    # NOTE(review): payment_method is passed through as-is; confirm it is
    # not an enum that also needs .value.
    return GtfsAdditionalRidesharingInfo(
        trip_id=trip_id,
        number_free_seats=info.number_free_seats,
        same_gender=get_enum_value(info.same_gender),
        luggage_size=get_enum_value(info.luggage_size),
        animal_car=get_enum_value(info.animal_car),
        car_model=info.car_model,
        car_brand=info.car_brand,
        creation_date=format_date(info.creation_date),
        smoking=get_enum_value(info.smoking),
        payment_method=info.payment_method,
    )
def _trip_headsign(self, destination): def _trip_headsign(self, destination):
destination = destination.replace('(Deutschland)', '') destination = destination.replace('(Deutschland)', '')
destination = destination.replace(', Deutschland', '') destination = destination.replace(', Deutschland', '')
@ -111,8 +192,8 @@ class GtfsExport:
logger.exception(ex) logger.exception(ex)
return destination return destination
def _create_route(self, trip): def _create_route(self, agency, route_id, long_name):
return GtfsRoute(trip.agency, trip.trip_id, trip.route_long_name(), RIDESHARING_ROUTE_TYPE, trip.url, "") return GtfsRoute(agency, route_id, long_name, RIDESHARING_ROUTE_TYPE, "")
def _create_calendar(self, trip): def _create_calendar(self, trip):
# TODO currently, calendar is not provided by Fahrgemeinschaft.de interface. # TODO currently, calendar is not provided by Fahrgemeinschaft.de interface.
@ -132,8 +213,9 @@ class GtfsExport:
def _create_calendar_date(self, trip): def _create_calendar_date(self, trip):
return GtfsCalendarDate(trip.trip_id, self._convert_stop_date(trip.start), CALENDAR_DATES_EXCEPTION_TYPE_ADDED) return GtfsCalendarDate(trip.trip_id, self._convert_stop_date(trip.start), CALENDAR_DATES_EXCEPTION_TYPE_ADDED)
def _create_trip(self, trip, shape_id): def _create_trip(self, trip : Trip, shape_id):
return GtfsTrip(trip.trip_id, trip.trip_id, trip.trip_id, shape_id, trip.trip_headsign, NO_BIKES_ALLOWED) driver_id = None if trip.driver is None else trip.driver.driver_id
return GtfsTrip(trip.route_id, trip.trip_id, driver_id, trip.trip_id, shape_id, trip.trip_headsign, NO_BIKES_ALLOWED, trip.url)
def _convert_stop(self, stop): def _convert_stop(self, stop):
""" """

View file

@ -1,11 +1,12 @@
from fastapi import FastAPI from fastapi import FastAPI
from amarillo.models.Carpool import Region from amarillo.models.Carpool import Region
from amarillo.plugins.gtfs_export.gtfs_export import GtfsExport, GtfsFeedInfo, GtfsAgency from amarillo.plugins.grfs_export.gtfs_export import GtfsExport, GtfsFeedInfo, GtfsAgency
from amarillo.plugins.gtfs_export.gtfs import GtfsRtProducer from amarillo.plugins.grfs_export.gtfs import GtfsRtProducer
from amarillo.utils.container import container from amarillo.utils.container import container
from amarillo.plugins.gtfs_export.router import router from amarillo.plugins.grfs_export.router import router
from amarillo.plugins.enhancer.configuration import configure_enhancer_services from amarillo.plugins.enhancer.configuration import configure_enhancer_services
from amarillo.utils.utils import assert_folder_exists
from glob import glob from glob import glob
import json import json
import schedule import schedule
@ -46,7 +47,7 @@ def midnight():
generate_gtfs() generate_gtfs()
def generate_gtfs(): def generate_gtfs():
logger.info("Generate GTFS") logger.info("Generate GRFS")
for region in regions.values(): for region in regions.values():
# TODO make feed producer infos configurable # TODO make feed producer infos configurable
@ -57,13 +58,13 @@ def generate_gtfs():
container['trips_store'], container['trips_store'],
container['stops_store'], container['stops_store'],
region.bbox) region.bbox)
exporter.export(f"data/gtfs/amarillo.{region.id}.gtfs.zip", "data/tmp/") exporter.export(f"data/grfs/amarillo.{region.id}.gtfs.zip", "data/tmp/grfs")
def generate_gtfs_rt(): def generate_gtfs_rt():
logger.info("Generate GTFS-RT") logger.info("Generate GRFS-RT")
producer = GtfsRtProducer(container['trips_store']) producer = GtfsRtProducer(container['trips_store'])
for region in regions.values(): for region in regions.values():
rt = producer.export_feed(time.time(), f"data/gtfs/amarillo.{region.id}.gtfsrt", bbox=region.bbox) rt = producer.export_feed(time.time(), f"data/grfs/amarillo.{region.id}.gtfsrt", bbox=region.bbox)
def start_schedule(): def start_schedule():
schedule.every().day.at("00:00").do(midnight) schedule.every().day.at("00:00").do(midnight)
@ -74,6 +75,7 @@ def start_schedule():
job_thread.start() job_thread.start()
def setup(app : FastAPI): def setup(app : FastAPI):
assert_folder_exists("data/grfs")
configure_enhancer_services() configure_enhancer_services()
app.include_router(router) app.include_router(router)
start_schedule() start_schedule()

View file

@ -1,16 +1,18 @@
# TODO: move to enhancer
from collections import namedtuple from collections import namedtuple
from datetime import timedelta from datetime import timedelta
GtfsFeedInfo = namedtuple('GtfsFeedInfo', 'feed_id feed_publisher_name feed_publisher_url feed_lang feed_version') GtfsFeedInfo = namedtuple('GtfsFeedInfo', 'feed_id feed_publisher_name feed_publisher_url feed_lang feed_version')
GtfsAgency = namedtuple('GtfsAgency', 'agency_id agency_name agency_url agency_timezone agency_lang agency_email') GtfsAgency = namedtuple('GtfsAgency', 'agency_id agency_name agency_url agency_timezone agency_lang agency_email')
GtfsRoute = namedtuple('GtfsRoute', 'agency_id route_id route_long_name route_type route_url route_short_name') GtfsRoute = namedtuple('GtfsRoute', 'agency_id route_id route_long_name route_type route_short_name')
GtfsStop = namedtuple('GtfsStop', 'stop_id stop_lat stop_lon stop_name') GtfsStop = namedtuple('GtfsStop', 'stop_id stop_lat stop_lon stop_name')
GtfsStopTime = namedtuple('GtfsStopTime', 'trip_id departure_time arrival_time stop_id stop_sequence pickup_type drop_off_type timepoint') GtfsStopTime = namedtuple('GtfsStopTime', 'trip_id departure_time arrival_time stop_id stop_sequence pickup_type drop_off_type timepoint')
GtfsTrip = namedtuple('GtfsTrip', 'route_id trip_id service_id shape_id trip_headsign bikes_allowed') GtfsTrip = namedtuple('GtfsTrip', 'route_id trip_id driver_id service_id shape_id trip_headsign bikes_allowed trip_url')
GtfsCalendar = namedtuple('GtfsCalendar', 'service_id start_date end_date monday tuesday wednesday thursday friday saturday sunday') GtfsCalendar = namedtuple('GtfsCalendar', 'service_id start_date end_date monday tuesday wednesday thursday friday saturday sunday')
GtfsCalendarDate = namedtuple('GtfsCalendarDate', 'service_id date exception_type') GtfsCalendarDate = namedtuple('GtfsCalendarDate', 'service_id date exception_type')
GtfsShape = namedtuple('GtfsShape','shape_id shape_pt_lon shape_pt_lat shape_pt_sequence') GtfsShape = namedtuple('GtfsShape','shape_id shape_pt_lon shape_pt_lat shape_pt_sequence')
GtfsDriver = namedtuple('GtfsDriver','driver_id profile_picture rating')
GtfsAdditionalRidesharingInfo = namedtuple('GtfsAdditionalRidesharingInfo','trip_id number_free_seats same_gender luggage_size animal_car car_model car_brand creation_date smoking payment_method')
# TODO Move to utils # TODO Move to utils
class GtfsTimeDelta(timedelta): class GtfsTimeDelta(timedelta):

View file

@ -12,10 +12,10 @@ logger = logging.getLogger(__name__)
router = APIRouter() router = APIRouter()
@router.post("/export") @router.post("/export-grfs")
async def post_agency_conf(admin_api_key: str = Depends(verify_admin_api_key)): async def post_agency_conf(admin_api_key: str = Depends(verify_admin_api_key)):
#import is here to avoid circular import #import is here to avoid circular import
from amarillo.plugins.gtfs_export.gtfs_generator import generate_gtfs from amarillo.plugins.grfs_export.gtfs_generator import generate_gtfs
generate_gtfs() generate_gtfs()
#TODO: move to amarillo/utils? #TODO: move to amarillo/utils?
@ -32,9 +32,9 @@ def _assert_region_exists(region_id: str) -> Region:
return region return region
@router.get("region/{region_id}/gtfs", @router.get("region/{region_id}/grfs",
summary="Return GTFS Feed for this region", summary="Return GRFS Feed for this region",
response_description="GTFS-Feed (zip-file)", response_description="GRFS-Feed (zip-file)",
response_class=FileResponse, response_class=FileResponse,
responses={ responses={
status.HTTP_404_NOT_FOUND: {"description": "Region not found"}, status.HTTP_404_NOT_FOUND: {"description": "Region not found"},
@ -42,11 +42,11 @@ def _assert_region_exists(region_id: str) -> Region:
) )
async def get_file(region_id: str, user: str = Depends(verify_admin_api_key)): async def get_file(region_id: str, user: str = Depends(verify_admin_api_key)):
_assert_region_exists(region_id) _assert_region_exists(region_id)
return FileResponse(f'data/gtfs/amarillo.{region_id}.gtfs.zip') return FileResponse(f'data/grfs/amarillo.{region_id}.gtfs.zip')
@router.get("region/{region_id}/gtfs-rt", @router.get("region/{region_id}/grfs-rt",
summary="Return GTFS-RT Feed for this region", summary="Return GRFS-RT Feed for this region",
response_description="GTFS-RT-Feed", response_description="GRFS-RT-Feed",
response_class=FileResponse, response_class=FileResponse,
responses={ responses={
status.HTTP_404_NOT_FOUND: {"description": "Region not found"}, status.HTTP_404_NOT_FOUND: {"description": "Region not found"},
@ -56,9 +56,9 @@ async def get_file(region_id: str, user: str = Depends(verify_admin_api_key)):
async def get_file(region_id: str, format: str = 'protobuf', user: str = Depends(verify_admin_api_key)): async def get_file(region_id: str, format: str = 'protobuf', user: str = Depends(verify_admin_api_key)):
_assert_region_exists(region_id) _assert_region_exists(region_id)
if format == 'json': if format == 'json':
return FileResponse(f'data/gtfs/amarillo.{region_id}.gtfsrt.json') return FileResponse(f'data/grfs/amarillo.{region_id}.gtfsrt.json')
elif format == 'protobuf': elif format == 'protobuf':
return FileResponse(f'data/gtfs/amarillo.{region_id}.gtfsrt.pbf') return FileResponse(f'data/grfs/amarillo.{region_id}.gtfsrt.pbf')
else: else:
message = "Specified format is not supported, i.e. neither protobuf nor json." message = "Specified format is not supported, i.e. neither protobuf nor json."
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=message) raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=message)

View file

@ -1,5 +1,5 @@
[project] [project]
name = "amarillo-gtfs-export" name = "amarillo-grfs-export"
version = "0.0.1" version = "0.0.1"
dependencies = [ dependencies = [
"amarillo", "amarillo",