Stops package, use underscores in folder name

This commit is contained in:
Csaba 2024-07-09 12:12:13 +02:00
parent 41b963d50c
commit 1b1ac3c862
22 changed files with 38 additions and 226 deletions

7
.vscode/launch.json vendored
View file

@ -23,17 +23,14 @@
"request": "launch",
"module": "uvicorn",
"args": [
"amarillo-gtfs-generator.gtfs_generator:app",
"amarillo_gtfs_generator.gtfs_generator:app",
"--workers=1",
"--port=8002"
],
// "preLaunchTask": "enhance",
"jinja": true,
"justMyCode": false,
"env": {
"admin_token": "supersecret",
"ride2go_token": "supersecret2"
}
"env": {}
}
]
}

View file

@ -9,10 +9,10 @@ EXPOSE 80
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt
COPY ./amarillo-gtfs-generator /app/amarillo-gtfs-generator
COPY ./amarillo_gtfs_generator /app/amarillo_gtfs_generator
COPY ./logging.conf /app
ENV MODULE_NAME=amarillo-gtfs-generator.gtfs_generator
ENV MODULE_NAME=amarillo_gtfs_generator.gtfs_generator
ENV MAX_WORKERS=1
RUN useradd amarillo

View file

@ -1,182 +0,0 @@
import csv
import geopandas as gpd
import pandas as pd
from amarillo.models.Carpool import StopTime
from contextlib import closing
from shapely.geometry import Point, LineString
from shapely.ops import transform
from pyproj import Proj, Transformer
import re
import requests
from io import TextIOWrapper
import codecs
import logging
logger = logging.getLogger(__name__)
class StopsStore():
    """In-memory store of stop locations with proximity queries.

    Every configured source is loaded into its own GeoDataFrame, reprojected
    to ``internal_projection`` so that distance computations run in that
    CRS's units (the sources' ``vicinity`` is stored as ``distanceInMeter``).
    """

    # Compiled once; matches spelling variants such as "Park & Ride", "P+R", "Park+Rail".
    _PARK_AND_RIDE_PATTERN = re.compile(r"P(ark)?\s?[\+&]\s?R(ail|ide)?")

    def __init__(self, stop_sources = None, internal_projection = "EPSG:32632"):
        """Create an empty store.

        Args:
            stop_sources: list of dicts with the keys ``url`` and ``vicinity``;
                ``None`` means no sources.
            internal_projection: CRS used for all distance calculations.
        """
        self.internal_projection = internal_projection
        self.projection = Transformer.from_crs("EPSG:4326", internal_projection, always_xy=True).transform
        self.stopsDataFrames = []
        # A ``[]`` default would be one list object shared by every instance.
        self.stop_sources = [] if stop_sources is None else stop_sources

    def load_stop_sources(self):
        """Imports stops from stop_sources and registers them with
        the distance they are still associated with a trip.
        E.g. bus stops should be registered with a distance of e.g. 30m,
        while larger carpool parkings might be registered with e.g. 500m.

        Subsequent calls of load_stop_sources will reload all stop_sources
        but replace the current stops only if all stops could be loaded successfully.
        """
        loaded_frames = []
        error_occurred = False

        for stops_source in self.stop_sources:
            source_url = None
            try:
                # Looked up inside the try so a malformed source entry counts
                # as a failed load instead of escaping from this method.
                source_url = stops_source["url"]
                stops_frame = self._load_stops(source_url)
                loaded_frames.append({'distanceInMeter': stops_source["vicinity"],
                                      'stops': stops_frame})
            except Exception:
                error_occurred = True
                logger.error("Failed to load stops from %s to StopsStore.", source_url, exc_info=True)

        # All-or-nothing: keep the previously loaded stops if any source failed.
        if not error_occurred:
            self.stopsDataFrames = loaded_frames

    def find_additional_stops_around(self, line, stops = None):
        """Returns a GeoDataFrame with all stops in vicinity of the
        given line, sorted by distance from origin of the line.

        Note: for internal projection/distance calculations, the
        lat/lon geometries of line and stops are converted to
        the store's internal projection.

        Args:
            line: object exposing ``coordinates`` as (lon, lat) pairs.
            stops: optional extra stops (with ``name``, ``lon``, ``lat``,
                ``id``) that are always part of the result.
        """
        stops_frames = []
        if stops:
            stops_frames.append(self._convert_to_dataframe(stops))

        transformed_line = transform(self.projection, LineString(line.coordinates))
        for stops_to_match in self.stopsDataFrames:
            stops_frames.append(self._find_stops_around_transformed(
                stops_to_match['stops'], transformed_line, stops_to_match['distanceInMeter']))

        if not stops_frames:
            # pd.concat raises ValueError when given nothing to concatenate.
            return gpd.GeoDataFrame()

        matched = gpd.GeoDataFrame(pd.concat(stops_frames, ignore_index=True, sort=True))
        if not matched.empty:
            self._sort_by_distance(matched, transformed_line)
        return matched

    def find_closest_stop(self, carpool_stop, max_search_distance):
        """Return the stored stop nearest to ``carpool_stop``.

        Only stops within ``max_search_distance`` (internal projection units)
        are considered. If none is found, ``carpool_stop`` itself is returned.
        """
        transformed_coord = Point(self.projection(carpool_stop.lon, carpool_stop.lat))
        best_dist = max_search_distance + 1
        best_stop = None

        for stops_with_dist in self.stopsDataFrames:
            stops = stops_with_dist['stops']
            indices, distances = stops.sindex.nearest(
                transformed_coord, return_all=True, return_distance=True, max_distance=max_search_distance)
            if len(distances) > 0 and distances[0] < best_dist:
                best_dist = distances[0]
                # indices[1] holds positions in the stops frame. Frames built by
                # _as_dataframe carry a default RangeIndex, so position == label.
                row = indices[1][0]
                best_stop = StopTime(name=stops.at[row, 'stop_name'], lat=stops.at[row, 'y'], lon=stops.at[row, 'x'])

        return best_stop if best_stop else carpool_stop

    def _normalize_stop_name(self, stop_name):
        """Unify park-and-ride spellings to ``P+R``; generic names get a default."""
        default_name = 'P+R-Parkplatz'
        if stop_name in ('', 'Park&Ride'):
            return default_name
        return self._PARK_AND_RIDE_PATTERN.sub('P+R', stop_name)

    def _load_stops(self, source : str):
        """Load stops from ``source`` into a GeoDataFrame.

        ``source`` is an http(s) URL or a local file path. Sources ending in
        ``json`` are parsed as GeoJSON, everything else as ``;``-separated CSV.

        Raises:
            requests.HTTPError: if a remote source answers with an error status.
        """
        import json

        logger.info("Load stops from %s", source)
        is_geojson = source.endswith('json')

        if source.startswith('http'):
            with requests.get(source) as response:
                # Without this an error page would be parsed as (empty) stop data.
                response.raise_for_status()
                if is_geojson:
                    return self._load_stops_geojson(response.json())
                return self._load_stops_csv(codecs.iterdecode(response.iter_lines(), 'utf-8'))

        # newline='' lets the csv module handle line endings itself.
        with open(source, encoding='utf-8', newline='') as stops_file:
            if is_geojson:
                return self._load_stops_geojson(json.load(stops_file))
            return self._load_stops_csv(stops_file)

    def _load_stops_csv(self, csv_source):
        """Build the stops GeoDataFrame from an iterable of CSV lines."""
        return self._as_dataframe(*self._read_csv_columns(csv_source))

    def _read_csv_columns(self, csv_source):
        """Parse ``;``-separated stop rows into (ids, lats, lons, names) lists.

        Expects the columns ``stop_id``, ``stop_lat``, ``stop_lon`` and
        ``stop_name``; coordinates may use a decimal comma.
        """
        stop_ids, lats, lons, stop_names = [], [], [], []
        for row in csv.DictReader(csv_source, delimiter=';'):
            stop_ids.append(row['stop_id'])
            lats.append(float(row['stop_lat'].replace(",", ".")))
            lons.append(float(row['stop_lon'].replace(",", ".")))
            stop_names.append(self._normalize_stop_name(row['stop_name']))
        return stop_ids, lats, lons, stop_names

    def _load_stops_geojson(self, geojson_source):
        """Build the stops GeoDataFrame from a parsed GeoJSON feature collection."""
        return self._as_dataframe(*self._read_geojson_columns(geojson_source))

    def _read_geojson_columns(self, geojson_source):
        """Parse GeoJSON features into (ids, lats, lons, names) lists.

        Features without coordinates or without a ``name`` property are
        logged and skipped.
        """
        stop_ids, lats, lons, stop_names = [], [], [], []
        for feature in geojson_source['features']:
            # GeoJSON allows null geometry/properties; treat them like missing values.
            coord = (feature.get('geometry') or {}).get('coordinates')
            name = (feature.get('properties') or {}).get('name')
            if not coord or not name:
                logger.error('Stop feature %s has null coord or name', feature.get('id'))
                continue
            stop_ids.append(feature['id'])
            lats.append(coord[1])  # GeoJSON positions are [lon, lat]
            lons.append(coord[0])
            stop_names.append(self._normalize_stop_name(name))
        return stop_ids, lats, lons, stop_names

    def _as_dataframe(self, id, lat, lon, stop_name):
        """Combine parallel value lists into a GeoDataFrame in the internal projection.

        The ``x``/``y`` columns keep the original lon/lat values, while the
        geometry column is reprojected.
        """
        df = gpd.GeoDataFrame(data={'x': lon, 'y': lat, 'stop_name': stop_name, 'id': id})
        stops_frame = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.x, df.y, crs='EPSG:4326'))
        stops_frame.to_crs(crs=self.internal_projection, inplace=True)
        return stops_frame

    def _find_stops_around_transformed(self, stopsDataFrame, transformedLine, distance):
        """Return the rows of ``stopsDataFrame`` within ``distance`` of the line."""
        buffered_line = transformedLine.buffer(distance)
        # Cheap bounding-box prefilter via the spatial index, then the exact test.
        candidate_positions = list(stopsDataFrame.sindex.intersection(buffered_line.bounds))
        candidates = stopsDataFrame.iloc[candidate_positions]
        return candidates[candidates.intersects(buffered_line)]

    def _convert_to_dataframe(self, stops):
        """Turn stop objects into a GeoDataFrame in the internal projection."""
        rows = [[stop.name, stop.lon, stop.lat, stop.id,
                 Point(self.projection(stop.lon, stop.lat))] for stop in stops]
        return gpd.GeoDataFrame(rows, columns = ['stop_name','x','y','id','geometry'], crs=self.internal_projection)

    def _sort_by_distance(self, stops, transformedLine):
        """Sort ``stops`` in place by their projected position along the line."""
        stops['distance'] = stops.apply(lambda row: transformedLine.project(row['geometry']), axis=1)
        stops.sort_values('distance', inplace=True)
def is_carpooling_stop(stop_id, name):
    """Tell whether a stop is meant to be used as a carpooling stop.

    A stop qualifies if its id carries the ``mfdz:`` or ``bbnavi:`` prefix or
    if its name contains ``mitfahr`` or ``p&m`` (case-insensitive).
    A missing (``None``) id or name simply never matches.
    """
    stop_id = stop_id or ''
    stop_name = (name or '').lower()
    # mfdz: or bbnavi: prefixed stops are custom stops which are explicitly meant to be carpooling stops
    return stop_id.startswith(('mfdz:', 'bbnavi:')) or 'mitfahr' in stop_name or 'p&m' in stop_name

View file

@ -9,7 +9,7 @@ import re
from amarillo.utils.utils import assert_folder_exists
from .models.gtfs import GtfsTimeDelta, GtfsFeedInfo, GtfsAgency, GtfsRoute, GtfsStop, GtfsStopTime, GtfsTrip, GtfsCalendar, GtfsCalendarDate, GtfsShape
from .services.stops import is_carpooling_stop
from amarillo_stops.stops import is_carpooling_stop
from .gtfs_constants import *
from .models.Carpool import Agency

View file

@ -17,7 +17,7 @@ from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
from .models.Carpool import Carpool, Region
from .router import _assert_region_exists
from .services import stops #TODO: make stop service its own package??
from amarillo_stops import stops
from .services.trips import TripStore, Trip
from .services.carpools import CarpoolService
from .services.agencies import AgencyService
@ -96,22 +96,25 @@ def init():
observer.schedule(EventHandler(), 'data/enhanced', recursive=True)
observer.start()
start_schedule()
# def run_schedule():
generate_gtfs()
# while 1:
# try:
# schedule.run_pending()
# except Exception as e:
# logger.exception(e)
# time.sleep(1)
# def midnight():
# container['stops_store'].load_stop_sources()
# # container['trips_store'].unflag_unrecent_updates()
# # container['carpools'].purge_outdated_offers()
def run_schedule():
while 1:
try:
schedule.run_pending()
except Exception as e:
logger.exception(e)
time.sleep(1)
# generate_gtfs()
def midnight():
container['stops_store'].load_stop_sources()
container['trips_store'].unflag_unrecent_updates()
container['carpools'].purge_outdated_offers()
generate_gtfs()
#TODO: generate for a specific region only
#TODO: what happens when there are no trips?
@ -135,20 +138,14 @@ def generate_gtfs_rt():
for region in container['regions'].regions.values():
rt = producer.export_feed(time.time(), f"data/gtfs/amarillo.{region.id}.gtfsrt", bbox=region.bbox)
# def start_schedule():
# # schedule.every().day.at("00:00").do(midnight)
def start_schedule():
schedule.every().day.at("00:00").do(midnight)
# schedule.every(60).seconds.do(generate_gtfs_rt)
# # Create all feeds once at startup
# Create all feeds once at startup
# schedule.run_all()
# job_thread = threading.Thread(target=run_schedule, daemon=True)
# job_thread.start()
job_thread = threading.Thread(target=run_schedule, daemon=True)
job_thread.start()
# def setup(app : FastAPI):
# # TODO: Create all feeds once at startup
# # configure_enhancer_services()
# # app.include_router(router)
# # start_schedule()
# pass
logging.config.fileConfig('logging.conf', disable_existing_loggers=False)
logger = logging.getLogger("gtfs-generator")
@ -272,11 +269,11 @@ async def get_file(region_id: str, format: str = 'protobuf'):
#TODO: sync endpoint that calls midnight
# @app.post("/sync",
# operation_id="sync")
# #TODO: add examples
# async def post_sync():
@app.post("/sync",
operation_id="sync")
#TODO: add examples
async def post_sync():
# logger.info(f"Sync")
logger.info(f"Sync")
# midnight()
midnight()

View file

@ -3,7 +3,7 @@ from ..models.Carpool import MAX_STOPS_PER_TRIP, Carpool, Weekday, StopTime, Pic
# from amarillo.services.config import config
from ..gtfs_constants import *
# from amarillo.plugins.enhancer.services.routing import RoutingService, RoutingException
from ..services.stops import is_carpooling_stop
from amarillo_stops.stops import is_carpooling_stop
from amarillo.utils.utils import assert_folder_exists, is_older_than_days, yesterday, geodesic_distance_in_m
from shapely.geometry import Point, LineString, box
from geojson_pydantic.geometries import LineString as GeoJSONLineString

View file

@ -1,8 +1,8 @@
from amarillo.tests.sampledata import carpool_1234, data1, carpool_repeating_json, stop_issue
from amarillo.plugins.enhancer.services.gtfs_export import GtfsExport
from amarillo.plugins.enhancer.services.gtfs import GtfsRtProducer
from amarillo.plugins.enhancer.services.stops import StopsStore
from amarillo.plugins.enhancer.services.trips import TripStore
from amarillo_gtfs_generator.gtfs_export import GtfsExport
from amarillo_gtfs_generator.gtfs import GtfsRtProducer
from amarillo_stops.stops import StopsStore
from amarillo_gtfs_generator.services.trips import TripStore
from amarillo.models.Carpool import Carpool
from datetime import datetime
import time