cmsmanage/membershipworks/tasks/scrape.py
Adam Goldsmith 62e48c6e6f
All checks were successful
Ruff / ruff (push) Successful in 29s
Test / test (push) Successful in 3m9s
membershipworks: Use bulk upsert when scraping transactions
Instead of trying to do fiddly things with relative times, which turns
out not to work very well. This will break if anyone changes the sid
or timestamp of a transaction though (because of course
MembershipWorks allows editing those).
2024-09-02 12:50:08 -04:00

205 lines
6.6 KiB
Python

import logging
from collections.abc import Iterable
from datetime import datetime, timedelta
from django.conf import settings
from django.db import transaction
from django.db.models import QuerySet
from cmsmanage.django_q2_helper import q_task_group
from membershipworks.membershipworks_api import FieldType, MembershipWorks
from membershipworks.models import (
Event,
EventCategory,
EventExt,
Flag,
Member,
Transaction,
)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Duration cut-off used by scrape_events: only events that end after they start
# and last less than this get a meeting time created/synced from their start/end.
MAX_MEETING_TIME = timedelta(hours=6)
def flags_for_member(csv_member, all_flags, folders):
    """Lazily yield each flag in ``all_flags`` that applies to ``csv_member``.

    A flag whose ``type`` is "folder" applies when the member's "Account ID"
    is contained in ``folders[flag.id]``.  Any other flag applies when the
    member's column named after the flag holds the flag's own name.

    Raises:
        KeyError: while iterating, if the member has no "Account ID" / flag
            column, or ``folders`` has no entry for a folder flag.
    """
    for candidate in all_flags:
        if candidate.type != "folder":
            # label/level/addon style flag: the column value repeats the flag name
            applies = csv_member[candidate.name] == candidate.name
        else:
            applies = csv_member["Account ID"] in folders[candidate.id]

        if applies:
            yield candidate
def update_flags(mw_flags) -> Iterable[Flag]:
    """Save a ``Flag`` for every entry of ``mw_flags`` and yield it.

    ``mw_flags`` is a two-level mapping: group key -> {flag name -> flag id}.
    The stored flag type is the group key without its last character
    (e.g. the "folders" group produces flags of type "folder").

    This is a generator, so nothing is saved until it is iterated.
    """
    for group_key, group in mw_flags.items():
        # drop the trailing character of the group key to get the flag type
        flag_type = group_key[:-1]
        for flag_name, flag_id in group.items():
            saved = Flag(id=flag_id, name=flag_name, type=flag_type)
            saved.save()
            yield saved
def scrape_members(membershipworks: MembershipWorks):
    """Sync flags, folder membership and members from MembershipWorks.

    Steps, in order: save every flag, fetch the member ids of every folder,
    then fetch all members and create/update each one together with its flags.

    Args:
        membershipworks: API client used for all requests (the caller in this
            module logs it in before calling).
    """
    logger.info("Updating flags (labels, levels, and addons)")
    # Parse the flag data once and reuse it for both the Flag rows and the
    # folder lookup.  NOTE(review): assumes _parse_flags() returns the same
    # data on repeated calls — it used to be called twice.
    mw_flags = membershipworks._parse_flags()
    flags = list(update_flags(mw_flags))

    logger.info("Getting folder membership")
    folders = {
        folder_id: membershipworks.get_member_ids([folder_name])
        for folder_name, folder_id in mw_flags["folders"].items()
    }

    logger.info("Getting/Updating members...")
    members = membershipworks.get_all_members()

    # Labels of all checkbox fields, computed once instead of once per member.
    # A set also guarantees each column is converted only once: converting the
    # same column twice would compare an already-converted bool against "Y"
    # and turn True into False.
    checkbox_labels = {
        field["lbl"]
        for field in membershipworks._all_fields().values()
        if field.get("typ") == FieldType.CHECKBOX.value
    }

    for csv_member in members:
        # convert checkboxes to real booleans
        for label in checkbox_labels:
            if label in csv_member:
                csv_member[label] = csv_member[label] == "Y"

        # create/update member
        member = Member.from_api_dict(csv_member)
        member.clean_fields()
        member.save()
        member.flags.set(flags_for_member(csv_member, flags, folders))
def scrape_transactions(membershipworks: MembershipWorks):
    """Fetch all transactions since 2010-01-01 and upsert them.

    The transaction list is requested twice for the same date range, once in
    the default form and once with ``json=True``.  The two results are merged
    row by row (JSON keys win on conflict) and bulk-upserted, using
    ``("sid", "timestamp")`` as the conflict key.

    Args:
        membershipworks: API client used for both requests.

    Raises:
        ValueError: from ``zip(strict=True)`` if the two results differ in
            length.
        AssertionError: if a merged row's "Account ID"/"Payment ID" do not
            match its "uid"/"sid", i.e. the two results are not aligned.
    """
    now = datetime.now()
    start_date = datetime(2010, 1, 1)

    transactions_csv = membershipworks.get_transactions(start_date, now)
    transactions_json = membershipworks.get_transactions(start_date, now, json=True)

    # Pairing rows purely by position is fragile; it only works as long as both
    # requests cover the same dates and come back in the same order.
    transactions_data = [
        {**csv_row, **json_row}
        for csv_row, json_row in zip(transactions_csv, transactions_json, strict=True)
    ]

    # Explicit raise instead of `assert`, so the alignment check is not
    # stripped when Python runs with -O.  The exception type is unchanged.
    for index, row in enumerate(transactions_data):
        same_account = row["Account ID"] == row.get("uid", "")
        if not (same_account and row["Payment ID"] == row.get("sid", "")):
            raise AssertionError(
                f"transaction row {index}: CSV and JSON results are not aligned"
            )

    transactions = [Transaction.from_api_dict(row) for row in transactions_data]

    Transaction.objects.bulk_create(
        transactions,
        update_conflicts=True,
        update_fields=(
            "member",
            "type",
            "sum",
            "fee",
            "event_id",
            "for_what",
            "items",
            "discount_code",
            "note",
            "name",
            "contact_person",
            "full_address",
            "street",
            "city",
            "state_province",
            "postal_code",
            "country",
            "phone",
            "email",
        ),
        unique_fields=("sid", "timestamp"),
    )
@q_task_group("Scrape MembershipWorks Data")
@transaction.atomic
def scrape_membershipworks(*args, **options):
    """Log in to MembershipWorks, then scrape members followed by transactions.

    The function is decorated with ``transaction.atomic``, so both scrapes run
    inside one database transaction.  Positional and keyword arguments are
    accepted but not used.
    """
    client = MembershipWorks()
    username = settings.MEMBERSHIPWORKS_USERNAME
    password = settings.MEMBERSHIPWORKS_PASSWORD
    client.login(username, password)

    # members first, then transactions
    scrape_members(client)
    scrape_transactions(client)
def scrape_event_details(queryset: QuerySet[EventExt]):
    """Re-fetch details and registrations for every event in ``queryset``.

    Logs in with the credentials from settings, then for each event stores the
    result of the two API lookups (keyed by the event's ``eid``) and saves it.
    """
    client = MembershipWorks()
    username = settings.MEMBERSHIPWORKS_USERNAME
    password = settings.MEMBERSHIPWORKS_PASSWORD
    client.login(username, password)

    for event_ext in queryset:
        eid = event_ext.eid
        event_ext.details = client.get_event_by_eid(eid)
        event_ext.registrations = client.get_event_registrations(eid)
        event_ext.save()
@q_task_group("Scrape MembershipWorks Events")
def scrape_events():
    """Sync event categories, events and per-event extension data.

    Steps, in order:

    1. Fetch the event list (with categories) from the Unix epoch up to
       52 weeks from now.
    2. Save every category found under ``data["_st"]["evg"]``; a category's
       id is its position in that list.
    3. Bulk-upsert every event under ``data["evt"]``, keyed on ``eid``.
    4. For each event, make sure an ``EventExt`` exists, keep a single meeting
       time in sync for short events, and fetch details/registrations when
       they are missing or were retrieved before the event took place.
    5. Delete every ``EventExt`` that is not in the upserted event list.
    """
    membershipworks = MembershipWorks()
    membershipworks.login(
        settings.MEMBERSHIPWORKS_USERNAME, settings.MEMBERSHIPWORKS_PASSWORD
    )

    data = membershipworks.get_events_list(
        datetime.fromtimestamp(0), datetime.now() + timedelta(weeks=52), categories=True
    )
    # `data` is a mapping (indexed with "_st" and "evt" below), so the number
    # of events is the length of its "evt" list, not of `data` itself.
    logger.info("%d events retrieved!", len(data["evt"]))

    for category_id, category_data in enumerate(data["_st"]["evg"]):
        category = EventCategory.from_api_dict(category_id, category_data)
        category.clean_fields()
        category.save()

    events = Event.objects.bulk_create(
        [Event.from_api_dict(event_data) for event_data in data["evt"]],
        update_conflicts=True,
        unique_fields=["eid"],
        # on conflict, update every plain concrete column of Event
        update_fields=[
            field.attname
            for field in Event._meta.get_fields()
            if not (
                field.auto_created
                or field.many_to_many
                or not field.concrete
                or field.generated
                or field.primary_key
            )
        ],
    )

    for event in events:
        try:
            event_ext = EventExt.objects.get(event_ptr=event)
        except EventExt.DoesNotExist:
            event_ext = EventExt(event_ptr=event)
            # create extension model instance
            event_ext.save_base(raw=True)
            # reload so the new instance carries the values stored for the event
            event_ext.refresh_from_db()

        # only events that end after they start and are shorter than
        # MAX_MEETING_TIME get an automatically managed meeting time
        if (
            event_ext.end is not None
            and event_ext.end > event_ext.start
            and event_ext.end - event_ext.start < MAX_MEETING_TIME
        ):
            meeting_times_count = event_ext.meeting_times.count()
            if meeting_times_count == 0:
                event_ext.meeting_times.create(start=event_ext.start, end=event_ext.end)
            # if there is exactly one meeting time, it should match the event start/end
            elif meeting_times_count == 1:
                event_ext.meeting_times.update(start=event_ext.start, end=event_ext.end)
            # two or more meeting times are left untouched

        # event has no details, or last retrieval was before the event happened
        if event_ext.details is None or event_ext.details_timestamp < (
            event_ext.end or event_ext.start
        ):
            event_ext.details = membershipworks.get_event_by_eid(event.eid)
            event_ext.registrations = membershipworks.get_event_registrations(event.eid)
            event_ext.save()

    # delete all events that did not occur in the event list
    EventExt.objects.exclude(pk__in=events).delete()