From 62e48c6e6f861c62bb692df565fa89045f403250 Mon Sep 17 00:00:00 2001 From: Adam Goldsmith Date: Mon, 2 Sep 2024 12:47:26 -0400 Subject: [PATCH] membershipworks: Use bulk upsert when scraping transactions Instead of trying to do fiddly things with relative times, which turns out not to work very well. This will break if anyone changes the sid or timestamp of a transaction though (because of course MembershipWorks allows editing those). --- .../0003_transaction_unique_sid_timestamp.py | 18 ++++++++ membershipworks/models.py | 3 ++ membershipworks/tasks/scrape.py | 42 ++++++++++++++----- 3 files changed, 52 insertions(+), 11 deletions(-) create mode 100644 membershipworks/migrations/0003_transaction_unique_sid_timestamp.py diff --git a/membershipworks/migrations/0003_transaction_unique_sid_timestamp.py b/membershipworks/migrations/0003_transaction_unique_sid_timestamp.py new file mode 100644 index 0000000..fad8a52 --- /dev/null +++ b/membershipworks/migrations/0003_transaction_unique_sid_timestamp.py @@ -0,0 +1,18 @@ +# Generated by Django 5.1 on 2024-09-02 16:29 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("membershipworks", "0002_historical_member_and_flags"), + ] + + operations = [ + migrations.AddConstraint( + model_name="transaction", + constraint=models.UniqueConstraint( + models.F("sid"), models.F("timestamp"), name="unique_sid_timestamp" + ), + ), + ] diff --git a/membershipworks/models.py b/membershipworks/models.py index e6a13cf..8502061 100644 --- a/membershipworks/models.py +++ b/membershipworks/models.py @@ -373,6 +373,9 @@ class Transaction(BaseModel): class Meta: db_table = "transactions" + constraints = [ + models.UniqueConstraint("sid", "timestamp", name="unique_sid_timestamp") + ] def __str__(self): return f"{self.type} [{self.member if self.member else self.name}] {self.timestamp}" diff --git a/membershipworks/tasks/scrape.py b/membershipworks/tasks/scrape.py index 6a004f7..119c4b0 100644 --- a/membershipworks/tasks/scrape.py +++ b/membershipworks/tasks/scrape.py @@ -70,27 +70,47 @@ def scrape_members(membershipworks: MembershipWorks): def scrape_transactions(membershipworks: MembershipWorks): now = datetime.now() start_date = datetime(2010, 1, 1) - last_transaction = Transaction.objects.order_by("timestamp").last() - if last_transaction is not None: - # technically this has the potential to lose - # transactions, but it should be incredibly unlikely - start_date = last_transaction.timestamp + timedelta(seconds=1) - - logger.info(f"Getting/Updating transactions since {start_date}...") transactions_csv = membershipworks.get_transactions(start_date, now) transactions_json = membershipworks.get_transactions(start_date, now, json=True) # this is terrible, but as long as the dates are the same, should be fiiiine - transactions = [ + transactions_data = [ {**j, **v} for j, v in zip(transactions_csv, transactions_json, strict=True) ] assert all( t["Account ID"] == t.get("uid", "") and t["Payment ID"] == t.get("sid", "") - for t in transactions + for t in transactions_data ) - for csv_transaction in transactions: - Transaction.from_api_dict(csv_transaction).save() + transactions = [ + Transaction.from_api_dict(transaction) for transaction in transactions_data + ] + Transaction.objects.bulk_create( + transactions, + update_conflicts=True, + update_fields=( + "member", + "type", + "sum", + "fee", + "event_id", + "for_what", + "items", + "discount_code", + "note", + "name", + "contact_person", + "full_address", + "street", + "city", + "state_province", + "postal_code", + "country", + "phone", + "email", + ), + unique_fields=("sid", "timestamp"), + ) @q_task_group("Scrape MembershipWorks Data")