diff --git a/membershipworks/migrations/0003_transaction_unique_sid_timestamp.py b/membershipworks/migrations/0003_transaction_unique_sid_timestamp.py
new file mode 100644
index 0000000..fad8a52
--- /dev/null
+++ b/membershipworks/migrations/0003_transaction_unique_sid_timestamp.py
@@ -0,0 +1,18 @@
+# Generated by Django 5.1 on 2024-09-02 16:29
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("membershipworks", "0002_historical_member_and_flags"),
+    ]
+
+    operations = [
+        migrations.AddConstraint(
+            model_name="transaction",
+            constraint=models.UniqueConstraint(
+                models.F("sid"), models.F("timestamp"), name="unique_sid_timestamp"
+            ),
+        ),
+    ]
diff --git a/membershipworks/models.py b/membershipworks/models.py
index e6a13cf..8502061 100644
--- a/membershipworks/models.py
+++ b/membershipworks/models.py
@@ -373,6 +373,9 @@ class Transaction(BaseModel):
 
     class Meta:
         db_table = "transactions"
+        constraints = [
+            models.UniqueConstraint("sid", "timestamp", name="unique_sid_timestamp")
+        ]
 
     def __str__(self):
         return f"{self.type} [{self.member if self.member else self.name}] {self.timestamp}"
diff --git a/membershipworks/tasks/scrape.py b/membershipworks/tasks/scrape.py
index 6a004f7..119c4b0 100644
--- a/membershipworks/tasks/scrape.py
+++ b/membershipworks/tasks/scrape.py
@@ -70,27 +70,47 @@ def scrape_members(membershipworks: MembershipWorks):
 def scrape_transactions(membershipworks: MembershipWorks):
     now = datetime.now()
     start_date = datetime(2010, 1, 1)
-    last_transaction = Transaction.objects.order_by("timestamp").last()
-    if last_transaction is not None:
-        # technically this has the potential to lose
-        # transactions, but it should be incredibly unlikely
-        start_date = last_transaction.timestamp + timedelta(seconds=1)
-
-    logger.info(f"Getting/Updating transactions since {start_date}...")
 
     transactions_csv = membershipworks.get_transactions(start_date, now)
     transactions_json = membershipworks.get_transactions(start_date, now, json=True)
     # this is terrible, but as long as the dates are the same, should be fiiiine
-    transactions = [
+    transactions_data = [
         {**j, **v} for j, v in zip(transactions_csv, transactions_json, strict=True)
     ]
     assert all(
         t["Account ID"] == t.get("uid", "") and t["Payment ID"] == t.get("sid", "")
-        for t in transactions
+        for t in transactions_data
     )
 
-    for csv_transaction in transactions:
-        Transaction.from_api_dict(csv_transaction).save()
+    transactions = [
+        Transaction.from_api_dict(transaction) for transaction in transactions_data
+    ]
+    Transaction.objects.bulk_create(
+        transactions,
+        update_conflicts=True,
+        update_fields=(
+            "member",
+            "type",
+            "sum",
+            "fee",
+            "event_id",
+            "for_what",
+            "items",
+            "discount_code",
+            "note",
+            "name",
+            "contact_person",
+            "full_address",
+            "street",
+            "city",
+            "state_province",
+            "postal_code",
+            "country",
+            "phone",
+            "email",
+        ),
+        unique_fields=("sid", "timestamp"),
+    )
 
 
 @q_task_group("Scrape MembershipWorks Data")