Skip saving row if slug already exists in postgres

2020-05-03 13:41发布

问题:

I'm setting up a function in my Django views that calls an API and saves the data into my PostgreSQL database. Everything was working fine until I got an IntegrityError saying the slug key already exists, so I'm trying to find a way to skip or ignore the row if the slugified slug already exists.

I've been stuck on this all day and I need a solution to meet my expected timeline.

This is my Django Models:

class Product(models.Model):
    """A product record: destination, title, slug, description, link, image and tags.

    The slug is generated from ``title`` the first time an instance is saved
    (see ``save``) and is declared unique, so two products whose titles
    slugify to the same value cannot both be inserted.
    """

    destination = models.CharField(max_length=255, default='')
    title = models.CharField(max_length=255, default='')
    # unique=True: inserting a second row with an already-used slug fails
    # with an IntegrityError at the database level.
    slug = models.SlugField(unique=True, max_length=255, default='')
    # NOTE(review): Django documents max_length on a TextField as affecting
    # only the auto-generated form widget, not the database column -- confirm
    # the limits on these two fields are meant to be enforced elsewhere.
    description = models.TextField(max_length=2047, default='')
    link = models.TextField(max_length=500, default='')

    # Tag manager for this product; presumably django-taggit's
    # TaggableManager -- verify against the file's imports.
    ptags = TaggableManager()

    image = models.ImageField(max_length=500, default='images/zero-image-found.png')
    # auto_now=True: the field is set to the current time on every save.
    timestamp = models.DateTimeField(auto_now=True)

    def _ptags(self):
        """Return the names of all tags attached to this product as a list."""
        return [t.name for t in self.ptags.all()]

    def get_absolute_url(self):
        """Return the URL of the 'experience' route for this product's slug."""
        return reverse('experience',
                       kwargs={'slug': self.slug})

    def save(self, *args, **kwargs):
        """Set the slug from the title on first save, then save normally.

        All positional and keyword arguments are forwarded to the parent
        ``save()``.
        """
        # No id yet means the row has not been inserted. Any slug supplied by
        # the caller is overwritten here, and the generated slug is not
        # checked for uniqueness before the insert.
        if not self.id:
            self.slug = slugify(self.title)
        super(Product, self).save(*args, **kwargs)

    def __str__(self):
        """Return the destination as the human-readable representation."""
        return self.destination

And this is my function in Views:

def api_data(request):
    """Import Headout products into ``Product`` and render ``form.html``.

    When the ``mybtn`` GET parameter is present and truthy, the view:

    1. requests the product listing (cityCode=ROME, language=fr, limit=5000,
       currencyCode=CAD);
    2. requests the detail record of every listed item;
    3. takes the text of the first ``<p>`` found in the item's first
       ``contentListHtml`` HTML fragment;
    4. translates that text, the item name and the city name to French;
    5. calls ``Product.objects.get_or_create`` with the translated values,
       the canonical URL and the image URL;
    6. sleeps two seconds before moving on to the next item.

    Items are skipped (no row, no sleep) when the detail record has no
    ``contentListHtml[0]['html']``, when the fragment contains no ``<p>``,
    or when the paragraph text is empty.

    Returns the rendered ``form.html`` template whether or not an import ran.

    NOTE(review): no ``slug`` is passed to ``get_or_create``; ``Product.save``
    derives it from the title. An existing row that shares the slug but
    differs in any of the five lookup fields is therefore not found, and the
    subsequent insert fails on the unique slug with an ``IntegrityError``
    that propagates out of this view and aborts the remaining items.
    """
    if request.GET.get('mybtn'):  # to improve, == 'something':
        # Listing request: one call returning up to 5000 items for the city.
        resp_1 = requests.get(
            "https://www.headout.com/api/public/v1/product/listing/list-by/city?language=fr&cityCode=ROME&limit=5000&currencyCode=CAD",
            headers={
                "Headout-Auth": HEADOUT_PRODUCTION_API_KEY
            })
        resp_1_data = resp_1.json()
        base_url_2 = "https://www.headout.com/api/public/v1/product/get/"


        for item in resp_1_data['items']:
            print('parsing, translating and saving item {}'.format(item['id']))
            # Build the detail URL: base + item id + language query string.
            # Assumes item['id'] is a str (it is concatenated with a str) --
            # TODO confirm against the API response.
            url = '{}{}'.format(base_url_2, item['id'] + '?language=fr')

            # Fetch the detail record for this item.
            resp_2 = requests.get(
                url,
                headers={
                    "Headout-Auth": HEADOUT_PRODUCTION_API_KEY
                })
            resp_2_data = resp_2.json()

            # First HTML fragment, cut to 2040 characters with ' ...' appended.
            # Items without that fragment are skipped.
            try:
                descriptiontxt = resp_2_data['contentListHtml'][0]['html'][0:2040] + ' ...'
            except (IndexError, KeyError) as e:
                continue

            # Parse the fragment and keep only the text of its first <p>.
            soup = BeautifulSoup(descriptiontxt, 'lxml')

            # soup.find('p') returns None when there is no <p>, so .text
            # raises AttributeError and the item is skipped.
            try:
                parsed = soup.find('p').text
            except AttributeError:
                continue

            if len(parsed) == 0:
                continue

            # Translate description, title and destination to French.
            # NOTE(review): the API is already queried with language=fr;
            # confirm the extra translation step is still needed.
            translation = Translator().translate(text=parsed, dest='fr').text

            titlename = item['name']
            titlefr = Translator().translate(text=titlename, dest='fr').text

            destinationname = item['city']['name']
            destinationfr = Translator().translate(text=destinationname, dest='fr').text


            # Every keyword argument here is part of the lookup; a new row is
            # created only when no existing row matches all five values.
            Product.objects.get_or_create(
                title=titlefr,
                destination=destinationfr,
                description=translation,
                link=item['canonicalUrl'],
                image=item['image']['url'],
            )

            # Pause between stored items; presumably to throttle the remote
            # services -- confirm the intended reason.
            time.sleep(2)

    return render(request, "form.html")

Otherwise I keep getting an IntegrityError and I need to run my scripts again, which takes a long time.

So how can I fix this?

Please help.

EDIT

Traceback (most recent call last):
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\query.py", line 473, in get_or_create
    return self.get(**lookup), False
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\query.py", line 385, in get
    self.model._meta.object_name
search.models.DoesNotExist: Product matching query does not exist.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\backends\utils.py", line 64, in execute
    return self.cursor.execute(sql, params)
psycopg2.errors.UniqueViolation: duplicate key value violates unique constraint "search_product_slug_key"
DETAIL:  Key (slug)=(skydive-dubai-parachutisme-en-tandem-a-palm-drop-zone-burj-khalifa-gratuit) already exists.


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\core\handlers\exception.py", line 39, in inner
    response = get_response(request)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\core\handlers\base.py", line 187, in _get_response
    response = self.process_exception_by_middleware(e, request)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\core\handlers\base.py", line 185, in _get_response
    response = wrapped_callback(request, *callback_args, **callback_kwargs)
  File "C:\Users\loicq\Desktop\Coding\UVERGO_SEARCH\venv\src\search\views.py", line 170, in api_data
    brandlogo='https://cdn-imgix-open.headout.com/logo/www-desktop-8743256.png?w=300&h=50&fit=fill'
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\manager.py", line 85, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\query.py", line 475, in get_or_create
    return self._create_object_from_params(lookup, params)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\query.py", line 513, in _create_object_from_params
    six.reraise(*exc_info)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\utils\six.py", line 686, in reraise
    raise value
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\query.py", line 505, in _create_object_from_params
    obj = self.create(**params)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\query.py", line 399, in create
    obj.save(force_insert=True, using=self.db)
  File "C:\Users\loicq\Desktop\Coding\UVERGO_SEARCH\venv\src\search\models.py", line 55, in save
    super(Product, self).save(*args, **kwargs)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\base.py", line 796, in save
    force_update=force_update, update_fields=update_fields)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\base.py", line 824, in save_base
    updated = self._save_table(raw, cls, force_insert, force_update, using, update_fields)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\base.py", line 908, in _save_table
    result = self._do_insert(cls._base_manager, using, fields, update_pk, raw)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\base.py", line 947, in _do_insert
    using=using, raw=raw)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\manager.py", line 85, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\query.py", line 1045, in _insert
    return query.get_compiler(using=using).execute_sql(return_id)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\sql\compiler.py", line 1054, in execute_sql
    cursor.execute(sql, params)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\backends\utils.py", line 79, in execute
    return super(CursorDebugWrapper, self).execute(sql, params)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\backends\utils.py", line 64, in execute
    return self.cursor.execute(sql, params)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\utils.py", line 94, in __exit__
    six.reraise(dj_exc_type, dj_exc_value, traceback)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\utils\six.py", line 685, in reraise
    raise value.with_traceback(tb)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\backends\utils.py", line 64, in execute
    return self.cursor.execute(sql, params)
django.db.utils.IntegrityError: duplicate key value violates unique constraint "search_product_slug_key"
DETAIL:  Key (slug)=(skydive-dubai-parachutisme-en-tandem-a-palm-drop-zone-burj-khalifa-gratuit) already exists.

回答1:

get_or_create uses all the given keyword arguments (except the defaults keyword) to look for an existing row.

Change your code to this

# Keyword arguments outside ``defaults`` form the lookup for an existing row;
# the values in ``defaults`` are applied only when a new row is created.
Product.objects.get_or_create(
    title=titlefr,
    slug=slugify(titlefr),
    defaults=dict(
        destination=destinationfr,
        description=translation,
        link=item['canonicalUrl'],
        image=item['image']['url'],
    ),
)

Thus, only title and slug will be used to find possible duplicates. The other values, passed in defaults, will not be used for filtering; they are only used when a new row is created.

I also suggest moving the slug field initialization into the clean_fields() method.



回答2:

You will need to check if the slug already exists before saving

def save(self, *args, **kwargs):
    """Save the product, assigning a unique slug on the first save.

    For an instance without an ``id`` the slug is derived from
    ``slugify(self.title)``. If that slug is already used by another row,
    ``-2``, ``-3``, ... is appended until an unused slug is found; the base
    is shortened when needed so the result fits the slug field's
    ``max_length``. Instances that already have an ``id`` keep their slug.

    All positional and keyword arguments are forwarded unchanged to the
    parent ``save()``.
    """
    if not self.id:
        max_length = self._meta.get_field('slug').max_length
        base_slug = slugify(self.title)[:max_length]
        candidate = base_slug
        suffix = 1
        # Probe each candidate explicitly instead of counting rows with
        # slug__startswith: a prefix count also matches unrelated slugs
        # ("rome" vs "rome-by-night") and can reuse a number after deletions.
        while Product.objects.filter(slug=candidate).exists():
            suffix += 1
            tail = '-{}'.format(suffix)
            candidate = base_slug[:max_length - len(tail)] + tail
        self.slug = candidate

    # Must run for existing rows as well; otherwise updates are never written.
    # The exists() probe and the insert are not atomic, so the unique
    # constraint on slug remains the final guard against concurrent duplicates.
    super(Product, self).save(*args, **kwargs)