One of our depositors is trying to upload some files to his dataset at DataverseNO using a Python script [1], but gets the following error message:
Complete upload response: {"status":"ERROR","message":"Only authenticated users can perform the requested operation"}
Failed to complete multipart upload
Failed to process CD44.zip
Any idea what is going wrong here?
Best,
Philipp
[1] Python file upload script:
import os
import json
import requests
import hashlib
# Configuration
API_TOKEN = "XXXX"
SERVER_URL = "https://dataverse.no"
PERSISTENT_ID = "doi:10.18710/DIGQGQ"
FILES_PATH = r"J:\Downloads\transfer_369660_files_a6d0b3f0"
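
# For context: as far as I can tell, the script follows the S3 direct-upload
# workflow from the Dataverse API docs -- request pre-signed upload URLs,
# PUT the file (or its parts) straight to S3, then register the uploaded
# file with the dataset.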
def get_upload_urls(file_size):
    """Step 1: Request upload URLs from Dataverse"""
    headers = {"X-Dataverse-key": API_TOKEN}
    url = f"{SERVER_URL}/api/datasets/:persistentId/uploadurls"
    params = {
        "persistentId": PERSISTENT_ID,
        "size": file_size
    }
    response = requests.get(url, headers=headers, params=params)
    return response.json()
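
# For reference (simplified from memory of the docs; exact fields may vary
# by Dataverse version): for files above the part-size limit, "data" looks
# roughly like
#   {"partSize": ..., "urls": {"1": "<pre-signed S3 URL>", ...},
#    "abort": "/api/datasets/mpupload?...",
#    "complete": "/api/datasets/mpupload?...",
#    "storageIdentifier": "s3://..."}
# while smaller files get a single pre-signed "url" entry instead.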
def upload_part(url, part_data):
    """Upload a single part to S3"""
    headers = {"x-amz-tagging": "dv-state=temp"}
    response = requests.put(url, headers=headers, data=part_data)
    if response.status_code == 200:
        return response.headers.get('ETag')
    return None
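
# S3 returns an ETag response header for every successfully uploaded part;
# the completion step needs the full part-number -> ETag mapping.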
def complete_multipart(complete_url, etags):
    """Complete the multipart upload"""
    response = requests.put(complete_url, json=etags)
    return response.status_code == 200
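
# Note: unlike the per-part URLs, which are pre-signed S3 URLs, the
# "complete" URL from /uploadurls is a relative path on the Dataverse
# server itself (it is prefixed with SERVER_URL in process_file below).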
def register_file(filename, storage_id):
    """Register the uploaded file in the dataset"""
    headers = {"X-Dataverse-key": API_TOKEN}
    url = f"{SERVER_URL}/api/datasets/:persistentId/add"
    params = {"persistentId": PERSISTENT_ID}
    json_data = {
        "description": f"Upload of {filename}",
        "fileName": filename,
        "mimeType": "application/zip",
        "storageIdentifier": storage_id
    }
    files = {"jsonData": (None, json.dumps(json_data))}
    response = requests.post(url, headers=headers, params=params, files=files)
    return response.json()
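
# If registration succeeds, /add returns a JSON body with "status": "OK"
# (plus the new file's metadata under "data"); process_file below only
# checks the "status" field.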
def process_file(filename):
    file_path = os.path.join(FILES_PATH, filename)
    file_size = os.path.getsize(file_path)
    print(f"\nProcessing {filename} ({file_size:,} bytes)")

    # Step 1: Get upload URLs
    print("Requesting upload URLs...")
    upload_response = get_upload_urls(file_size)
    if "data" not in upload_response:
        print(f"Error getting upload URLs: {upload_response}")
        return False

    upload_data = upload_response["data"]
    part_size = upload_data.get("partSize", file_size)

    # Handle multipart or single upload
    if "urls" in upload_data:  # Multipart upload
        print(f"Starting multipart upload with {len(upload_data['urls'])} parts")
        etags = {}
        with open(file_path, 'rb') as f:
            for part_num in upload_data["urls"].keys():
                print(f"Uploading part {part_num}...")
                part_data = f.read(part_size)
                if not part_data:
                    break
                etag = upload_part(upload_data["urls"][part_num], part_data)
                if etag:
                    etags[part_num] = etag
                    print(f"Part {part_num} uploaded successfully")
                else:
                    print(f"Failed to upload part {part_num}")
                    return False

        # Complete multipart upload
        print("Completing multipart upload...")
        complete_url = f"{SERVER_URL}{upload_data['complete']}"
        if not complete_multipart(complete_url, etags):
            print("Failed to complete multipart upload")
            return False
    else:  # Single upload
        print("Starting single file upload...")
        with open(file_path, 'rb') as f:
            if not upload_part(upload_data["url"], f.read()):
                print("Failed to upload file")
                return False

    # Register file
    print("Registering file in dataset...")
    register_response = register_file(filename, upload_data["storageIdentifier"])
    if register_response.get("status") == "OK":
        print("File registered successfully")
        return True
    else:
        print(f"Failed to register file: {register_response}")
        return False
def main():
    zip_files = [f for f in os.listdir(FILES_PATH) if f.endswith('.zip')]
    print(f"Found {len(zip_files)} zip files to process")

    for filename in zip_files:
        try:
            if process_file(filename):
                print(f"Successfully processed {filename}")
            else:
                print(f"Failed to process {filename}")
        except Exception as e:
            print(f"Error processing {filename}: {str(e)}")

if __name__ == "__main__":
    print("Starting upload process...")
    main()
    print("\nUpload process completed")