fix(jira-import): handle Jira API v3 ADF format and explicit field selection (#825)

Two fixes for real-world Jira Cloud usage:

1. Add fields=*all to API request - v3 search/jql endpoint returns
   only issue IDs by default, causing empty title/description imports

2. Add adf_to_text() converter for Atlassian Document Format - API v3
   returns rich text fields as ADF JSON instead of plain text/HTML

Also documented:
- Silent auth failure gotcha (200 with empty results vs 401)
- ADF format explanation with conversion examples
This commit is contained in:
Ezo Saleh
2025-12-31 19:37:51 +00:00
committed by GitHub
parent 6893eb6080
commit 8ab9b815ba
2 changed files with 93 additions and 4 deletions

View File

@@ -345,6 +345,7 @@ python jira2jsonl.py --from-config --id-mode hash | bd import
- Verify you're using your email as username
- Create a fresh API token at https://id.atlassian.com/manage-profile/security/api-tokens
- Ensure the token has access to the project
- **Silent auth failure**: When credentials are rejected, the Jira API may return HTTP 200 with empty results instead of 401. Check responses for the `X-Seraph-LoginReason: AUTHENTICATED_FAILED` header.
**Jira Server/DC:**
- Try using a Personal Access Token instead of password
@@ -376,6 +377,30 @@ Jira Cloud has rate limits. For large imports:
This script fetches 100 issues per request, so a 1000-issue project requires ~10 API calls.
## Jira API v3 Notes
This script uses the Jira REST API v3 `/rest/api/3/search/jql` endpoint. The older `/rest/api/3/search` endpoint has been removed and now returns HTTP 410 Gone. Two things to keep in mind when working with the v3 endpoint:
### Explicit Field Selection
The v3 search endpoint returns only issue IDs by default. The script explicitly requests `fields=*all` to retrieve all fields. Without this parameter, you'll get issues with no title, description, or other metadata.
### Atlassian Document Format (ADF)
Jira API v3 returns rich text fields (like `description`) in Atlassian Document Format - a JSON structure rather than plain text or HTML. The script automatically converts ADF to markdown:
**ADF input:**
```json
{"type": "doc", "content": [{"type": "heading", "attrs": {"level": 3}, "content": [{"type": "text", "text": "Overview"}]}]}
```
**Converted output:**
```markdown
### Overview
```
Supported ADF node types: paragraph, heading, bulletList, orderedList, listItem, codeBlock, blockquote, hardBreak, rule, inlineCard, mention, and text nodes.
---
# Export: jsonl2jira.py

View File

@@ -130,6 +130,67 @@ def generate_hash_id(
return f"{prefix}-{short_hash}"
def _adf_content(node: dict) -> list:
    """Return the child nodes of an ADF node, tolerating missing/null content."""
    content = node.get("content")
    # JSON payloads may carry an explicit null (or a malformed value) here.
    return content if isinstance(content, list) else []


def _adf_list_item(node: Any, marker: str) -> str:
    """Render one list entry behind *marker*, indenting continuation lines.

    Continuation lines (extra paragraphs, hard breaks, nested lists) are
    indented by the marker width so they stay inside the item when the
    result is rendered as markdown.
    """
    if isinstance(node, dict):
        body = "".join(adf_to_text(child) for child in _adf_content(node))
    else:
        body = adf_to_text(node)
    first, *rest = body.strip().split("\n")
    pad = " " * len(marker)
    lines = [f"{marker}{first}"]
    # Blank lines stay empty to avoid emitting trailing whitespace.
    lines.extend(f"{pad}{line}" if line else "" for line in rest)
    return "\n".join(lines) + "\n"


def adf_to_text(node: Any) -> str:
    """
    Convert Atlassian Document Format (ADF) to plain text/markdown.

    ADF is returned by Jira API v3 for rich text fields like description.

    Args:
        node: An ADF node (dict), an already-plain string, or None.

    Returns:
        The markdown-ish text for the node. Strings are returned unchanged;
        None and any other non-dict value yield an empty string. A top-level
        "doc" node is stripped of leading/trailing whitespace.
    """
    if node is None:
        return ""
    if isinstance(node, str):
        return node
    if not isinstance(node, dict):
        return ""

    node_type = node.get("type", "")
    attrs = node.get("attrs")
    if not isinstance(attrs, dict):
        # "attrs" may be absent or null; treat both as "no attributes".
        attrs = {}

    # Text node - just return the text
    if node_type == "text":
        text = node.get("text")
        return text if isinstance(text, str) else ""

    # List containers are handled before the generic recursion because an
    # ordered list has to number its own items.
    if node_type == "orderedList":
        start = attrs.get("order", 1)
        number = start if isinstance(start, int) and start >= 0 else 1
        parts = []
        for child in _adf_content(node):
            if isinstance(child, dict) and child.get("type") == "listItem":
                parts.append(_adf_list_item(child, f"{number}. "))
                number += 1
            else:
                parts.append(adf_to_text(child))
        items = "".join(parts)
        # The blank line ends the list so following text is not absorbed.
        return f"{items}\n" if items else ""
    if node_type == "listItem":
        return _adf_list_item(node, "- ")

    # Recursively process content
    children_text = "".join(adf_to_text(child) for child in _adf_content(node))

    # Handle different node types
    if node_type == "doc":
        return children_text.strip()
    if node_type == "paragraph":
        return children_text + "\n\n"
    if node_type == "heading":
        level = attrs.get("level", 1)
        if not isinstance(level, int):
            level = 1
        # Markdown only defines heading levels 1 through 6.
        prefix = "#" * min(max(level, 1), 6)
        return f"{prefix} {children_text}\n\n"
    if node_type == "bulletList":
        # The blank line ends the list so following text is not absorbed.
        return f"{children_text}\n" if children_text else ""
    if node_type == "codeBlock":
        lang = attrs.get("language") or ""
        code = children_text
        # The closing fence must start on its own line to be recognised.
        if code and not code.endswith("\n"):
            code += "\n"
        return f"```{lang}\n{code}```\n\n"
    if node_type == "blockquote":
        lines = children_text.strip().split("\n")
        return "\n".join(f"> {line}" for line in lines) + "\n\n"
    if node_type == "hardBreak":
        return "\n"
    if node_type == "rule":
        return "---\n\n"
    if node_type == "inlineCard":
        return str(attrs.get("url") or "")
    if node_type == "mention":
        name = str(attrs.get("text") or "")
        # ADF mention text usually already starts with "@"; don't double it.
        return name if name.startswith("@") else f"@{name}"

    # For unknown types, just return children text
    return children_text
def get_bd_config(key: str) -> Optional[str]:
"""Get a configuration value from bd config."""
try:
@@ -382,7 +443,7 @@ class JiraToBeads:
# Use API v3 (v2 deprecated and returns HTTP 410 Gone)
# See: https://developer.atlassian.com/changelog/#CHANGE-2046
api_url = f"{url}/rest/api/3/search/jql"
params = f"jql={quote(query)}&startAt={start_at}&maxResults={max_results}&expand=changelog"
params = f"jql={quote(query)}&startAt={start_at}&maxResults={max_results}&fields=*all&expand=changelog"
full_url = f"{api_url}?{params}"
headers = {
@@ -546,7 +607,8 @@ class JiraToBeads:
bd_id = None
max_length = 8
title = fields.get("summary", "")
description = fields.get("description") or ""
raw_desc = fields.get("description")
description = adf_to_text(raw_desc) if isinstance(raw_desc, dict) else (raw_desc or "")
for length in range(self.hash_length, max_length + 1):
for nonce in range(10):
@@ -586,11 +648,13 @@ class JiraToBeads:
updated_at = self.parse_jira_timestamp(fields.get("updated"))
resolved_at = self.parse_jira_timestamp(fields.get("resolutiondate"))
# Build bd issue
# Build bd issue - convert ADF description to text
raw_desc = fields.get("description")
desc_text = adf_to_text(raw_desc) if isinstance(raw_desc, dict) else (raw_desc or "")
issue = {
"id": bd_id,
"title": fields.get("summary", ""),
"description": fields.get("description") or "",
"description": desc_text,
"status": self.map_status(fields.get("status")),
"priority": self.map_priority(fields.get("priority")),
"issue_type": self.map_issue_type(fields.get("issuetype")),