Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 67 additions & 27 deletions libraries/to-string/go/transform.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"encoding/json"
"errors"
"fmt"
"slices"
"strings"

contenttree "github.com/Financial-Times/content-tree"
Expand All @@ -23,11 +22,7 @@ var (
BodyTree Schema = schema("body-tree")
)

var (
ErrUnknownKind = errors.New("unknown tree kind")
)

var toSeparate = []string{contenttree.HeadingType, contenttree.ParagraphType}
var ErrUnknownKind = errors.New("unknown tree kind")

// Transform extracts and returns plain text from a content tree represented as unmarshalled JSON(json.RawMessage).
func Transform(tree json.RawMessage, s Schema) (string, error) {
Expand Down Expand Up @@ -59,23 +54,13 @@ func unmarshalAndTransform(tree json.RawMessage, n contenttree.Node) (string, er
text = strings.TrimSpace(text)

return text, nil

}

func transformNode(n contenttree.Node) (string, error) {
if n == nil {
return "", errors.New("nil node")
}

if n.GetType() == contenttree.RootType {
root, ok := n.(*contenttree.Root)
if !ok {
return "", errors.New("failed to parse node to root")
}

return transformNode(root.Body)
}

switch n.GetType() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: we should add case: contenttree.TimelineEventChildType node for completeness.

The code works without it because in the default case we are calling n.GetChildren() which internally calls the GetChildren on the embedded node. For example,

func (n *TimelineEventChild) GetChildren() []Node {
     if n.Paragraph != nil {
        return n.Paragraph.GetChildren()
     }
    if n.ImageSet != nil {
       return n.ImageSet.GetChildren()
    }
    return nil
}

So theoretically, we could get rid of all the checks where we return transformNode(n.GetEmbedded()).

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think removing the transformNode(n.GetEmbedded()) would be the better approach here.
No need to burden the switch with more cases than needed. What do you think @todor-videv1?

Copy link

@todor-videv1 todor-videv1 Nov 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Two options here... if we are sure we can safely remove the n.GetEmbedded()... less is more, I'd advse to leave a comment in the code how the default case is dealing with these embeds though.

If we have even an ounce of doubt, add the additional case like Lokender suggests, and then when we fix the tests, we can do this enhancement.

case contenttree.BodyBlockType:
return transformNode(n.GetEmbedded())
Expand All @@ -95,18 +80,79 @@ func transformNode(n contenttree.Node) (string, error) {
return transformNode(n.GetEmbedded())
case contenttree.TableChildType:
return transformNode(n.GetEmbedded())
}

if n.GetType() == contenttree.TextType {
case contenttree.RecommendedType:
return "", nil
case contenttree.RecommendedListType:
return "", nil
case contenttree.TextType:
text, ok := n.(*contenttree.Text)
if !ok {
return "", errors.New("failed to parse node to text")
}

return text.Value, nil
case contenttree.RootType:
root, ok := n.(*contenttree.Root)
if !ok {
return "", errors.New("failed to parse node to root")
}

return transformNode(root.Body)
case contenttree.BigNumberType:
bigNumber, ok := n.(*contenttree.BigNumber)
if !ok {
return "", errors.New("failed to parse node to bigNumber")
}

return fmt.Sprintf("%s %s ", bigNumber.Number, bigNumber.Description), nil
case contenttree.PullquoteType:
pq, ok := n.(*contenttree.Pullquote)
if !ok {
return "", errors.New("failed to parse node to Pullquote")
}

return fmt.Sprintf("%s ", pq.Text), nil
case contenttree.InNumbersType:
in, ok := n.(*contenttree.InNumbers)
if !ok {
return "", errors.New("failed to parse node to InNumbers")
}
resultChildred, err := transformChildren(in.GetChildren())
if err != nil {
return "", err
}

return fmt.Sprintf("%s %s ", in.Title, resultChildred), nil
case contenttree.DefinitionType:
def, ok := n.(*contenttree.Definition)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Issue: Here the node should be typecast to contenttree.InNumber and the title field on it should be used.

in, ok := n.(*contenttree.InNumbers)
if !ok {
         return "", errors.New("failed to parse node to InNumbers")
}
 resultChildred, err := transformChildren(in.GetChildren())
if err != nil {
     return "", err
}
return fmt.Sprintf("%s %s", in.Title, resultChildred), nil

Then, we will need an additional case for Definition type.

if !ok {
return "", errors.New("failed to parse node to Definition")
}

return fmt.Sprintf("%s %s ", def.Term, def.Description), nil
case contenttree.TimelineEventType:
te, ok := n.(*contenttree.TimelineEvent)
if !ok {
return "", errors.New("failed to parse node to text")
}

resultChildred, err := transformChildren(te.GetChildren())
if err != nil {
return "", err
}

return fmt.Sprintf("%s %s ", te.Title, resultChildred), nil
default:
result, err := transformChildren(n.GetChildren())
if err != nil {
return "", err
}

return result, nil
}
}

childrenNodes := n.GetChildren()
func transformChildren(childrenNodes []contenttree.Node) (string, error) {
if len(childrenNodes) == 0 {
return "", nil
}
Expand All @@ -121,11 +167,5 @@ func transformNode(n contenttree.Node) (string, error) {
childrenStrs = append(childrenStrs, s)
}

result := strings.Join(childrenStrs, "")

if slices.Contains(toSeparate, n.GetType()) && len(result) > 0 {
result += " "
}

return result, nil
return strings.Join(childrenStrs, " "), nil
}
11 changes: 11 additions & 0 deletions tests/body-tree-to-string/input/BigNumber.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"type": "body",
"children": [
{
"type": "big-number",
"number": "1947",
"description": "Year of release for ‘It Always Rains On Sunday’"
}
],
"version": 1
}
27 changes: 27 additions & 0 deletions tests/body-tree-to-string/input/InNumber.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"type": "body",
"children": [
{
"type": "in-numbers",
"title": "Ipsum",
"children": [
{
"type": "definition",
"term": "111",
"description": "Stat one"
},
{
"type": "definition",
"term": "222",
"description": "Stat two"
},
{
"type": "definition",
"term": "333",
"description": "Stat three"
}
]
}
],
"version": 1
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"type": "body",
"children": [
{
"type": "paragraph",
"children": [
{
"type": "text",
"value": "Kitchen sink."
},
{
"type": "text",
"value": "Kitchen sink."
}
]
}
],
"version": 1
}
11 changes: 11 additions & 0 deletions tests/body-tree-to-string/input/PullQuote.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"type": "body",
"children": [
{
"type": "pullquote",
"text": "Before the 1950s, the United Kingdom's working class were often depicted stereotypically in Noël Coward's drawing room comedies and British films",
"source": "Wikipedia"
}
],
"version": 1
}
20 changes: 20 additions & 0 deletions tests/body-tree-to-string/input/PullQuoteAndText.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"type": "body",
"children": [
{
"type": "pullquote",
"text": "Before the 1950s, the United Kingdom's working class were often depicted stereotypically in Noël Coward's drawing room comedies and British films",
"source": "Wikipedia"
},
{
"type": "paragraph",
"children": [
{
"type": "text",
"value": "Kitchen sink realism artists painted everyday objects, such as trash cans and beer bottles. The critic David Sylvester wrote an article in 1954 about trends in recent English art, calling his article “The Kitchen Sink” in reference to Bratby’s picture. Sylvester argued that there was a new interest among young painters in domestic scenes, with stress on the banality of life. Other artists associated with the kitchen sink style include Derrick Greaves, Edward Middleditch and Jack Smith."
}
]
}
],
"version": 1
}
26 changes: 26 additions & 0 deletions tests/body-tree-to-string/input/timeline.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"type": "body",
"children": [
{
"type": "layout",
"layoutName": "timeline",
"layoutWidth": "full-width",
"data": {
"layout": "timeline"
},
"children": [
{
"type": "heading",
"level": "subheading",
"children": [
{
"type": "text",
"value": "The evolution of kitchen sink drama films "
}
]
}
]
}
],
"version": 1
}
1 change: 1 addition & 0 deletions tests/body-tree-to-string/output/BigNumber.text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1947 Year of release for ‘It Always Rains On Sunday’
1 change: 1 addition & 0 deletions tests/body-tree-to-string/output/InNumber.text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Ipsum 111 Stat one 222 Stat two 333 Stat three
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Kitchen sink. Kitchen sink.
1 change: 1 addition & 0 deletions tests/body-tree-to-string/output/PullQuote.text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Before the 1950s, the United Kingdom's working class were often depicted stereotypically in Noël Coward's drawing room comedies and British films
1 change: 1 addition & 0 deletions tests/body-tree-to-string/output/PullQuoteAndText.text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Before the 1950s, the United Kingdom's working class were often depicted stereotypically in Noël Coward's drawing room comedies and British films Kitchen sink realism artists painted everyday objects, such as trash cans and beer bottles. The critic David Sylvester wrote an article in 1954 about trends in recent English art, calling his article “The Kitchen Sink” in reference to Bratby’s picture. Sylvester argued that there was a new interest among young painters in domestic scenes, with stress on the banality of life. Other artists associated with the kitchen sink style include Derrick Greaves, Edward Middleditch and Jack Smith.
1 change: 1 addition & 0 deletions tests/body-tree-to-string/output/timeline.text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
The evolution of kitchen sink drama films
Loading