-
Notifications
You must be signed in to change notification settings - Fork 0
Feature/uppsf 6584 extend the to string transformer #125
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
ilian2233
wants to merge
8
commits into
main
Choose a base branch
from
feature/UPPSF-6584-extend-the-to-string-transformer
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
77ee2b3
Added TimelineEvent
ilian2233 1dbea91
Added recommended and recommendedList
ilian2233 e56364e
Added BigNumber
ilian2233 635a38e
Added Pullqote
ilian2233 627674e
Added InNumber
ilian2233 f91f3f1
Added tests
ilian2233 0d0c798
Improved spacing logic
ilian2233 cef1a0b
Improved InNumber handling
ilian2233 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,7 +4,6 @@ import ( | |
| "encoding/json" | ||
| "errors" | ||
| "fmt" | ||
| "slices" | ||
| "strings" | ||
|
|
||
| contenttree "github.com/Financial-Times/content-tree" | ||
|
|
@@ -23,11 +22,7 @@ var ( | |
| BodyTree Schema = schema("body-tree") | ||
| ) | ||
|
|
||
| var ( | ||
| ErrUnknownKind = errors.New("unknown tree kind") | ||
| ) | ||
|
|
||
| var toSeparate = []string{contenttree.HeadingType, contenttree.ParagraphType} | ||
| var ErrUnknownKind = errors.New("unknown tree kind") | ||
|
|
||
| // Transform extracts and returns plain text from a content tree represented as unmarshalled JSON(json.RawMessage). | ||
| func Transform(tree json.RawMessage, s Schema) (string, error) { | ||
|
|
@@ -59,23 +54,13 @@ func unmarshalAndTransform(tree json.RawMessage, n contenttree.Node) (string, er | |
| text = strings.TrimSpace(text) | ||
|
|
||
| return text, nil | ||
|
|
||
| } | ||
|
|
||
| func transformNode(n contenttree.Node) (string, error) { | ||
| if n == nil { | ||
| return "", errors.New("nil node") | ||
| } | ||
|
|
||
| if n.GetType() == contenttree.RootType { | ||
| root, ok := n.(*contenttree.Root) | ||
| if !ok { | ||
| return "", errors.New("failed to parse node to root") | ||
| } | ||
|
|
||
| return transformNode(root.Body) | ||
| } | ||
|
|
||
| switch n.GetType() { | ||
| case contenttree.BodyBlockType: | ||
| return transformNode(n.GetEmbedded()) | ||
|
|
@@ -95,18 +80,79 @@ func transformNode(n contenttree.Node) (string, error) { | |
| return transformNode(n.GetEmbedded()) | ||
| case contenttree.TableChildType: | ||
| return transformNode(n.GetEmbedded()) | ||
| } | ||
|
|
||
| if n.GetType() == contenttree.TextType { | ||
| case contenttree.RecommendedType: | ||
| return "", nil | ||
| case contenttree.RecommendedListType: | ||
| return "", nil | ||
| case contenttree.TextType: | ||
| text, ok := n.(*contenttree.Text) | ||
| if !ok { | ||
| return "", errors.New("failed to parse node to text") | ||
| } | ||
|
|
||
| return text.Value, nil | ||
| case contenttree.RootType: | ||
| root, ok := n.(*contenttree.Root) | ||
| if !ok { | ||
| return "", errors.New("failed to parse node to root") | ||
| } | ||
|
|
||
| return transformNode(root.Body) | ||
| case contenttree.BigNumberType: | ||
| bigNumber, ok := n.(*contenttree.BigNumber) | ||
| if !ok { | ||
| return "", errors.New("failed to parse node to bigNumber") | ||
| } | ||
|
|
||
| return fmt.Sprintf("%s %s ", bigNumber.Number, bigNumber.Description), nil | ||
| case contenttree.PullquoteType: | ||
| pq, ok := n.(*contenttree.Pullquote) | ||
| if !ok { | ||
| return "", errors.New("failed to parse node to Pullquote") | ||
| } | ||
|
|
||
| return fmt.Sprintf("%s ", pq.Text), nil | ||
| case contenttree.InNumbersType: | ||
| in, ok := n.(*contenttree.InNumbers) | ||
| if !ok { | ||
| return "", errors.New("failed to parse node to InNumbers") | ||
| } | ||
| resultChildred, err := transformChildren(in.GetChildren()) | ||
| if err != nil { | ||
| return "", err | ||
| } | ||
|
|
||
| return fmt.Sprintf("%s %s ", in.Title, resultChildred), nil | ||
| case contenttree.DefinitionType: | ||
| def, ok := n.(*contenttree.Definition) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Issue: Here the node should be typecast to contenttree.InNumber and the title field on it should be used. Then, we will need an additional case for |
||
| if !ok { | ||
| return "", errors.New("failed to parse node to Definition") | ||
| } | ||
|
|
||
| return fmt.Sprintf("%s %s ", def.Term, def.Description), nil | ||
| case contenttree.TimelineEventType: | ||
| te, ok := n.(*contenttree.TimelineEvent) | ||
| if !ok { | ||
| return "", errors.New("failed to parse node to text") | ||
| } | ||
|
|
||
| resultChildred, err := transformChildren(te.GetChildren()) | ||
| if err != nil { | ||
| return "", err | ||
| } | ||
|
|
||
| return fmt.Sprintf("%s %s ", te.Title, resultChildred), nil | ||
| default: | ||
| result, err := transformChildren(n.GetChildren()) | ||
| if err != nil { | ||
| return "", err | ||
| } | ||
|
|
||
| return result, nil | ||
| } | ||
| } | ||
|
|
||
| childrenNodes := n.GetChildren() | ||
| func transformChildren(childrenNodes []contenttree.Node) (string, error) { | ||
| if len(childrenNodes) == 0 { | ||
| return "", nil | ||
| } | ||
|
|
@@ -121,11 +167,5 @@ func transformNode(n contenttree.Node) (string, error) { | |
| childrenStrs = append(childrenStrs, s) | ||
| } | ||
|
|
||
| result := strings.Join(childrenStrs, "") | ||
|
|
||
| if slices.Contains(toSeparate, n.GetType()) && len(result) > 0 { | ||
| result += " " | ||
| } | ||
|
|
||
| return result, nil | ||
| return strings.Join(childrenStrs, " "), nil | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| { | ||
| "type": "body", | ||
| "children": [ | ||
| { | ||
| "type": "big-number", | ||
| "number": "1947", | ||
| "description": "Year of release for ‘It Always Rains On Sunday’" | ||
| } | ||
| ], | ||
| "version": 1 | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,27 @@ | ||
| { | ||
| "type": "body", | ||
| "children": [ | ||
| { | ||
| "type": "in-numbers", | ||
| "title": "Ipsum", | ||
| "children": [ | ||
| { | ||
| "type": "definition", | ||
| "term": "111", | ||
| "description": "Stat one" | ||
| }, | ||
| { | ||
| "type": "definition", | ||
| "term": "222", | ||
| "description": "Stat two" | ||
| }, | ||
| { | ||
| "type": "definition", | ||
| "term": "333", | ||
| "description": "Stat three" | ||
| } | ||
| ] | ||
| } | ||
| ], | ||
| "version": 1 | ||
| } |
19 changes: 19 additions & 0 deletions
19
tests/body-tree-to-string/input/ParagraphWithMultipleChildren.json
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| { | ||
| "type": "body", | ||
| "children": [ | ||
| { | ||
| "type": "paragraph", | ||
| "children": [ | ||
| { | ||
| "type": "text", | ||
| "value": "Kitchen sink." | ||
| }, | ||
| { | ||
| "type": "text", | ||
| "value": "Kitchen sink." | ||
| } | ||
| ] | ||
| } | ||
| ], | ||
| "version": 1 | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| { | ||
| "type": "body", | ||
| "children": [ | ||
| { | ||
| "type": "pullquote", | ||
| "text": "Before the 1950s, the United Kingdom's working class were often depicted stereotypically in Noël Coward's drawing room comedies and British films", | ||
| "source": "Wikipedia" | ||
| } | ||
| ], | ||
| "version": 1 | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,20 @@ | ||
| { | ||
| "type": "body", | ||
| "children": [ | ||
| { | ||
| "type": "pullquote", | ||
| "text": "Before the 1950s, the United Kingdom's working class were often depicted stereotypically in Noël Coward's drawing room comedies and British films", | ||
| "source": "Wikipedia" | ||
| }, | ||
| { | ||
| "type": "paragraph", | ||
| "children": [ | ||
| { | ||
| "type": "text", | ||
| "value": "Kitchen sink realism artists painted everyday objects, such as trash cans and beer bottles. The critic David Sylvester wrote an article in 1954 about trends in recent English art, calling his article “The Kitchen Sink” in reference to Bratby’s picture. Sylvester argued that there was a new interest among young painters in domestic scenes, with stress on the banality of life. Other artists associated with the kitchen sink style include Derrick Greaves, Edward Middleditch and Jack Smith." | ||
| } | ||
| ] | ||
| } | ||
| ], | ||
| "version": 1 | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| { | ||
| "type": "body", | ||
| "children": [ | ||
| { | ||
| "type": "layout", | ||
| "layoutName": "timeline", | ||
| "layoutWidth": "full-width", | ||
| "data": { | ||
| "layout": "timeline" | ||
| }, | ||
| "children": [ | ||
| { | ||
| "type": "heading", | ||
| "level": "subheading", | ||
| "children": [ | ||
| { | ||
| "type": "text", | ||
| "value": "The evolution of kitchen sink drama films " | ||
| } | ||
| ] | ||
| } | ||
| ] | ||
| } | ||
| ], | ||
| "version": 1 | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| 1947 Year of release for ‘It Always Rains On Sunday’ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| Ipsum 111 Stat one 222 Stat two 333 Stat three |
1 change: 1 addition & 0 deletions
1
tests/body-tree-to-string/output/ParagraphWithMultipleChildren.text
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| Kitchen sink. Kitchen sink. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| Before the 1950s, the United Kingdom's working class were often depicted stereotypically in Noël Coward's drawing room comedies and British films |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| Before the 1950s, the United Kingdom's working class were often depicted stereotypically in Noël Coward's drawing room comedies and British films Kitchen sink realism artists painted everyday objects, such as trash cans and beer bottles. The critic David Sylvester wrote an article in 1954 about trends in recent English art, calling his article “The Kitchen Sink” in reference to Bratby’s picture. Sylvester argued that there was a new interest among young painters in domestic scenes, with stress on the banality of life. Other artists associated with the kitchen sink style include Derrick Greaves, Edward Middleditch and Jack Smith. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| The evolution of kitchen sink drama films |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
suggestion: we should add
case: contenttree.TimelineEventChildTypenode for completeness.The code works without it because in the default case we are calling n.GetChildren() which internally calls the GetChildren on the embedded node. For example,
So theoretically, we could get rid of all the checks where we return transformNode(n.GetEmbedded()).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think removing the
transformNode(n.GetEmbedded())would be the better approach here.No need to burden the switch with more cases than needed. What do you think @todor-videv1?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Two options here... if we are sure we can safely remove the n.GetEmbedded()... less is more, I'd advse to leave a comment in the code how the default case is dealing with these embeds though.
If we have even an ounce of doubt, add the additional case like Lokender suggests, and then when we fix the tests, we can do this enhancement.