Jsoup: manipulation of text
I have an HTML file, for example:
<!doctype html> <html> <body> <h1>my first heading</h1> <p>my first paragraph.</p> </body> </html>
I have written a method in Java that converts text symbols from Latin to Cyrillic, like this:
/**
 * Converts a Latin letter to its Cyrillic counterpart, preserving case.
 *
 * <p>Only the Latin letters {@code A}, {@code a}, {@code B} and {@code b} are
 * mapped; every other character is returned unchanged.
 *
 * @param charsent the character to convert
 * @return the mapped Cyrillic character, or {@code charsent} itself when no
 *         mapping exists
 */
public static char changeletterlatcyr(char charsent) {
    // The Cyrillic targets are written as Unicode escapes so the result does
    // not depend on the encoding the source file is compiled with.
    switch (charsent) {
        case 'A':
            return '\u0410'; // Cyrillic capital letter A
        case 'a':
            return '\u0430'; // Cyrillic small letter a
        case 'B':
            return '\u0411'; // Cyrillic capital letter Be
        case 'b':
            return '\u0431'; // Cyrillic small letter be
        default:
            return charsent;
    }
}
How can I apply this function to the text in the document while preserving the tag structure? The function changes each specific character.
I need a result like:
<!doctype html> <html> <body> <h1>some manipulation on text</h1> <p>some manipulation on text</p> </body> </html>
Here is how you can do it. The NodeVisitor does the magic here.
java
package com.github.davidepastore.stackoverflow33463949; import java.io.ioexception; import java.io.inputstream; import org.jsoup.jsoup; import org.jsoup.nodes.document; import org.jsoup.nodes.node; import org.jsoup.nodes.textnode; import org.jsoup.select.elements; import org.jsoup.select.nodevisitor; /** * stackoverflow 33463949 question. * */ public class app { /** * starts app here. * @param args * @throws ioexception */ public static void main(string[] args) throws ioexception { classloader classloader = thread.currentthread() .getcontextclassloader(); inputstream = classloader.getresourceasstream("file.html"); document document = jsoup.parse(is, "utf-8", ""); elements elements = document.select("body"); manipulateelements(elements); system.out.println("result: " + document.tostring()); } /** * manipulate {@link elements}. * @param elements {@link elements} manipulate. */ private static void manipulateelements(elements elements) { elements.traverse(new nodevisitor() { public void tail(node node, int depth) { } public void head(node node, int depth) { if (node instanceof textnode) { textnode textnode = (textnode) node; string text = textnode.text().trim(); if (!text.isempty()) { char[] newchars = new char[text.length()]; (int = 0; < text.length(); i++) { newchars[i] = changeletterlatcyr(text.charat(i)); } textnode.text(new string(newchars)); } } } }); } /** * own custom change letter method. * @param charsent char convert. * @return returns converted char. */ public static char changeletterlatcyr(char charsent) { char l_a = 'a', l_a = 'a', l_b = 'b', l_b = 'b', r_a = 'А', r_a = 'а', r_b = 'Б', r_b = 'б', result = ' '; if (charsent == l_a) { result = r_a; } else if (charsent == l_a) { result = r_a; } else if (charsent == l_b) { result = r_b; } else if (charsent == l_b) { result = r_b; } else { result = charsent; } return result; } }
html
<!doctype html> <html> <body> <h1>my first heading</h1> <p>my first paragraph.</p> <div> <p>A a B b complex structure</p> </div> </body> </html>
output
result: <!doctype html> <html> <head></head> <body> <h1>my first heаding</h1> <p>my first pаrаgrаph.</p> <div> <p>А а Б б complex structure</p> </div> </body> </html>
Comments
Post a Comment