|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
net.web.UrlUtils
public class UrlUtils
Field Summary | |
---|---|
static java.lang.String |
ACCEPT
|
static java.lang.String |
AGENT_NAME
|
static java.lang.String |
CONTENT_LENGTH
|
static java.lang.String |
CONTENT_TYPE
|
static java.lang.String |
DATE_HEADER
|
static java.lang.String |
FILE_URL_PREFIX
|
static java.lang.String |
HEADER_PRAGMA
|
static java.lang.String |
HTTP_URL_PREFIX
|
static java.lang.String |
METHOD_DELETE
|
static java.lang.String |
METHOD_GET
|
static java.lang.String |
METHOD_HEAD
|
static java.lang.String |
METHOD_OPTIONS
|
static java.lang.String |
METHOD_POST
|
static java.lang.String |
METHOD_PUT
|
static java.lang.String |
METHOD_TRACE
|
static java.lang.String |
SOAPACTION
|
static java.lang.String |
SUPPORTED
|
static java.lang.String |
USER_AGENT
|
Constructor Summary | |
---|---|
UrlUtils()
|
Method Summary | |
---|---|
static java.lang.String |
conditionUrl(java.lang.String s)
Replace the spaces with %20 |
static void |
dataMineCTC()
|
static java.lang.String |
encode(java.lang.String s)
|
static java.lang.String |
encodePath(java.lang.String path)
|
static java.net.URL |
fileToURL(java.io.File file)
Returns the directory or JAR file URL corresponding to the specified local file name. |
static byte[] |
getBytes(java.net.URL url)
Read in the entire url at once, into an array of bytes, and return it. |
static java.lang.String |
getContentType(java.lang.String urlString)
Opens a connection to the given URL, gets the content type, then closes the connection. |
static java.lang.String[] |
getHrefs(java.lang.String s,
java.lang.String containingString)
Search the string, s, for all the hrefs that contain the suffix |
static java.lang.String[] |
getHrefs(java.lang.String s,
java.lang.String searchString,
java.lang.String rootUrl)
Search the string, s, for all the hrefs that are uppercase HREF= |
static java.lang.String |
getOneBigUrlString(java.net.URL url)
Read in all the text at once, from a given url, and return it in one big string |
static java.lang.String |
getTableToCSVText(java.net.URL url)
Strip out the html tags and just get the text from the URL. Makes CSV data. |
static java.lang.String[] |
getTxtGz(java.net.URL url)
Given a URL of the form fn.txt.gz, this returns an array of strings, one per line. |
static java.util.Vector |
getUrl(java.lang.String _urlString)
|
static java.util.Vector |
getUrl(java.net.URL url)
|
static void |
getUrl(java.net.URL url,
java.io.File f)
Read a url and put it into a file. |
static void |
getUrlBinary(java.io.File f,
java.net.URL url)
|
static void |
getUrlBinary(java.net.URL url)
|
static java.util.Date |
getUrlModificationDate(java.net.URL url)
|
static java.lang.String[] |
getUrlString(java.lang.String[] urlStrings)
Get the text from each url listed in the urlStrings array and create one big array. |
static java.lang.String[] |
getUrlStrings(java.net.URL urlString)
Call this routine if you want one HTML line per array element. |
static java.lang.String[] |
getUrlStrings(java.net.URL url,
java.lang.String uid,
java.lang.String pw)
Use basic authentication in order to retrieve the url as an array of strings. |
static java.util.Vector |
getUrlVector(java.net.URL url)
|
static java.lang.String |
getUTF8Encoding()
|
static java.lang.String |
html2text(java.lang.String s)
|
static java.lang.String |
html2text(java.net.URL url)
Input: the HTML at the URL. |
static boolean |
isFileUrl(java.lang.String url)
|
static boolean |
isHttpUrl(java.lang.String url)
|
static java.net.URL[] |
isolateLinks(java.lang.String suffix,
java.net.URL[] urls)
|
static boolean |
isURLAvailable(java.lang.String urlString,
int timeOut)
Utility method to detect whether a URL is accessible within a timeout. |
static java.util.Vector |
lookForJobs()
|
static void |
main(java.lang.String[] args)
|
static java.net.URL[] |
pathToURLs(java.lang.String path)
Utility method for converting a search path string to an array of directory and JAR file URLs. |
static void |
printLabels()
|
static void |
processGz(java.net.URL url,
LineProcessor lp)
|
static java.lang.String |
receiveHttpMessage(java.net.Socket socket)
|
static void |
sendHttpMessage(java.net.Socket socket,
java.lang.String command)
|
static void |
testEdgarExtract()
|
static void |
textStreamProcessor(java.net.URL u,
LineProcessor lineProcessor)
|
static java.lang.String |
toString(java.lang.String url)
|
static java.lang.String |
urlEncode(java.lang.String url)
This will encode a url's odd characters, but I don't know what the default encoding scheme should be, so I set it to null. |
Methods inherited from class java.lang.Object |
---|
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
public static final java.lang.String HTTP_URL_PREFIX
public static final java.lang.String FILE_URL_PREFIX
public static final java.lang.String AGENT_NAME
public static final java.lang.String CONTENT_LENGTH
public static final java.lang.String CONTENT_TYPE
public static final java.lang.String DATE_HEADER
public static final java.lang.String SUPPORTED
public static final java.lang.String ACCEPT
public static final java.lang.String SOAPACTION
public static final java.lang.String USER_AGENT
public static final java.lang.String METHOD_DELETE
public static final java.lang.String METHOD_HEAD
public static final java.lang.String METHOD_GET
public static final java.lang.String METHOD_OPTIONS
public static final java.lang.String METHOD_POST
public static final java.lang.String METHOD_PUT
public static final java.lang.String METHOD_TRACE
public static final java.lang.String HEADER_PRAGMA
Constructor Detail |
---|
public UrlUtils()
Method Detail |
---|
public static void sendHttpMessage(java.net.Socket socket, java.lang.String command) throws java.io.IOException
java.io.IOException
public static java.lang.String encodePath(java.lang.String path)
public static java.lang.String getUTF8Encoding()
public static java.lang.String receiveHttpMessage(java.net.Socket socket) throws java.io.IOException
java.io.IOException
public static void printLabels() throws java.io.IOException, javax.swing.text.BadLocationException
java.io.IOException
javax.swing.text.BadLocationException
public static java.lang.String getContentType(java.lang.String urlString) throws java.io.IOException
java.io.IOException
public static java.util.Vector lookForJobs() throws java.io.IOException
java.io.IOException
public static java.lang.String getTableToCSVText(java.net.URL url) throws java.io.IOException, javax.swing.text.BadLocationException
url
- source of data
java.io.IOException
javax.swing.text.BadLocationException
public static java.lang.String html2text(java.lang.String s) throws java.io.IOException, javax.swing.text.BadLocationException
java.io.IOException
javax.swing.text.BadLocationException
public static java.lang.String html2text(java.net.URL url) throws java.io.IOException, javax.swing.text.BadLocationException
url
- the source of the html
java.io.IOException
javax.swing.text.BadLocationException
public static java.lang.String[] getHrefs(java.lang.String s, java.lang.String containingString)
s
- raw html used for the search
containingString
- stuff we are looking for in the href
public static java.lang.String[] getHrefs(java.lang.String s, java.lang.String searchString, java.lang.String rootUrl)
s
- raw html used for the search
public static java.lang.String getOneBigUrlString(java.net.URL url) throws java.io.IOException
url
- to read from
java.io.IOException
public static void getUrl(java.net.URL url, java.io.File f) throws java.io.IOException
url
- input file (like data.jar)
f
- locally created output file.
java.io.IOException
public static byte[] getBytes(java.net.URL url)
url
- an input url that points to binary data
public static java.lang.String[] getUrlStrings(java.net.URL urlString) throws java.io.IOException
String s[] = UrlUtils.getUrlStrings(new URL("http://www.docjava.com"));
urlString
- input URL
java.io.IOException
public static java.lang.String[] getUrlString(java.lang.String[] urlStrings) throws java.io.IOException
urlStrings
-
java.io.IOException
public static java.lang.String conditionUrl(java.lang.String s)
s
-
public static java.net.URL[] isolateLinks(java.lang.String suffix, java.net.URL[] urls)
suffix
- the string representation of the urls must end with the suffix
urls
- a list of urls to sort through
public static void getUrlBinary(java.net.URL url) throws java.io.IOException
java.io.IOException
public static void getUrlBinary(java.io.File f, java.net.URL url) throws java.io.IOException
java.io.IOException
public static java.util.Vector getUrlVector(java.net.URL url) throws java.io.IOException
java.io.IOException
public static java.lang.String[] getTxtGz(java.net.URL url) throws java.io.IOException
url
-
java.io.IOException
public static void processGz(java.net.URL url, LineProcessor lp) throws java.io.IOException
java.io.IOException
public static java.lang.String urlEncode(java.lang.String url) throws java.io.UnsupportedEncodingException
url
-
java.io.UnsupportedEncodingException
public static java.lang.String toString(java.lang.String url)
public static java.util.Vector getUrl(java.lang.String _urlString)
public static java.util.Vector getUrl(java.net.URL url) throws java.io.IOException
java.io.IOException
public static java.util.Date getUrlModificationDate(java.net.URL url) throws java.io.IOException
java.io.IOException
public static java.net.URL[] pathToURLs(java.lang.String path)
path
- the search path string
public static java.net.URL fileToURL(java.io.File file)
file
- the File object
public static boolean isURLAvailable(java.lang.String urlString, int timeOut)
urlString
- the URL
timeOut
- timeout in milliseconds
public static boolean isFileUrl(java.lang.String url)
public static boolean isHttpUrl(java.lang.String url)
public static java.lang.String[] getUrlStrings(java.net.URL url, java.lang.String uid, java.lang.String pw) throws java.io.IOException
url
- the url (perhaps with cgi arguments)
uid
- a required uid for access
pw
- a base 64 encoded, insecure password to be passed (careful!)
java.io.IOException
- if the url cannot be opened
public static void testEdgarExtract() throws java.io.IOException
java.io.IOException
public static void textStreamProcessor(java.net.URL u, LineProcessor lineProcessor) throws java.io.IOException
java.io.IOException
public static java.lang.String encode(java.lang.String s)
public static void main(java.lang.String[] args) throws java.io.IOException
java.io.IOException
public static void dataMineCTC() throws java.io.IOException
java.io.IOException
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |