Note: In Python 2, dividing two integers truncates the result; at least one of the numbers must be a float for the division to print decimals.
a = 2
b = 3.
c = b/a
print(c)
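A minimal sketch of the difference (assuming the snippet runs under Python 2; under Python 3 the / operator always returns a float):
print(3 / 2)    # 1 in Python 2, 1.5 in Python 3
print(3.0 / 2)  # 1.5 in both
print(3 // 2)   # 1 in both (floor division)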
text = "The quick brown fox jumped over the lazy log."
mytext = text.split()
print(mytext)
print(" ".join(mytext))
print(text.find("fox"))
count = 0
for x in mytext:
    if "e" in x:
        count += 1
print("'e' appears in", count, "words")
The user input is read into "b" and compared to the variable "a".
a = 5
b = int(input("Give me a number:"))
if b == a:
    print("Yes, right")
else:
    print("No, No..!")
First create a file named "ques.txt" and enter some questions in it. Then read the file in your Python code. Treat it as an interview, so the time needs to be recorded as well. Ask for the person's name first and save the file under that name.
import datetime
file = open('ques.txt', 'r')
username = input("What is your name? ")
# create the answer file if it does not exist yet, then open it for writing
myfile = open(username + ".txt", "a")
myfile.close()
output = open(username + ".txt", "r+")
for line in file:
    the_reply = input(line)
    line1 = line + "{0}: {1} \n".format(username, the_reply)
    output.write(line1)
output.write("\n")
output.write("Interview Time: "+str(datetime.datetime.now().time()))
output.close()
Read the file that you created (here "User.txt") line by line, together with the answers from the interviewee.
file = open("User.txt", "r")
mylines = file.readlines()
for line in mylines:
    my_next = mylines.index(line) + 1
    if "How old are you?" in line:
        print(mylines[my_next])
    if "What is your job?" in line:
        print(mylines[my_next])
    if "Where are you from?" in line:
        print(mylines[my_next])
Create an empty dictionary and store each answer together with the position of its question.
file = open("User.txt", "r")
mylines = file.readlines()
mydict = {}
for line in mylines:
    my_next = mylines.index(line) + 1
    if "How old are you?" in line:
        mydict[mylines[my_next]] = mylines.index(line)
    if "What is your job?" in line:
        mydict[mylines[my_next]] = mylines.index(line)
    if "Where are you from?" in line:
        mydict[mylines[my_next]] = mylines.index(line)
print(mydict)
How to sort the dict by its values.
file = open("User.txt", "r")
mylines = file.readlines()
mydict = {}
for line in mylines:
    my_line = line.replace("\n", "")
    mydict[my_line] = mylines.index(line)
print(sorted(mydict.items(), key=lambda x: x[1]))
from flask import Flask
from flask import request
app = Flask(__name__)
@app.route('/')
def index():
    return 'Welcome'
@app.route('/login')
def login():
    username = request.args.get('username')
    password = request.args.get('password')
    return '%s is' % username
@app.route('/user/<username>')
def show_user_profile(username):
    # show the user profile for that user
    return '<html><body>This is a test to see if <span style="font-weight:bold">%s</span> is shown in the middle of html text</body></html>' % username
@app.route('/post/<int:post_id>')
def show_post(post_id):
    # show the post with the given id, the id is an integer
    return 'Post %d' % post_id
@app.route('/test')
def test():
    return 'this is a test'
if __name__ == '__main__':
    app.run(host='127.0.0.1', port=5000)
To run the code, type in the command line:
$ python test.py arg1 arg2 arg3
where the arguments can be, for example, "user1.txt", "user2.txt", etc.
#!/usr/bin/python
import sys
# Get the total number of args passed to the script
total = len(sys.argv)
# Get the arguments list
cmdargs = str(sys.argv)
# Print it
print ("The total numbers of args passed to the script: %d " % total)
print ("Args list: %s " % cmdargs)
mydict = {}
excepts = sys.argv[0]
sys.argv.remove(excepts)
for x in sys.argv:
    print(x, end=" ")
    file = open(x, "r")
    f = file.readlines()
    for line in f:
        print(line, end=" ")
The NLTK tokenizer is used to split the text into sentences. The structure is roughly what I need; it still has to be completed!
import glob
from nltk.tokenize import sent_tokenize
my_files = glob.glob('Arsam.txt')
mylist = []
for i in my_files:
    file = open(i, 'r')
    data = file.read()
    sent_tokenize_list = sent_tokenize(data)
    count = len(sent_tokenize_list)
    mylist.append(i)
    for x in range(count):
        myformat = (sent_tokenize_list[x], 'MX')
        mylist.append(myformat)
print(mylist)
Save the code as "server.js" and run it with:
$ node server.js
var express = require('express');
var app = express();
app.get('/', function (req, res) {
    res.send('Hello World');
})
var server = app.listen(8081, function () {
    var host = server.address().address
    var port = server.address().port
    console.log("Example app listening at http://%s:%s", host, port)
})
Using BeautifulSoup
import urllib2
from bs4 import BeautifulSoup
url = urllib2.urlopen("http://aclanthology.info/").read()
soup = BeautifulSoup(url, 'html.parser')
for line in soup.find_all('bib'):
    print(line.get('href'))
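urllib2 only exists in Python 2; under Python 3 the same idea could be sketched with urllib.request (here scanning ordinary <a> tags, since 'bib' is not a standard HTML tag; adjust the tag to whatever the page actually uses):
from urllib.request import urlopen
from bs4 import BeautifulSoup
html = urlopen("http://aclanthology.info/").read()
soup = BeautifulSoup(html, "html.parser")
for link in soup.find_all("a"):
    print(link.get("href"))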
Some practice with looping through the data in lists.
z = [ 'a', 'b', 'c', 'd' ]
i = 0
while i < len(z):
    print(i, z[i])
    i += 1
#adds a line between results
print()
for i in range(0, len(z)):
    print(i, z[i])
#adds a line between results
print()
for i, item in enumerate(z):
    print(i, item)
#adds a line between results
print()
e = dict(a=5, b=4, c=18)
print(e)
and more
d = {}
d['a'] = 5
d['b'] = 4
d['c'] = 18
print(d)
and even more
items = d.items()
print(items)
and get them sorted
items = sorted(items)
print(items)
import math
data = [ 1, 2, 3, 4, 5 ]
average = sum(data) / float(len(data))
stddev = sum([ (x - average)**2 for x in data ]) / float(len(data))
stddev = math.sqrt(stddev)
print(average, '+/-', stddev)
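Under Python 3.4+ the statistics module gives the same numbers (pstdev is the population standard deviation, matching the formula above):
import statistics
data = [1, 2, 3, 4, 5]
print(statistics.mean(data), '+/-', statistics.pstdev(data))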
import nltk
from nltk.corpus import *
corpus_root = '../Downloads/soft'
wordlists = PlaintextCorpusReader(corpus_root, '.*')
reader = CategorizedPlaintextCorpusReader('', r'.*\.txt', cat_pattern=r'(\w+)/*')
print(reader.categories())
import nltk
from nltk.corpus import stopwords
def content_fraction(text):
    stopwords = nltk.corpus.stopwords.words('english')
    words = [w for w in text.split() if w in stopwords]
    return words
print(content_fraction('This is a test for the stopwords.'))
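Despite the name, content_fraction above returns the stopwords it finds rather than a fraction; if an actual fraction of content (non-stopword) words is wanted, a variant could look like this (content_fraction_ratio is a name made up here for illustration):
def content_fraction_ratio(text):
    # fraction of words that are NOT stopwords
    stop = set(nltk.corpus.stopwords.words('english'))
    words = text.split()
    content = [w for w in words if w.lower() not in stop]
    return len(content) / len(words)
print(content_fraction_ratio('This is a test for the stopwords.'))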
Start the file with # -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
s = "این یک تست است"
print(s.split())
import os
from nltk import *
counter = []
def wordcounter(file):
    path = '../Downloads/soft/news/' + file
    f = open(path, 'r')
    mylist = []
    for word in f:
        sublist = word_tokenize(word)
        mylist.append(sublist)
    # collect the tokens, skipping punctuation
    words = []
    for item in mylist:
        for i in item:
            if i in ("``", "''", ',', '.'):
                pass
            else:
                words.append(i)
    counter.append(len(words))
    return len(words)
directory = os.listdir('../Downloads/soft/news')
for file in directory:
    print(file, ":", wordcounter(file), "words")
print("\nDirectory contains", len(counter), "files, with a total number of", sum(counter), "words.")
import re
s = "This is an email address.".split()
wordlist = [w for w in s if re.search('^e-?mail$', w)]
print(wordlist)
import nltk
from nltk.corpus import *
import re
corpus_root = '/home/dariush/Downloads/soft'
wordlists = PlaintextCorpusReader(corpus_root, '.*')
corpus = CategorizedPlaintextCorpusReader('', r'.*\.txt', cat_pattern=r'(\w+)/*')
#print(corpus.categories())
words = corpus.words(categories=['news','general'])
found_words = [w for w in words for x in re.findall(r'stud.', w)]
#print(sorted(found_words))
print(nltk.FreqDist(found_words).most_common(100))
fdist = nltk.FreqDist(sorted(found_words))
print(fdist.N())
fdist.tabulate()
fdist.plot()
for w in fdist:
    print(w, ":", fdist[w])
import nltk
from nltk.corpus import *
import re
corpus_root = '/home/dariush/Downloads/soft'
wordlists = PlaintextCorpusReader(corpus_root, '.*')
corpus = CategorizedPlaintextCorpusReader('', r'.*\.txt', cat_pattern=r'(\w+)/*')
#print(corpus.categories())
words = corpus.words(categories=['news','general'])
found_words = [w for w in words for x in re.findall(r'Mal', w)]
#print(sorted(found_words))
#print(nltk.FreqDist(found_words).most_common(100))
fdist = nltk.FreqDist(sorted(found_words))
#print(fdist.N())
fdist.tabulate()
fdist.plot()
text = nltk.Text(words)
for w in fdist:
    text.concordance(w)
import nltk
from nltk.corpus import *
import re
raw = "I tested the tests for the teachers"
regexp = r'^(.*?)(ing|ly|ed|ious|ies|ive|es|s|ment)?$'
tokens = nltk.word_tokenize(raw)
stems = []
for w in tokens:
    stem = re.findall(regexp, w)
    stems.append(stem[0])
for item in stems:
    if item[1] == '':
        pass
    else:
        print(item)
def segment(text, segs):
    words = []
    last = 0
    for i in range(len(segs)):
        if segs[i] == '1':
            words.append(text[last:i+1])
            last = i + 1
    words.append(text[last:])
    return words
sent = "Researchonthe"
seg2 = "0000000101000"
print(segment(sent, seg2))
text = 'This is a sample text with some sentences and we are going to find the longest word in this text'
words = text.split()
maxlen = max(len(t) for t in words)
longest = [w for w in words if len(w) == maxlen]
print(longest, maxlen, 'characters')
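The same thing can also be done with max() and a key function; note this returns only the first longest word, while the comprehension above keeps ties:
print(max(words, key=len))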
# note: eval() executes whatever Python expression the user types in
calculator = lambda x: print(eval(x))
calculator(input("Enter a math expression: "))
import nltk
sent = 'The/AT grand/JJ jury/NN commented/VBD on/IN a/AT number/NN of/IN'
tagged_sent = [nltk.tag.str2tuple(w) for w in sent.split()]
print(tagged_sent)
import nltk
from nltk.corpus import *
import re
corpus = nltk.corpus.CategorizedPlaintextCorpusReader('',r'.*\.txt',cat_pattern=r'(\w+)/*')
sents = corpus.sents(categories=['news','general'])
tagged_sent = [nltk.pos_tag(sent) for sent in sents]
tag_list = [tag[1][1] for tag in tagged_sent]
fd = nltk.FreqDist(tag_list)
print(fd.most_common())
import nltk
corpus = nltk.corpus.CategorizedPlaintextCorpusReader('',r'.*\.txt',cat_pattern=r'(\w+)/*')
sents = corpus.sents(categories=['news','general'])
word_tag_pairs = [nltk.pos_tag(sent) for sent in sents]
all_tags = [w for w in word_tag_pairs]
precedings = []
for w in all_tags:
    for a in w:
        if a[1] == 'VBP':
            current = w.index(a)
            preceding = w[current-1]
            precedings.append(preceding)
fdist = nltk.FreqDist([w[1] for w in precedings])
print(fdist.most_common())
cfd1 = nltk.ConditionalFreqDist((tag, word) for (word, tag) in precedings)
print([w for w in cfd1['RB'].most_common()])
import nltk
from nltk.corpus import *
corpus = CategorizedPlaintextCorpusReader('',r'.*\.txt', cat_pattern=r'(\w+)/*')
text = corpus.words(categories=['news','general'])
bigrams = nltk.bigrams(text)
keyword = 'which'
bigram_list = []
bigram_list.append([x[1] for x in bigrams if x[0] == keyword])
print(bigram_list, '\n\n')
tags = [nltk.pos_tag(word) for word in bigram_list]
print(tags, '\n\n')
taglist = [x[1] for x in tags[0]]
fdist = nltk.FreqDist(taglist)
fdist.tabulate()
fdist.plot()
import nltk
def process(sentence):
    for (w1, t1), (w2, t2), (w3, t3) in nltk.trigrams(sentence):
        if (t1.startswith('V') and t2 == 'TO' and t3.startswith('V')):
            #print(sentence, '\n')
            print(w1, w2, w3)
corpus = nltk.corpus.CategorizedPlaintextCorpusReader('',r'.*\.txt', cat_pattern=r'(\w+)/*')
text = corpus.sents(categories=['news','general'])
for sent in text:
    process(nltk.pos_tag(sent))
all_list = [['item1'], ['item2'], ['item3']]
flat_list = [item for sublist in all_list for item in sublist]
print(flat_list)
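itertools offers an equivalent flattening, which can be handier for long lists:
from itertools import chain
print(list(chain.from_iterable(all_list)))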
import nltk
sent = 'this is a a a a a test for the test'
fdist = nltk.FreqDist(word.lower() for word in nltk.word_tokenize(sent))
fdist.tabulate()
tags = ['IN', 'DT', 'JJ', 'NN', 'VBZ', 'VBN', 'VBN', 'DT','IN','NN']
tags = set(tags)
print(tags)
import csv as csv
from tabulate import tabulate
from operator import itemgetter
country = 'Switzerland'
year = '2015'
max_rank = 100
readdata = csv.reader(open("cwurData.csv"))
alldata = []
for data in readdata:
    alldata.append(data)
countrydata = alldata[1:]
record = [(itemgetter(0,1,3)(record)) for record in countrydata if record[2] == country and record[-1] == year and int(record[0]) <= max_rank]
print('\n\n', country, 'TOP UNIVERSITIES (<',max_rank,') (',year,')\n')
print(tabulate(record,headers=(itemgetter(0,1,3)(alldata[0]))))
print('\n\nWORLD TOP', max_rank, 'UNIVERSITIES (',year,')\n')
data = [record[:3] for record in countrydata if int(record[0]) <= max_rank and record[-1] == year]
print(tabulate(data, headers=alldata[0][:3]))
- Change the country; the number of slices in explode() adapts to the number of results
- Change 'max_rank' to find more results
- itemgetter() helps to select specific indexes from a list, e.g. item[0], item[3]
- 'STRING'.title() capitalizes the first letter of each word in the string
- Make a list of a repeated string/int with "mylist = ['item'] * 10" (a quick demo of the last three tips follows below)
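A quick sketch of the last three tips (the row values here are made up):
from operator import itemgetter
row = ['12', 'ETH Zurich', 'Switzerland']
print(itemgetter(0, 2)(row))   # ('12', 'Switzerland')
print('world rank'.title())    # 'World Rank'
print(['item'] * 3)            # ['item', 'item', 'item']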
import csv as csv
from tabulate import tabulate
from operator import itemgetter
import matplotlib.pyplot as plt
country = 'United Kingdom'
year = '2015'
max_rank = 100
readdata = csv.reader(open("cwurData.csv"))
alldata = []
for data in readdata:
    alldata.append(data)
title = alldata[0]
#print(title)
countrydata = alldata[1:]
record = [(itemgetter(0,1,3,12)(record)) for record in countrydata if record[2] == country and record[-1] == year and int(record[0]) <= max_rank]
print('\n\n', country, 'Top Universities (<',max_rank,') (',year,')\n')
print(tabulate(record,headers=(itemgetter(0,1,3,12)(alldata[0]))))
print('\n\n', country, 'Top Universities Based on',title[12].title())
# Pie chart, where the slices will be ordered and plotted counter-clockwise:
labels = [record[1] for record in countrydata if record[2] == country and record[-1] == year and int(record[0]) <= max_rank]
sizes = [record[12] for record in countrydata if record[2] == country and record[-1] == year and int(record[0]) <= max_rank]
slices = [0] * len(sizes)
explode = (slices)
fig1, ax1 = plt.subplots()
ax1.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%',
        shadow=True, startangle=90)
ax1.axis('equal')
plt.show()
Change the 'column_name' value to 'citations', 'score', 'world_rank', etc. to see a different table.
import csv as csv
from tabulate import tabulate
from operator import itemgetter
import matplotlib.pyplot as plt
country = 'Germany'
year = '2015'
max_rank = 100
column_name = 'score'
readdata = csv.reader(open("cwurData.csv"))
alldata = []
for data in readdata:
    alldata.append(data)
column_title = dict([(v,k) for k, v in enumerate(alldata[0])])
column = column_title[column_name]
countrydata = alldata[1:]
record = [(data[0],data[1],data[3],data[column]) for data in countrydata if data[2] == country and data[-1] == year and int(data[0]) <= max_rank]
print('\n\n', country, 'Top Universities (<',max_rank,') (',year,')\n')
print(tabulate(record,headers=(itemgetter(0,1,3,column)(alldata[0]))))
from collections import defaultdict
pos = defaultdict(lambda: 'UNK')
sentence = 'this is a test'
pos['this'] = 'DT'
pos['test'] = ['NOUN','VERB']
for word in sentence.split():
    print(word, pos[word])
sentence = 'This, is a list! full of . , punctuations:'
cleaned = "".join(c for c in sentence if c not in ('!',':','\'','"','`',',','.'))
print(cleaned)
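An alternative sketch that strips all ASCII punctuation at once with str.translate (Python 3):
import string
table = str.maketrans('', '', string.punctuation)
print(sentence.translate(table))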
import nltk
from nltk import word_tokenize
from nltk.util import ngrams
from collections import Counter
import string
import re
corpus = nltk.corpus.CategorizedPlaintextCorpusReader('',r'.*\.txt',cat_pattern=r'(\w+)/*')
sents = corpus.sents(categories=['news','general'])
word_tag_pairs = [nltk.pos_tag(sent) for sent in sents]
all_tags = [w for w in word_tag_pairs]
cleaned_sents = []
for sent in sents:
    cleaned = [c for c in sent if c not in ('!', ':', '\'', '"', '`', ',')]
    cleaned_sent = nltk.pos_tag(cleaned)
    cleaned_sents.append(cleaned_sent)
words = []
for entry in cleaned_sents:
    for word in entry:
        pattern = word[0], '/', word[1]
        words.append(pattern)
text = []
for tagged_word in words:
    tagged = ''.join(tagged_word)
    text.append(tagged)
tagged_text = ' '.join(text)
#print(tagged_text)
token = nltk.word_tokenize(tagged_text)
bigrams = ngrams(token,2)
trigrams = ngrams(token,3)
fourgrams = ngrams(token,4)
fivegrams = ngrams(token,5)
a = Counter(fourgrams)
freqdist = nltk.FreqDist(a)
b = freqdist.most_common(50)
for i in b:
    print(i)
import nltk
from tabulate import tabulate
from operator import itemgetter
import matplotlib.pyplot as plt
from nltk.util import ngrams
from collections import Counter
corpus = nltk.corpus.CategorizedPlaintextCorpusReader('',r'.*\.txt',cat_pattern=r'(\w+)/*')
sents = corpus.sents(categories=['news','general'])
word_tag_pairs = [nltk.pos_tag(sent) for sent in sents]
all_tags = [w for w in word_tag_pairs]
cleaned_sents = []
for sent in sents:
    cleaned = [c for c in sent if c not in ('!', ':', '\'', '"', '`', ',')]
    cleaned_sent = nltk.pos_tag(cleaned)
    cleaned_sents.append(cleaned_sent)
words = []
for entry in cleaned_sents:
    for word in entry:
        pattern = word[0], '/', word[1]
        words.append(pattern)
text = []
for tagged_word in words:
    tagged = ''.join(tagged_word)
    text.append(tagged)
tagged_text = ' '.join(text)
#print(tagged_text)
token = nltk.word_tokenize(tagged_text)
bigrams = ngrams(token,2)
trigrams = ngrams(token,3)
fourgrams = ngrams(token,4)
fivegrams = ngrams(token,5)
fourgram_bundle = Counter(fourgrams)
freqdist = nltk.FreqDist(fourgram_bundle)
bundles = freqdist.most_common(100)
entries = [a for a in bundles]
#print(tabulate(entries))
### END OF TRAINING
sentence = 'It has negative impact on us.'
tagged_sentence = nltk.pos_tag(sentence.split())
sentence_words = []
for word in tagged_sentence:
    pattern = word[0], '/', word[1]
    sentence_words.append(pattern)
tagged_sentence = []
for tagged_word in sentence_words:
    tagged = ''.join(tagged_word)
    tagged_sentence.append(tagged)
#final_tagged_sent = ' '.join(tagged_sentence)
#print(tagged_sentence)
sentence_fourgrams = ngrams(tagged_sentence,4)
for x in sentence_fourgrams:
    for entry in entries:
        if entry[0] == x:
            print(entry)
import nltk
from nltk import ngrams
import random
corpus = nltk.corpus.CategorizedPlaintextCorpusReader('', r'.*\.txt', cat_pattern=r'(\w+)/*')
documents = [(list(corpus.words(fileid)), category)
             for category in corpus.categories()
             for fileid in corpus.fileids(category)]
#Corpus sentences ngram
sents = corpus.sents()
stop_words = ['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', 'her', 'hers', 'herself', 'it', 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', 'should', 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', 'couldn', 'didn', 'doesn', 'hadn', 'hasn', 'haven', 'isn', 'ma', 'mightn', 'mustn', 'needn', 'shan', 'shouldn', 'wasn', 'weren', 'won', 'wouldn', '.', ',', '"', "'", '?', '!', ':', ';', '(', ')', '[', ']', '{', '}']
gramslist= []
n = 1
for sent in sents:
    sent = [x for x in sent if x not in stop_words]
    fourgrams = ngrams(sent, n)
    for grams in fourgrams:
        gramslist.append(grams)
def document_features(document):
    # compare n-gram tuples against the document's own n-grams,
    # otherwise the tuples from gramslist would never match plain words
    document_ngrams = set(ngrams(document, n))
    features = {}
    for gram in gramslist:
        features['contains({})'.format(gram)] = (gram in document_ngrams)
    return features
random.shuffle(documents)
featuresets = [(document_features(d), c) for (d,c) in documents]
train_set, test_set = featuresets[102:], featuresets[:102]
classifier = nltk.NaiveBayesClassifier.train(train_set)
print(nltk.classify.accuracy(classifier, test_set))
print(classifier.show_most_informative_features(10))